├── README.txt ├── README.md └── xen-vgt.patch /README.txt: -------------------------------------------------------------------------------- 1 | Currently our project has moved to 01org gvt-linux after upstreamed. 2 | Please refer to below guide: 3 | https://github.com/01org/gvt-linux/wiki/GVTg_Setup_Guide 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DISCONTINUATION OF PROJECT. 2 | 3 | This project will no longer be maintained by Intel. 4 | 5 | Intel has ceased development and contributions including, but not limited to, maintenance, bug fixes, new releases, or updates, to this project. 6 | 7 | Intel no longer accepts patches to this project. 8 | 9 | If you have an ongoing need to use this project, are interested in independently developing it, or would like to maintain patches for the open source software community, please create your own fork of this project. 10 | -------------------------------------------------------------------------------- /xen-vgt.patch: -------------------------------------------------------------------------------- 1 | diff --git a/tools/examples/xmexample.hvm b/tools/examples/xmexample.hvm 2 | index 96b6cc4..19358a4 100644 3 | --- a/tools/examples/xmexample.hvm 4 | +++ b/tools/examples/xmexample.hvm 5 | @@ -350,6 +350,25 @@ tsc_mode=0 6 | # like pci=['xx:xx.x'], it enables graphics passthrough, default=0 (disabled) 7 | #gfx_passthru=0 8 | 9 | +# Enable virtual graphics, default is disabled 10 | +#vgt=0 11 | + 12 | +# The low gm size which is CPU visible, default is 64MB. 13 | +#vgt_low_gm_sz=64 14 | + 15 | +# The high gm size which is CPU invisible, default is 448MB. 16 | +#vgt_high_gm_sz=448 17 | + 18 | +# The number of the fence registers, default is 4. 19 | +#vgt_fence_sz=4 20 | + 21 | +# The boolean variable(0|1) means if the vgt device should be the primary VGA. 22 | +# The default is 0. 
For Windows vgt guest, we usually need to set it to 1 after 23 | +# the gfx driver is installed. 24 | +# If the user doesn't set this variable explicitly, the global setting of the 25 | +# value in the vgt driver is used. 26 | +#vgt_primary=0 27 | +# 28 | #----------------------------------------------------------------------------- 29 | # Configure PVSCSI devices: 30 | # 31 | diff --git a/tools/firmware/Makefile b/tools/firmware/Makefile 32 | index aff8e56..2eb9e60 100644 33 | --- a/tools/firmware/Makefile 34 | +++ b/tools/firmware/Makefile 35 | @@ -19,6 +19,7 @@ ovmf: 36 | 37 | seabios-dir: 38 | GIT=$(GIT) $(XEN_ROOT)/scripts/git-checkout.sh $(SEABIOS_UPSTREAM_URL) $(SEABIOS_UPSTREAM_TAG) seabios-dir 39 | + patch -p1 < $(XEN_ROOT)/tools/firmware/xengt_seabios.patch 40 | cp seabios-config seabios-dir/.config; 41 | 42 | .PHONY: all 43 | diff --git a/tools/firmware/hvmloader/Makefile b/tools/firmware/hvmloader/Makefile 44 | index c6e7376..47365db 100644 45 | --- a/tools/firmware/hvmloader/Makefile 46 | +++ b/tools/firmware/hvmloader/Makefile 47 | @@ -111,7 +111,12 @@ ifneq ($(STDVGA_ROM),) 48 | echo "#ifdef ROM_INCLUDE_VGABIOS" >> $@.new 49 | sh ./mkhex vgabios_stdvga $(STDVGA_ROM) >> $@.new 50 | echo "#endif" >> $@.new 51 | + 52 | + echo "#ifdef SEABIOS_INCLUDE_VGABIOS" >> $@.new 53 | + sh ./mkhex seabios_vgabios_stdvga $(STDVGA_ROM) >> $@.new 54 | + echo "#endif" >> $@.new 55 | endif 56 | + 57 | ifneq ($(CIRRUSVGA_ROM),) 58 | echo "#ifdef ROM_INCLUDE_VGABIOS" >> $@.new 59 | sh ./mkhex vgabios_cirrusvga $(CIRRUSVGA_ROM) >> $@.new 60 | diff --git a/tools/firmware/hvmloader/config.h b/tools/firmware/hvmloader/config.h 61 | index 6641197..09ab9a9 100644 62 | --- a/tools/firmware/hvmloader/config.h 63 | +++ b/tools/firmware/hvmloader/config.h 64 | @@ -53,7 +53,7 @@ extern struct bios_config ovmf_config; 65 | #define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */ 66 | 67 | /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. 
*/ 68 | -#define PCI_MEM_START 0xf0000000 69 | +#define PCI_MEM_START 0xc0000000 70 | #define PCI_MEM_END 0xfc000000 71 | 72 | extern unsigned long pci_mem_start, pci_mem_end; 73 | diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c 74 | index 627e8cb..804565f 100644 75 | --- a/tools/firmware/hvmloader/pci.c 76 | +++ b/tools/firmware/hvmloader/pci.c 77 | @@ -24,6 +24,7 @@ 78 | #include "hypercall.h" 79 | #include "config.h" 80 | #include "pci_regs.h" 81 | +#include "vgt.h" 82 | 83 | #include 84 | #include 85 | @@ -36,6 +37,9 @@ unsigned long pci_mem_end = PCI_MEM_END; 86 | enum virtual_vga virtual_vga = VGA_none; 87 | unsigned long igd_opregion_pgbase = 0; 88 | 89 | +#define VESA_MMIO_RSVD_START 0xe0000000UL 90 | +#define VESA_MMIO_RSVD_END 0xe0130000UL 91 | + 92 | void pci_setup(void) 93 | { 94 | uint8_t is_64bar, using_64bar, bar64_relocate = 0; 95 | @@ -111,35 +115,31 @@ void pci_setup(void) 96 | ASSERT((devfn != PCI_ISA_DEVFN) || 97 | ((vendor_id == 0x8086) && (device_id == 0x7000))); 98 | 99 | + printf("Detect %x.%x devfn, with class: %x\n", devfn>>3, devfn&7, class); 100 | switch ( class ) 101 | { 102 | case 0x0300: 103 | /* If emulated VGA is found, preserve it as primary VGA. 
*/ 104 | if ( (vendor_id == 0x1234) && (device_id == 0x1111) ) 105 | { 106 | + printf("Detect emulated stdvga\n"); 107 | vga_devfn = devfn; 108 | virtual_vga = VGA_std; 109 | } 110 | else if ( (vendor_id == 0x1013) && (device_id == 0xb8) ) 111 | { 112 | + printf("Detect emulated cirrus vga\n"); 113 | vga_devfn = devfn; 114 | virtual_vga = VGA_cirrus; 115 | } 116 | - else if ( virtual_vga == VGA_none ) 117 | + else if ( (vendor_id == 0x8086) && 118 | + (_is_sandybridge(device_id) || _is_ivybridge(device_id) 119 | + || _is_haswell(device_id) || _is_broadwell(device_id) ) 120 | + ) 121 | { 122 | + printf("Detect Intel Graphics Device\n"); 123 | vga_devfn = devfn; 124 | virtual_vga = VGA_pt; 125 | - if ( vendor_id == 0x8086 ) 126 | - { 127 | - igd_opregion_pgbase = mem_hole_alloc(IGD_OPREGION_PAGES); 128 | - /* 129 | - * Write the the OpRegion offset to give the opregion 130 | - * address to the device model. The device model will trap 131 | - * and map the OpRegion at the give address. 132 | - */ 133 | - pci_writel(vga_devfn, PCI_INTEL_OPREGION, 134 | - igd_opregion_pgbase << PAGE_SHIFT); 135 | - } 136 | } 137 | break; 138 | case 0x0680: 139 | @@ -272,6 +272,13 @@ void pci_setup(void) 140 | unsigned int, 141 | hvm_info->low_mem_pgend - (pci_mem_start >> PAGE_SHIFT), 142 | (1u << 16) - 1); 143 | + 144 | + /* This is a temporary check and will be removed once upstream Xen 145 | + * fix the issue "QEMU hardcodes PCI MMIO base. 
146 | + */ 147 | + printf("can not find a big enough MMIO hole!!!\n"); 148 | + BUG(); 149 | + 150 | if ( hvm_info->high_mem_pgend == 0 ) 151 | hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT); 152 | hvm_info->low_mem_pgend -= nr_pages; 153 | @@ -366,6 +373,15 @@ void pci_setup(void) 154 | base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1); 155 | bar_data |= (uint32_t)base; 156 | bar_data_upper = (uint32_t)(base >> 32); 157 | + 158 | + /* Skip allocate the reserved range by vesafb */ 159 | + if (resource == &mem_resource && 160 | + (base + bar_sz > VESA_MMIO_RSVD_START) && (base < VESA_MMIO_RSVD_END)) { 161 | + resource->base = VESA_MMIO_RSVD_END; 162 | + base = (resource->base + bar_sz - 1) & ~(uint64_t)(bar_sz - 1); 163 | + bar_data |= (uint32_t)base; 164 | + } 165 | + 166 | base += bar_sz; 167 | 168 | if ( (base < resource->base) || (base > resource->max) ) 169 | @@ -400,6 +416,47 @@ void pci_setup(void) 170 | 171 | if ( vga_devfn != 256 ) 172 | { 173 | + if (virtual_vga == VGA_pt) { 174 | + uint32_t bar = pci_readl(vga_devfn, PCI_BASE_ADDRESS_0) 175 | + & PCI_BASE_ADDRESS_MEM_MASK; 176 | + 177 | + void *pvinfo = (void *)bar + VGT_PVINFO_PAGE; 178 | + uint64_t *magic = pvinfo; 179 | + 180 | + if (*magic == VGT_MAGIC) { 181 | + /* 182 | + * Found VGT device, and use standard VGA bios. 183 | + */ 184 | + printf("Found VGT\n"); 185 | + virtual_vga = VGA_std; 186 | + 187 | + /* XXX: we use this hack to tell vGT driver the 188 | + * top of <4G mem, so vGT can avoid unnecessary 189 | + * attempts to map the mem hole. This optimization 190 | + * can speed up guest bootup time and improve Win7 191 | + * SMP guest's stability. 192 | + * NOTE: here we're actually trying to write 32 bits 193 | + * into VENDOR_ID and DEVICE_ID -- we assume normally 194 | + * sane codes in guest won't do this... 195 | + */ 196 | + pci_writel(vga_devfn, PCI_VENDOR_ID, hvm_info->low_mem_pgend); 197 | + } else { 198 | + /* 199 | + * Found VTD device, and use physical VGA bios. 
200 | + */ 201 | + printf("Found VTD\n"); 202 | + } 203 | + 204 | + igd_opregion_pgbase = mem_hole_alloc(2); 205 | + /* 206 | + * Write the the OpRegion offset to give the opregion 207 | + * address to the device model. The device model will trap 208 | + * and map the OpRegion at the give address. 209 | + */ 210 | + pci_writel(vga_devfn, PCI_INTEL_OPREGION, 211 | + igd_opregion_pgbase << PAGE_SHIFT); 212 | + } 213 | + 214 | /* 215 | * VGA registers live in I/O space so ensure that primary VGA 216 | * has IO enabled, even if there is no I/O BAR on that 217 | diff --git a/tools/firmware/hvmloader/seabios.c b/tools/firmware/hvmloader/seabios.c 218 | index dd7dfbe..6938ef8 100644 219 | --- a/tools/firmware/hvmloader/seabios.c 220 | +++ b/tools/firmware/hvmloader/seabios.c 221 | @@ -29,6 +29,7 @@ 222 | #include "acpi/acpi2_0.h" 223 | 224 | #define ROM_INCLUDE_SEABIOS 225 | +#define SEABIOS_INCLUDE_VGABIOS 226 | #include "roms.inc" 227 | 228 | extern unsigned char dsdt_anycpu_qemu_xen[]; 229 | @@ -133,6 +134,20 @@ static void seabios_setup_e820(void) 230 | dump_e820_table(e820, info->e820_nr); 231 | } 232 | 233 | +//BUILD_BUG_ON(sizeof(seabios) > (0x00100000U - SEABIOS_PHYSICAL_ADDRESS)); 234 | + 235 | +#ifdef SEABIOS_INCLUDE_VGABIOS 236 | +static void seabios_load_roms(void) 237 | +{ 238 | + if (virtual_vga != VGA_std) 239 | + return; 240 | + 241 | + printf("Loading Standard VGABIOS ...\n"); 242 | + memcpy((void *)VGABIOS_PHYSICAL_ADDRESS, 243 | + seabios_vgabios_stdvga, sizeof(seabios_vgabios_stdvga)); 244 | +} 245 | +#endif 246 | + 247 | struct bios_config seabios_config = { 248 | .name = "SeaBIOS", 249 | 250 | @@ -141,7 +156,11 @@ struct bios_config seabios_config = { 251 | 252 | .bios_address = 0x100000 - sizeof(seabios), 253 | 254 | +#ifdef SEABIOS_INCLUDE_VGABIOS 255 | + .load_roms = seabios_load_roms, 256 | +#else 257 | .load_roms = NULL, 258 | +#endif 259 | 260 | .bios_load = NULL, 261 | 262 | diff --git a/tools/firmware/hvmloader/vgt.h 
b/tools/firmware/hvmloader/vgt.h 263 | new file mode 100644 264 | index 0000000..49d3d60 265 | --- /dev/null 266 | +++ b/tools/firmware/hvmloader/vgt.h 267 | @@ -0,0 +1,146 @@ 268 | +/* 269 | + * Copyright (c) 2012-2013, Intel Corporation. 270 | + * 271 | + * This program is free software; you can redistribute it and/or modify it 272 | + * under the terms and conditions of the GNU General Public License, 273 | + * version 2, as published by the Free Software Foundation. 274 | + * 275 | + * This program is distributed in the hope it will be useful, but WITHOUT 276 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 277 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 278 | + * more details. 279 | + * 280 | + * You should have received a copy of the GNU General Public License along with 281 | + * this program; if not, write to the Free Software Foundation, Inc., 282 | + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 283 | + */ 284 | + 285 | +#ifndef _VGT_DEVTABLE_H 286 | +#define _VGT_DEVTABLE_H 287 | + 288 | +#define VGT_PVINFO_PAGE 0x78000 289 | +#define VGT_MAGIC 0x4776544776544776 /* 'vGTvGTvG' */ 290 | +#define VGT_VERSION_MAJOR 1 291 | +#define VGT_VERSION_MINOR 0 292 | + 293 | +static inline int _is_sandybridge(int devid) 294 | +{ 295 | + int ret = 0; 296 | + 297 | + switch (devid) { 298 | + case 0x0102: 299 | + case 0x0112: 300 | + case 0x0122: 301 | + case 0x0106: 302 | + case 0x0116: 303 | + case 0x0126: 304 | + case 0x010A: 305 | + ret = 1; 306 | + break; 307 | + default: 308 | + break; 309 | + } 310 | + return ret; 311 | +} 312 | + 313 | +static inline int _is_ivybridge(int devid) 314 | +{ 315 | + int ret = 0; 316 | + 317 | + switch (devid) { 318 | + case 0x0156: 319 | + case 0x0166: 320 | + case 0x0152: 321 | + case 0x0162: 322 | + case 0x015a: 323 | + case 0x016a: 324 | + ret = 1; 325 | + break; 326 | + default: 327 | + break; 328 | + } 329 | + return ret; 330 | +} 331 | + 332 | +static 
inline int _is_haswell(int devid) 333 | +{ 334 | + int ret = 0; 335 | + 336 | + switch (devid) { 337 | + case 0x0400: 338 | + case 0x0402: 339 | + case 0x0404: 340 | + case 0x0406: 341 | + case 0x0408: 342 | + case 0x040a: 343 | + case 0x0412: 344 | + case 0x0416: 345 | + case 0x041a: 346 | + case 0x0422: 347 | + case 0x0426: 348 | + case 0x042a: 349 | + case 0x0a02: 350 | + case 0x0a06: 351 | + case 0x0a0a: 352 | + case 0x0a12: 353 | + case 0x0a16: 354 | + case 0x0a1a: 355 | + case 0x0a22: 356 | + case 0x0a26: 357 | + case 0x0a2a: 358 | + case 0x0c02: 359 | + case 0x0c04: 360 | + case 0x0c06: 361 | + case 0x0c0a: 362 | + case 0x0c12: 363 | + case 0x0c16: 364 | + case 0x0c1a: 365 | + case 0x0c22: 366 | + case 0x0c26: 367 | + case 0x0c2a: 368 | + case 0x0d12: 369 | + case 0x0d16: 370 | + case 0x0d1a: 371 | + case 0x0d22: 372 | + case 0x0d26: 373 | + case 0x0d2a: 374 | + case 0x0d32: 375 | + case 0x0d36: 376 | + case 0x0d3a: 377 | + ret = 1; 378 | + break; 379 | + default: 380 | + break; 381 | + } 382 | + return ret; 383 | +} 384 | + 385 | +static inline int _is_broadwell(int devid) 386 | +{ 387 | + switch ((devid >> 4) & 0xf) { 388 | + case 0: 389 | + case 1: 390 | + case 2: 391 | + break; 392 | + default: 393 | + return 0; 394 | + } 395 | + 396 | + devid &= ~0xf0; 397 | + 398 | + switch (devid) { 399 | + case 0x1602: 400 | + case 0x1606: 401 | + case 0x160B: 402 | + case 0x160E: 403 | + case 0x160A: 404 | + case 0x160D: 405 | + break; 406 | + default: 407 | + return 0; 408 | + } 409 | + 410 | + return 1; 411 | +} 412 | + 413 | +#endif /* _VGT_DEVTABLE_H */ 414 | diff --git a/tools/firmware/vgabios/Makefile b/tools/firmware/vgabios/Makefile 415 | index 26bb871..6ff921b 100644 416 | --- a/tools/firmware/vgabios/Makefile 417 | +++ b/tools/firmware/vgabios/Makefile 418 | @@ -37,7 +37,7 @@ release: 419 | tar czvf ../$(RELEASE).tgz --exclude CVS -C .. 
$(RELEASE)/ 420 | 421 | vgabios.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h 422 | - $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE $(VGABIOS_DATE) > _vgabios_.c 423 | + $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DVGT $(VGABIOS_DATE) > _vgabios_.c 424 | $(BCC) -o vgabios.s -C-c -D__i86__ -S -0 _vgabios_.c 425 | sed -e 's/^\.text//' -e 's/^\.data//' vgabios.s > _vgabios_.s 426 | $(AS86) _vgabios_.s -b vgabios.bin -u -w- -g -0 -j -O -l vgabios.txt 427 | @@ -47,7 +47,7 @@ vgabios.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbe 428 | ls -l VGABIOS-lgpl-latest.bin 429 | 430 | vgabios.debug.bin: biossums vgabios.c vgabios.h vgafonts.h vgatables.h vbe.h vbe.c vbetables.h 431 | - $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DDEBUG $(VGABIOS_DATE) > _vgabios-debug_.c 432 | + $(GCC) -E -P vgabios.c $(VGABIOS_VERS) -DVBE -DVGT -DDEBUG $(VGABIOS_DATE) > _vgabios-debug_.c 433 | $(BCC) -o vgabios-debug.s -C-c -D__i86__ -S -0 _vgabios-debug_.c 434 | sed -e 's/^\.text//' -e 's/^\.data//' vgabios-debug.s > _vgabios-debug_.s 435 | $(AS86) _vgabios-debug_.s -b vgabios.debug.bin -u -w- -g -0 -j -O -l vgabios.debug.txt 436 | diff --git a/tools/firmware/vgabios/vgabios.c b/tools/firmware/vgabios/vgabios.c 437 | index a9dbe00..c81d7cd 100644 438 | --- a/tools/firmware/vgabios/vgabios.c 439 | +++ b/tools/firmware/vgabios/vgabios.c 440 | @@ -149,6 +149,12 @@ vgabios_entry_point: 441 | 442 | jmp vgabios_init_func 443 | 444 | +/* 445 | + * GEN Windows driver assume the identifying string 446 | + * to be located from the VBIOS offset 6. 447 | + * Remove the following bytes to make Windows happy for VGT. 
448 | + */ 449 | +#ifndef VGT 450 | #ifdef PCIBIOS 451 | .org 0x18 452 | .word vgabios_pci_data 453 | @@ -158,6 +164,7 @@ vgabios_entry_point: 454 | .org 0x1e 455 | .ascii "IBM" 456 | .byte 0x00 457 | +#endif /* VGT */ 458 | 459 | vgabios_name: 460 | .ascii "Plex86/Bochs VGABios" 461 | diff --git a/tools/firmware/xengt_seabios.patch b/tools/firmware/xengt_seabios.patch 462 | new file mode 100644 463 | index 0000000..d917519 464 | --- /dev/null 465 | +++ b/tools/firmware/xengt_seabios.patch 466 | @@ -0,0 +1,12 @@ 467 | +--- a/seabios-dir/src/optionroms.c 468 | ++++ b/seabios-dir/src/optionroms.c 469 | +@@ -478,7 +478,8 @@ vga_setup(void) 470 | + S3ResumeVgaInit = romfile_loadint("etc/s3-resume-vga-init", 0); 471 | + ScreenAndDebug = romfile_loadint("etc/screen-and-debug", 1); 472 | + 473 | +- if (CONFIG_OPTIONROMS_DEPLOYED) { 474 | ++ if (CONFIG_OPTIONROMS_DEPLOYED || 475 | ++ ((*(u16 *)BUILD_ROM_START) == 0xaa55)) { 476 | + // Option roms are already deployed on the system. 477 | + init_optionrom((void*)BUILD_ROM_START, 0, 1); 478 | + } else { 479 | diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c 480 | index 826236f..0530303 100644 481 | --- a/tools/libxl/libxl.c 482 | +++ b/tools/libxl/libxl.c 483 | @@ -1285,6 +1285,29 @@ static void domain_destroy_callback(libxl__egc *egc, 484 | static void destroy_finish_check(libxl__egc *egc, 485 | libxl__domain_destroy_state *dds); 486 | 487 | +// We don't care the return value: 488 | +// 1) the guest may not be a VGT guest; 489 | +// 2) normally when a VGT guest shutdown, the ioemu has already tried to 490 | +// destroy the vgt instance and we shouldn't come here by "xl dest dom_id". 491 | +// 3) we come here because the ioemu didn't destroy the vgt instance 492 | +// successfully(e.g., ioemu exits abnormally) or we want to kill the guest by 493 | +// force while it's running. In this case, we still try our best to destroy 494 | +// the vgt instance. 
495 | +static void destroy_vgt_instance(int domid) 496 | +{ 497 | + const char *path = "/sys/kernel/vgt/control/create_vgt_instance"; 498 | + FILE *vgt_file; 499 | + 500 | + if (domid <= 0) 501 | + return; 502 | + 503 | + if ((vgt_file = fopen(path, "w")) == NULL) 504 | + return; 505 | + 506 | + (void)fprintf(vgt_file, "%d\n", -domid); 507 | + (void)fclose(vgt_file); 508 | +} 509 | + 510 | void libxl__domain_destroy(libxl__egc *egc, libxl__domain_destroy_state *dds) 511 | { 512 | STATE_AO_GC(dds->ao); 513 | @@ -1467,6 +1490,8 @@ static void devices_destroy_cb(libxl__egc *egc, 514 | 515 | libxl__userdata_destroyall(gc, domid); 516 | 517 | + destroy_vgt_instance(domid); 518 | + 519 | rc = xc_domain_destroy(ctx->xch, domid); 520 | if (rc < 0) { 521 | LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_destroy failed for %d", domid); 522 | diff --git a/tools/libxl/libxl_create.c b/tools/libxl/libxl_create.c 523 | index 0c32d0b..bf29699 100644 524 | --- a/tools/libxl/libxl_create.c 525 | +++ b/tools/libxl/libxl_create.c 526 | @@ -275,7 +275,7 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc, 527 | } 528 | 529 | libxl_defbool_setdefault(&b_info->u.hvm.nographic, false); 530 | - 531 | + libxl_defbool_setdefault(&b_info->u.hvm.vgt, false); 532 | libxl_defbool_setdefault(&b_info->u.hvm.gfx_passthru, false); 533 | 534 | break; 535 | diff --git a/tools/libxl/libxl_dm.c b/tools/libxl/libxl_dm.c 536 | index 7e54c02..ded1bf9 100644 537 | --- a/tools/libxl/libxl_dm.c 538 | +++ b/tools/libxl/libxl_dm.c 539 | @@ -185,12 +185,36 @@ static char ** libxl__build_device_model_args_old(libxl__gc *gc, 540 | NULL); 541 | } 542 | 543 | - switch (b_info->u.hvm.vga.kind) { 544 | - case LIBXL_VGA_INTERFACE_TYPE_STD: 545 | - flexarray_append(dm_args, "-std-vga"); 546 | - break; 547 | - case LIBXL_VGA_INTERFACE_TYPE_CIRRUS: 548 | - break; 549 | + if (libxl_defbool_val(b_info->u.hvm.vgt)) { 550 | + flexarray_vappend(dm_args, "-vga", "xengt", NULL); 551 | + if 
(b_info->u.hvm.vgt_low_gm_sz) { 552 | + flexarray_vappend(dm_args, "-vgt_low_gm_sz", 553 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_low_gm_sz), NULL); 554 | + } 555 | + if (b_info->u.hvm.vgt_high_gm_sz) { 556 | + flexarray_vappend(dm_args, "-vgt_high_gm_sz", 557 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_high_gm_sz), NULL); 558 | + } 559 | + if (b_info->u.hvm.vgt_fence_sz) { 560 | + flexarray_vappend(dm_args, "-vgt_fence_sz", 561 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_fence_sz), NULL); 562 | + } 563 | + if (b_info->u.hvm.vgt_primary != -1) { 564 | + flexarray_vappend(dm_args, "-vgt_primary", 565 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_primary), NULL); 566 | + } 567 | + if (b_info->u.hvm.vgt_monitor_config_file) { 568 | + flexarray_vappend(dm_args, "-vgt_monitor_config_file", 569 | + libxl__sprintf(gc, "%s", b_info->u.hvm.vgt_monitor_config_file), NULL); 570 | + } 571 | + } else { 572 | + switch (b_info->u.hvm.vga.kind) { 573 | + case LIBXL_VGA_INTERFACE_TYPE_STD: 574 | + flexarray_append(dm_args, "-std-vga"); 575 | + break; 576 | + case LIBXL_VGA_INTERFACE_TYPE_CIRRUS: 577 | + break; 578 | + } 579 | } 580 | 581 | if (b_info->u.hvm.boot) { 582 | @@ -465,19 +489,44 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc, 583 | flexarray_append(dm_args, spiceoptions); 584 | } 585 | 586 | - switch (b_info->u.hvm.vga.kind) { 587 | - case LIBXL_VGA_INTERFACE_TYPE_STD: 588 | - flexarray_vappend(dm_args, "-vga", "std", NULL); 589 | - break; 590 | - case LIBXL_VGA_INTERFACE_TYPE_CIRRUS: 591 | - flexarray_vappend(dm_args, "-vga", "cirrus", NULL); 592 | - if (b_info->video_memkb) { 593 | - flexarray_vappend(dm_args, "-global", 594 | - GCSPRINTF("vga.vram_size_mb=%d", 595 | - libxl__sizekb_to_mb(b_info->video_memkb)), NULL); 596 | + /* TODO: some vga options are exclusive */ 597 | + if (libxl_defbool_val(b_info->u.hvm.vgt)) { 598 | + flexarray_vappend(dm_args, "-vga", "xengt", NULL); 599 | + if (b_info->u.hvm.vgt_low_gm_sz) { 600 | + 
flexarray_vappend(dm_args, "-vgt_low_gm_sz", 601 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_low_gm_sz), NULL); 602 | + } 603 | + if (b_info->u.hvm.vgt_high_gm_sz) { 604 | + flexarray_vappend(dm_args, "-vgt_high_gm_sz", 605 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_high_gm_sz), NULL); 606 | + } 607 | + if (b_info->u.hvm.vgt_fence_sz) { 608 | + flexarray_vappend(dm_args, "-vgt_fence_sz", 609 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_fence_sz), NULL); 610 | + } 611 | + if (b_info->u.hvm.vgt_primary != -1) { 612 | + flexarray_vappend(dm_args, "-vgt_primary", 613 | + libxl__sprintf(gc, "%d", b_info->u.hvm.vgt_primary), NULL); 614 | } 615 | - break; 616 | - } 617 | + if (b_info->u.hvm.vgt_monitor_config_file) { 618 | + flexarray_vappend(dm_args, "-vgt_monitor_config_file", 619 | + libxl__sprintf(gc, "%s", b_info->u.hvm.vgt_monitor_config_file), NULL); 620 | + } 621 | + } else { 622 | + switch (b_info->u.hvm.vga.kind) { 623 | + case LIBXL_VGA_INTERFACE_TYPE_STD: 624 | + flexarray_vappend(dm_args, "-vga", "std", NULL); 625 | + break; 626 | + case LIBXL_VGA_INTERFACE_TYPE_CIRRUS: 627 | + flexarray_vappend(dm_args, "-vga", "cirrus", NULL); 628 | + if (b_info->video_memkb) { 629 | + flexarray_vappend(dm_args, "-global", 630 | + GCSPRINTF("vga.vram_size_mb=%d", 631 | + libxl__sizekb_to_mb(b_info->video_memkb)), NULL); 632 | + } 633 | + break; 634 | + } 635 | + } 636 | 637 | if (b_info->u.hvm.boot) { 638 | flexarray_vappend(dm_args, "-boot", 639 | diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl 640 | index d218a2d..6af715c 100644 641 | --- a/tools/libxl/libxl_types.idl 642 | +++ b/tools/libxl/libxl_types.idl 643 | @@ -23,6 +23,7 @@ libxl_hwcap = Builtin("hwcap", passby=PASS_BY_REFERENCE) 644 | # 645 | 646 | MemKB = UInt(64, init_val = "LIBXL_MEMKB_DEFAULT") 647 | +VgtInt = UInt(32, init_val = "0UL") 648 | 649 | # 650 | # Constants / Enumerations 651 | @@ -314,12 +315,18 @@ libxl_domain_build_info = Struct("domain_build_info",[ 652 | 
("acpi_firmware", string), 653 | ("nographic", libxl_defbool), 654 | ("vga", libxl_vga_interface_info), 655 | + ("vgt", libxl_defbool), 656 | + ("vgt_low_gm_sz", VgtInt), 657 | + ("vgt_high_gm_sz", VgtInt), 658 | + ("vgt_fence_sz", VgtInt), 659 | + ("vgt_primary", VgtInt), 660 | + ("vgt_monitor_config_file", string), 661 | ("vnc", libxl_vnc_info), 662 | # keyboard layout, default is en-us keyboard 663 | ("keymap", string), 664 | ("sdl", libxl_sdl_info), 665 | ("spice", libxl_spice_info), 666 | - 667 | + 668 | ("gfx_passthru", libxl_defbool), 669 | 670 | ("serial", string), 671 | diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c 672 | index f5943a4..7885671 100644 673 | --- a/tools/libxl/xl_cmdimpl.c 674 | +++ b/tools/libxl/xl_cmdimpl.c 675 | @@ -1464,6 +1464,18 @@ skip_vfb: 676 | b_info->u.hvm.vga.kind = l ? LIBXL_VGA_INTERFACE_TYPE_STD : 677 | LIBXL_VGA_INTERFACE_TYPE_CIRRUS; 678 | 679 | + xlu_cfg_get_defbool(config, "vgt", &b_info->u.hvm.vgt, 0); 680 | + if(!xlu_cfg_get_long(config, "vgt_low_gm_sz", &l, 0)) 681 | + b_info->u.hvm.vgt_low_gm_sz = l; 682 | + if(!xlu_cfg_get_long(config, "vgt_high_gm_sz", &l, 0)) 683 | + b_info->u.hvm.vgt_high_gm_sz = l; 684 | + if(!xlu_cfg_get_long(config, "vgt_fence_sz", &l, 0)) 685 | + b_info->u.hvm.vgt_fence_sz = l; 686 | + if(!xlu_cfg_get_long(config, "vgt_primary", &l, 0)) 687 | + b_info->u.hvm.vgt_primary = l; 688 | + else 689 | + b_info->u.hvm.vgt_primary = -1; /* not specified */ 690 | + xlu_cfg_replace_string (config, "vgt_monitor_config_file", &b_info->u.hvm.vgt_monitor_config_file, 0); 691 | xlu_cfg_get_defbool(config, "vnc", &b_info->u.hvm.vnc.enable, 0); 692 | xlu_cfg_replace_string (config, "vnclisten", 693 | &b_info->u.hvm.vnc.listen, 0); 694 | diff --git a/tools/libxl/xl_sxp.c b/tools/libxl/xl_sxp.c 695 | index a16a025..345c6d0 100644 696 | --- a/tools/libxl/xl_sxp.c 697 | +++ b/tools/libxl/xl_sxp.c 698 | @@ -113,6 +113,8 @@ void printf_info_sexp(int domid, libxl_domain_config *d_config) 699 | 
printf("\t\t\t(stdvga %s)\n", b_info->u.hvm.vga.kind == 700 | LIBXL_VGA_INTERFACE_TYPE_STD ? 701 | "True" : "False"); 702 | + printf("\t\t\t(vgt %s)\n", 703 | + libxl_defbool_to_string(b_info->u.hvm.vgt)); 704 | printf("\t\t\t(vnc %s)\n", 705 | libxl_defbool_to_string(b_info->u.hvm.vnc.enable)); 706 | printf("\t\t\t(vnclisten %s)\n", b_info->u.hvm.vnc.listen); 707 | diff --git a/tools/tests/Makefile b/tools/tests/Makefile 708 | index adeb120..f94bbde 100644 709 | --- a/tools/tests/Makefile 710 | +++ b/tools/tests/Makefile 711 | @@ -5,7 +5,7 @@ CFLAGS += $(CFLAGS_libxenctrl) 712 | LDLIBS += $(LDLIBS_libxenctrl) 713 | 714 | SUBDIRS-y := 715 | -SUBDIRS-$(CONFIG_X86) += mce-test 716 | +#SUBDIRS-$(CONFIG_X86) += mce-test 717 | SUBDIRS-y += mem-sharing 718 | ifeq ($(XEN_TARGET_ARCH),__fixme__) 719 | SUBDIRS-y += regression 720 | diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile 721 | index d502bdf..e8a3c4b 100644 722 | --- a/xen/arch/x86/Makefile 723 | +++ b/xen/arch/x86/Makefile 724 | @@ -58,6 +58,7 @@ obj-y += crash.o 725 | obj-y += tboot.o 726 | obj-y += hpet.o 727 | obj-y += xstate.o 728 | +obj-y += vgt.o 729 | 730 | obj-$(crash_debug) += gdbstub.o 731 | 732 | diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c 733 | index f41f0de..d36b93f 100644 734 | --- a/xen/arch/x86/acpi/power.c 735 | +++ b/xen/arch/x86/acpi/power.c 736 | @@ -30,6 +30,7 @@ 737 | #include 738 | #include 739 | #include 740 | +#include 741 | 742 | uint32_t system_reset_counter = 1; 743 | 744 | @@ -73,6 +74,8 @@ static void device_power_up(void) 745 | time_resume(); 746 | 747 | console_resume(); 748 | + 749 | + vgt_resume(); 750 | } 751 | 752 | static void freeze_domains(void) 753 | diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c 754 | index 146fb9f..5b6f8fb 100644 755 | --- a/xen/arch/x86/domain.c 756 | +++ b/xen/arch/x86/domain.c 757 | @@ -60,6 +60,7 @@ 758 | #include 759 | #include 760 | #include 761 | +#include 762 | 763 | DEFINE_PER_CPU(struct vcpu *, 
curr_vcpu); 764 | DEFINE_PER_CPU(unsigned long, cr4); 765 | @@ -469,6 +470,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) 766 | (domcr_flags & DOMCRF_hap); 767 | d->arch.hvm_domain.mem_sharing_enabled = 0; 768 | 769 | + d->arch.hvm_domain.vgt_enabled = 0; 770 | + 771 | d->arch.s3_integrity = !!(domcr_flags & DOMCRF_s3_integrity); 772 | 773 | INIT_LIST_HEAD(&d->arch.pdev_list); 774 | @@ -532,6 +535,8 @@ int arch_domain_create(struct domain *d, unsigned int domcr_flags) 775 | 776 | d->arch.ioport_caps = 777 | rangeset_new(d, "I/O Ports", RANGESETF_prettyprint_hex); 778 | + d->arch.ioport_forwarding_caps = 779 | + rangeset_new(d, "I/O Ports Self-forwarding", RANGESETF_prettyprint_hex); 780 | rc = -ENOMEM; 781 | if ( d->arch.ioport_caps == NULL ) 782 | goto fail; 783 | @@ -1012,6 +1017,48 @@ arch_do_vcpu_op( 784 | break; 785 | } 786 | 787 | + case VCPUOP_request_io_emulation: 788 | + { 789 | + struct vcpu_emul_ioreq ioreq; 790 | + 791 | + rc = -EFAULT; 792 | + if (dom0 != v->domain) /* only for dom0 */ 793 | + break; 794 | + if ( copy_from_guest(&ioreq, arg, 1) ) 795 | + break; 796 | + 797 | + rc = -EINVAL; 798 | + if ( !hypercall_io_emulation(v, &ioreq) ) 799 | + break; 800 | + 801 | + rc = -EFAULT; 802 | + if ( copy_to_guest(arg, &ioreq, 1) ) 803 | + break; 804 | + rc = 0; 805 | + break; 806 | + } 807 | + 808 | + case VCPUOP_get_sysdata: 809 | + { 810 | + struct vcpu_sysdata_request req; 811 | + 812 | + rc = -EFAULT; 813 | + if (dom0 != v->domain) /* only for dom0 */ 814 | + break; 815 | + if ( copy_from_guest(&req, arg, 1) ) 816 | + break; 817 | + 818 | + rc = -EINVAL; 819 | + if ( !hypercall_get_sysdata(v, &req) ) 820 | + break; 821 | + 822 | + rc = -EFAULT; 823 | + if ( copy_to_guest(arg, &req, 1) ) 824 | + break; 825 | + rc = 0; 826 | + break; 827 | + } 828 | + 829 | default: 830 | rc = -ENOSYS; 831 | break; 832 | diff --git a/xen/arch/x86/domain_build.c b/xen/arch/x86/domain_build.c 833 | index 8365f32..ef91981 100644 834 | --- 
a/xen/arch/x86/domain_build.c 835 | +++ b/xen/arch/x86/domain_build.c 836 | @@ -35,6 +35,7 @@ 837 | #include 838 | #include /* for bzimage_parse */ 839 | #include 840 | +#include 841 | 842 | #include 843 | 844 | diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c 845 | index e75918a..f0bd9ee 100644 846 | --- a/xen/arch/x86/domctl.c 847 | +++ b/xen/arch/x86/domctl.c 848 | @@ -35,6 +35,7 @@ 849 | #include 850 | #include 851 | #include 852 | +#include 853 | 854 | static int gdbsx_guest_mem_io( 855 | domid_t domid, struct xen_domctl_gdbsx_memio *iop) 856 | @@ -641,7 +642,8 @@ long arch_do_domctl( 857 | if ( ret ) 858 | break; 859 | 860 | - if ( add ) 861 | + ret=0; 862 | + if ( add == DPCI_ADD_MAPPING ) 863 | { 864 | printk(XENLOG_G_INFO 865 | "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n", 866 | @@ -1223,6 +1225,19 @@ long arch_do_domctl( 867 | } 868 | break; 869 | 870 | + case XEN_DOMCTL_vgt_io_trap: 871 | + { 872 | + struct xen_domctl_vgt_io_trap *info = &domctl->u.vgt_io_trap; 873 | + 874 | + if (!info->n_pio || !info->n_mmio) 875 | + copyback = 1; 876 | + 877 | + ret = vgt_io_trap(d, info); 878 | + if (ret) 879 | + break; 880 | + } 881 | + break; 882 | + 883 | default: 884 | ret = iommu_do_domctl(domctl, d, u_domctl); 885 | break; 886 | diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile 887 | index eea5555..765a09f 100644 888 | --- a/xen/arch/x86/hvm/Makefile 889 | +++ b/xen/arch/x86/hvm/Makefile 890 | @@ -22,4 +22,5 @@ obj-y += vlapic.o 891 | obj-y += vmsi.o 892 | obj-y += vpic.o 893 | obj-y += vpt.o 894 | -obj-y += vpmu.o 895 | \ No newline at end of file 896 | +obj-y += vpmu.o 897 | +obj-y += vgt.o 898 | diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c 899 | index b206997..44877fa 100644 900 | --- a/xen/arch/x86/hvm/emulate.c 901 | +++ b/xen/arch/x86/hvm/emulate.c 902 | @@ -61,6 +61,7 @@ static int hvmemul_do_io( 903 | unsigned long ram_gfn = paddr_to_pfn(ram_gpa); 904 | p2m_type_t p2mt; 905 | struct page_info 
*ram_page; 906 | + int do_mmio_split = (is_mmio && !value_is_ptr && size > sizeof(p->data)); 907 | int rc; 908 | 909 | /* Check for paged out page */ 910 | @@ -83,25 +84,62 @@ static int hvmemul_do_io( 911 | * Weird-sized accesses have undefined behaviour: we discard writes 912 | * and read all-ones. 913 | */ 914 | - if ( unlikely((size > sizeof(long)) || (size & (size - 1))) ) 915 | + if ( unlikely((!do_mmio_split && size > sizeof(long)) 916 | + || (do_mmio_split && size > MAX_INS_EMULATE_MMIO_SIZE) 917 | + || (size & (size - 1))) ) 918 | { 919 | gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size); 920 | ASSERT(p_data != NULL); /* cannot happen with a REP prefix */ 921 | if ( dir == IOREQ_READ ) 922 | memset(p_data, ~0, size); 923 | - if ( ram_page ) 924 | - put_page(ram_page); 925 | - return X86EMUL_UNHANDLEABLE; 926 | + goto out_unhandleable; 927 | + } 928 | + 929 | + vio = &curr->arch.hvm_vcpu.hvm_io; 930 | + 931 | + if ( unlikely(do_mmio_split) ) 932 | + { 933 | + if ( !vio->mmio_split ) 934 | + { 935 | + /* Beginning of a split MMIO emulation. */ 936 | + if ( vio->io_state != HVMIO_none ) 937 | + { 938 | + gdprintk(XENLOG_WARNING, "Split MMIO emulation:\n"); 939 | + gdprintk(XENLOG_WARNING, "Invalid IO state when trying to begin a split MMIO emulation.\n"); 940 | + goto out_unhandleable; 941 | + } 942 | + 943 | + vio->mmio_split = 1; 944 | + 945 | + vio->mmio_split_pa = addr; 946 | + vio->mmio_split_size = sizeof(p->data); 947 | + vio->mmio_split_done_size = 0; 948 | + vio->mmio_split_dir = dir; 949 | + 950 | + /* Load write buffer. 
*/ 951 | + if ( dir == IOREQ_WRITE ) 952 | + memcpy(vio->mmio_split_buf, p_data, vio->io_size); 953 | + } 954 | } 955 | 956 | if ( (p_data != NULL) && (dir == IOREQ_WRITE) ) 957 | { 958 | - memcpy(&value, p_data, size); 959 | + /* 960 | + * Enter here at: 961 | + * Beginning of a split MMIO emulation 962 | + * -> Load value from write buffer for the first round ioreq_t, 963 | + * End of a split MMIO emulation 964 | + * -> Do nothing. 965 | + */ 966 | + if ( unlikely(vio->mmio_split && vio->mmio_split_done_size != vio->io_size) ) 967 | + memcpy(&value, vio->mmio_split_buf + vio->mmio_split_done_size, 968 | + vio->mmio_split_size); 969 | + else 970 | + memcpy(&value, p_data, size); 971 | + 972 | p_data = NULL; 973 | } 974 | 975 | - vio = &curr->arch.hvm_vcpu.hvm_io; 976 | - 977 | if ( is_mmio && !value_is_ptr ) 978 | { 979 | /* Part of a multi-cycle read or write? */ 980 | @@ -137,7 +175,7 @@ static int hvmemul_do_io( 981 | break; 982 | case HVMIO_completed: 983 | vio->io_state = HVMIO_none; 984 | - if ( p_data == NULL ) 985 | + if ( p_data == NULL && !vio->mmio_split ) 986 | { 987 | if ( ram_page ) 988 | put_page(ram_page); 989 | @@ -173,14 +211,30 @@ static int hvmemul_do_io( 990 | (p_data == NULL) ? HVMIO_dispatched : HVMIO_awaiting_completion; 991 | vio->io_size = size; 992 | 993 | + if ( !vio->mmio_split ) 994 | + { 995 | + p->data_is_ptr = value_is_ptr; 996 | + p->type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; 997 | + p->size = size; 998 | + p->addr = addr; 999 | + p->count = *reps; 1000 | + } 1001 | + else 1002 | + { 1003 | + /* 1004 | + * The first ioreq_t of a split MMIO emulation. 1005 | + */ 1006 | + p->data_is_ptr = 0; 1007 | + p->type = IOREQ_TYPE_COPY; 1008 | + p->size = vio->mmio_split_size; 1009 | + p->addr = vio->mmio_split_pa; 1010 | + p->count = 1; 1011 | + } 1012 | + 1013 | p->dir = dir; 1014 | - p->data_is_ptr = value_is_ptr; 1015 | - p->type = is_mmio ? 
IOREQ_TYPE_COPY : IOREQ_TYPE_PIO; 1016 | - p->size = size; 1017 | - p->addr = addr; 1018 | - p->count = *reps; 1019 | p->df = df; 1020 | p->data = value; 1021 | + p->is_vgt = 0; 1022 | 1023 | if ( dir == IOREQ_WRITE ) 1024 | hvmtrace_io_assist(is_mmio, p); 1025 | @@ -209,7 +263,7 @@ static int hvmemul_do_io( 1026 | rc = X86EMUL_RETRY; 1027 | if ( !hvm_send_assist_req(curr) ) 1028 | vio->io_state = HVMIO_none; 1029 | - else if ( p_data == NULL ) 1030 | + else if ( p_data == NULL && !vio->mmio_split ) 1031 | rc = X86EMUL_OKAY; 1032 | break; 1033 | default: 1034 | @@ -227,8 +281,30 @@ static int hvmemul_do_io( 1035 | if ( dir == IOREQ_READ ) 1036 | hvmtrace_io_assist(is_mmio, p); 1037 | 1038 | - if ( p_data != NULL ) 1039 | - memcpy(p_data, &vio->io_data, size); 1040 | + if ( unlikely(vio->mmio_split) ) 1041 | + { 1042 | + if ( vio->io_size != vio->mmio_split_done_size ) 1043 | + { 1044 | + gdprintk(XENLOG_WARNING, "Split MMIO emulation:\n"); 1045 | + gdprintk(XENLOG_WARNING, "Finish a uncompleted split MMIO emulation.\n"); 1046 | + gdprintk(XENLOG_WARNING, "vio->io_size %d, vio->mmio_split_done_size %d.\n", 1047 | + vio->io_size, vio->mmio_split_done_size); 1048 | + 1049 | + vio->mmio_split = 0; 1050 | + goto out_unhandleable; 1051 | + } 1052 | + 1053 | + if ( vio->mmio_split_dir == IOREQ_READ ) 1054 | + memcpy(p_data, vio->mmio_split_buf, size); 1055 | + 1056 | + /* This is the end of a split MMIO emulation. 
*/ 1057 | + vio->mmio_split = 0; 1058 | + } 1059 | + else 1060 | + { 1061 | + if ( p_data != NULL ) 1062 | + memcpy(p_data, &vio->io_data, size); 1063 | + } 1064 | 1065 | if ( is_mmio && !value_is_ptr ) 1066 | { 1067 | @@ -261,6 +337,11 @@ static int hvmemul_do_io( 1068 | if ( ram_page ) 1069 | put_page(ram_page); 1070 | return X86EMUL_OKAY; 1071 | + 1072 | +out_unhandleable: 1073 | + if ( ram_page ) 1074 | + put_page(ram_page); 1075 | + return X86EMUL_UNHANDLEABLE; 1076 | } 1077 | 1078 | int hvmemul_do_pio( 1079 | diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c 1080 | index 4cf503b..e0700f9 100644 1081 | --- a/xen/arch/x86/hvm/hvm.c 1082 | +++ b/xen/arch/x86/hvm/hvm.c 1083 | @@ -59,6 +59,7 @@ 1084 | #include 1085 | #include 1086 | #include 1087 | +#include 1088 | #include 1089 | #include 1090 | #include 1091 | @@ -357,7 +358,8 @@ void hvm_do_resume(struct vcpu *v) 1092 | break; 1093 | case STATE_IOREQ_READY: /* IOREQ_{READY,INPROCESS} -> IORESP_READY */ 1094 | case STATE_IOREQ_INPROCESS: 1095 | - wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port, 1096 | + wait_on_xen_event_channel(p->is_vgt ? 
v->arch.hvm_vcpu.vgt_port : 1097 | + v->arch.hvm_vcpu.xen_port, 1098 | (p->state != STATE_IOREQ_READY) && 1099 | (p->state != STATE_IOREQ_INPROCESS)); 1100 | break; 1101 | @@ -524,6 +526,7 @@ int hvm_domain_initialise(struct domain *d) 1102 | spin_lock_init(&d->arch.hvm_domain.pbuf_lock); 1103 | spin_lock_init(&d->arch.hvm_domain.irq_lock); 1104 | spin_lock_init(&d->arch.hvm_domain.uc_lock); 1105 | + spin_lock_init(&d->arch.hvm_domain.vgt_wp_hash_lock); 1106 | 1107 | INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list); 1108 | spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock); 1109 | @@ -612,6 +615,7 @@ void hvm_domain_destroy(struct domain *d) 1110 | rtc_deinit(d); 1111 | stdvga_deinit(d); 1112 | vioapic_deinit(d); 1113 | + vgt_hvm_deinit(d); 1114 | hvm_destroy_cacheattr_region_list(d); 1115 | } 1116 | 1117 | @@ -1094,6 +1098,119 @@ static int __init __hvm_register_CPU_XSAVE_save_and_restore(void) 1118 | } 1119 | __initcall(__hvm_register_CPU_XSAVE_save_and_restore); 1120 | 1121 | +static int hvm_mmio_split_check_status(struct vcpu *v) 1122 | +{ 1123 | + struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io; 1124 | + ioreq_t *p = get_ioreq(v); 1125 | + 1126 | + if ( p->state != STATE_IORESP_READY ) 1127 | + { 1128 | + gdprintk(XENLOG_WARNING, "The state of ioreq isn't STATE_IORESP_READY.\n"); 1129 | + goto invalid; 1130 | + } 1131 | + 1132 | + if ( p->dir != vio->mmio_split_dir ) 1133 | + { 1134 | + gdprintk(XENLOG_WARNING, "The direction of ioreq isn't same as mmio_split_dir.\n"); 1135 | + goto invalid; 1136 | + } 1137 | + 1138 | + if ( p->data_is_ptr 1139 | + || p->type != IOREQ_TYPE_COPY 1140 | + || p->size != vio->mmio_split_size 1141 | + || p->count != 1 ) 1142 | + { 1143 | + gdprintk(XENLOG_WARNING, "The configuration of ioreq is invalid.\n"); 1144 | + goto invalid; 1145 | + } 1146 | + 1147 | + if ( !test_bit(_VPF_blocked_in_xen, &v->pause_flags) ) 1148 | + { 1149 | + gdprintk(XENLOG_WARNING, "The state of target VCPU isn't _VPF_blocked_in_xen.\n"); 1150 | + 
goto invalid; 1151 | + } 1152 | + 1153 | + return 1; 1154 | + 1155 | +invalid: 1156 | + return 0; 1157 | +} 1158 | + 1159 | +static void hvm_io_notification_fn(struct vcpu *v, unsigned int port) 1160 | +{ 1161 | + struct hvm_vcpu_io *vio = &v->arch.hvm_vcpu.hvm_io; 1162 | + struct hvm_ioreq_page *iorp = &v->domain->arch.hvm_domain.ioreq; 1163 | + ioreq_t *p; 1164 | + 1165 | + int mmio_split_finished; 1166 | + 1167 | + if ( !vio->mmio_split ) 1168 | + { 1169 | + /* Consumer needs notification only if blocked. */ 1170 | + if ( test_and_clear_bit(_VPF_blocked_in_xen, &v->pause_flags) ) 1171 | + vcpu_wake(v); 1172 | + 1173 | + return; 1174 | + } 1175 | + 1176 | + spin_lock(&iorp->lock); 1177 | + 1178 | + p = get_ioreq(v); 1179 | + 1180 | + if ( !hvm_mmio_split_check_status(v) ) 1181 | + { 1182 | + gdprintk(XENLOG_WARNING, "The status of split MMIO is invalid.\n"); 1183 | + vio->mmio_split = 0; 1184 | + 1185 | + goto out_unlock; 1186 | + } 1187 | + 1188 | + if ( vio->mmio_split_dir == IOREQ_READ ) 1189 | + { 1190 | + memcpy(vio->mmio_split_buf + vio->mmio_split_done_size, 1191 | + &p->data, vio->mmio_split_size); 1192 | + 1193 | + vio->mmio_split_done_size += vio->mmio_split_size; 1194 | + 1195 | + mmio_split_finished = (vio->mmio_split_done_size == vio->io_size); 1196 | + } 1197 | + else 1198 | + { 1199 | + vio->mmio_split_done_size += vio->mmio_split_size; 1200 | + 1201 | + mmio_split_finished = (vio->mmio_split_done_size == vio->io_size); 1202 | + 1203 | + if ( !mmio_split_finished ) 1204 | + memcpy(&p->data, vio->mmio_split_buf + vio->mmio_split_done_size, 1205 | + vio->mmio_split_size); 1206 | + } 1207 | + 1208 | + if ( !mmio_split_finished ) 1209 | + { 1210 | + /* Trigger next round ioreq_t. 
*/ 1211 | + p->state = STATE_IOREQ_READY; 1212 | + 1213 | + p->addr = vio->mmio_split_pa + vio->mmio_split_done_size; 1214 | + 1215 | + spin_unlock(&iorp->lock); 1216 | + 1217 | + __notify_via_xen_event_channel(v->domain, port); 1218 | + 1219 | + return; 1220 | + } 1221 | + else 1222 | + { 1223 | + /* Consumer needs notification only if blocked. */ 1224 | + if ( test_and_clear_bit(_VPF_blocked_in_xen, &v->pause_flags) ) 1225 | + vcpu_wake(v); 1226 | + } 1227 | + 1228 | +out_unlock: 1229 | + spin_unlock(&iorp->lock); 1230 | + 1231 | + return; 1232 | +} 1233 | + 1234 | int hvm_vcpu_initialise(struct vcpu *v) 1235 | { 1236 | int rc; 1237 | @@ -1113,13 +1230,21 @@ int hvm_vcpu_initialise(struct vcpu *v) 1238 | goto fail3; 1239 | 1240 | /* Create ioreq event channel. */ 1241 | - rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL); 1242 | + rc = alloc_unbound_xen_event_channel(v, dm_domid, hvm_io_notification_fn); 1243 | if ( rc < 0 ) 1244 | goto fail4; 1245 | 1246 | /* Register ioreq event channel. */ 1247 | v->arch.hvm_vcpu.xen_port = rc; 1248 | 1249 | + /* Create ioreq vgt event channel. */ 1250 | + rc = alloc_unbound_xen_event_channel(v, 0, hvm_io_notification_fn); 1251 | + if ( rc < 0 ) 1252 | + goto fail4; 1253 | + 1254 | + /* Register ioreq vgt event channel. */ 1255 | + v->arch.hvm_vcpu.vgt_port = rc; 1256 | + 1257 | if ( v->vcpu_id == 0 ) 1258 | { 1259 | /* Create bufioreq event channel. 
*/ 1260 | @@ -1131,7 +1256,10 @@ int hvm_vcpu_initialise(struct vcpu *v) 1261 | 1262 | spin_lock(&d->arch.hvm_domain.ioreq.lock); 1263 | if ( d->arch.hvm_domain.ioreq.va != NULL ) 1264 | + { 1265 | get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; 1266 | + get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port; 1267 | + } 1268 | spin_unlock(&d->arch.hvm_domain.ioreq.lock); 1269 | 1270 | spin_lock_init(&v->arch.hvm_vcpu.tm_lock); 1271 | @@ -1225,6 +1353,7 @@ void hvm_vcpu_down(struct vcpu *v) 1272 | bool_t hvm_send_assist_req(struct vcpu *v) 1273 | { 1274 | ioreq_t *p; 1275 | + int port; 1276 | 1277 | if ( unlikely(!vcpu_start_shutdown_deferral(v)) ) 1278 | return 0; /* implicitly bins the i/o operation */ 1279 | @@ -1238,14 +1367,16 @@ bool_t hvm_send_assist_req(struct vcpu *v) 1280 | return 0; 1281 | } 1282 | 1283 | - prepare_wait_on_xen_event_channel(v->arch.hvm_vcpu.xen_port); 1284 | + port = p->is_vgt ? v->arch.hvm_vcpu.vgt_port : v->arch.hvm_vcpu.xen_port; 1285 | + 1286 | + prepare_wait_on_xen_event_channel(port); 1287 | 1288 | /* 1289 | * Following happens /after/ blocking and setting up ioreq contents. 1290 | * prepare_wait_on_xen_event_channel() is an implicit barrier. 1291 | */ 1292 | p->state = STATE_IOREQ_READY; 1293 | - notify_via_xen_event_channel(v->domain, v->arch.hvm_vcpu.xen_port); 1294 | + notify_via_xen_event_channel(v->domain, port); 1295 | 1296 | return 1; 1297 | } 1298 | @@ -1458,7 +1589,8 @@ int hvm_hap_nested_page_fault(paddr_t gpa, 1299 | * to the mmio handler. 1300 | */ 1301 | if ( (p2mt == p2m_mmio_dm) || 1302 | - (access_w && (p2mt == p2m_ram_ro)) ) 1303 | + (access_w && (p2mt == p2m_ram_ro)) || 1304 | + (p2mt == p2m_writeprotection) ) 1305 | { 1306 | put_gfn(p2m->domain, gfn); 1307 | if ( !handle_mmio() ) 1308 | @@ -3819,7 +3951,10 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) 1309 | if ( iorp->va != NULL ) 1310 | /* Initialise evtchn port info if VCPUs already created. 
*/ 1311 | for_each_vcpu ( d, v ) 1312 | + { 1313 | get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; 1314 | + get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port; 1315 | + } 1316 | spin_unlock(&iorp->lock); 1317 | break; 1318 | case HVM_PARAM_BUFIOREQ_PFN: 1319 | @@ -3898,9 +4033,17 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) 1320 | if ( rc ) 1321 | break; 1322 | 1323 | + rc = hvm_replace_event_channel(v, a.value, 1324 | + &v->arch.hvm_vcpu.vgt_port); 1325 | + if ( rc ) 1326 | + break; 1327 | + 1328 | spin_lock(&iorp->lock); 1329 | if ( iorp->va != NULL ) 1330 | + { 1331 | get_ioreq(v)->vp_eport = v->arch.hvm_vcpu.xen_port; 1332 | + get_ioreq(v)->vgt_eport = v->arch.hvm_vcpu.vgt_port; 1333 | + } 1334 | spin_unlock(&iorp->lock); 1335 | } 1336 | domain_unpause(d); 1337 | @@ -4398,6 +4541,84 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) 1338 | break; 1339 | } 1340 | 1341 | + case HVMOP_vgt_wp_pages: { 1342 | + xen_hvm_vgt_wp_pages_t wp; 1343 | + struct domain *d; 1344 | + 1345 | + if ( copy_from_guest(&wp, arg, 1 ) ) 1346 | + return -EFAULT; 1347 | + 1348 | + if ( wp.nr_pages >= MAX_WP_BATCH_PAGES ) 1349 | + return -EFAULT; 1350 | + 1351 | + d = rcu_lock_domain_by_any_id(wp.domid); 1352 | + if ( d == NULL ) 1353 | + return -ESRCH; 1354 | + 1355 | + if ( !is_hvm_domain(d) ){ 1356 | + rc = -EINVAL; 1357 | + } 1358 | + else { 1359 | + rc = hap_write_protect_pages(d, wp.wp_pages, wp.nr_pages, wp.set); 1360 | + } 1361 | + rcu_unlock_domain(d); 1362 | + 1363 | + break; 1364 | + } 1365 | + 1366 | + case HVMOP_vgt_map_mmio: { 1367 | + xen_hvm_vgt_map_mmio_t memmap; 1368 | + struct domain *d; 1369 | + 1370 | + if ( copy_from_guest(&memmap, arg, 1 ) ) 1371 | + return -EFAULT; 1372 | + 1373 | + d = rcu_lock_domain_by_id(memmap.domid); 1374 | + if ( d == NULL ) 1375 | + return -ESRCH; 1376 | + 1377 | + if ( !is_hvm_domain(d) ){ 1378 | + rc = -EINVAL; 1379 | + goto param_fail_vgt_map_mmio; 1380 | + } 1381 | + 1382 | + if ( 
memmap.map ) 1383 | + vgt_domctl_add_range(d, memmap.first_gfn, 1384 | + memmap.first_mfn, memmap.nr_mfns); 1385 | + else 1386 | + vgt_domctl_remove_range(d, memmap.first_gfn, 1387 | + memmap.first_mfn, memmap.nr_mfns); 1388 | + 1389 | + param_fail_vgt_map_mmio: 1390 | + rcu_unlock_domain(d); 1391 | + 1392 | + break; 1393 | + } 1394 | + 1395 | + case HVMOP_vgt_enable: { 1396 | + xen_hvm_vgt_enable_t vgt_enable; 1397 | + struct domain *d; 1398 | + 1399 | + if ( copy_from_guest(&vgt_enable, arg, 1 ) ) 1400 | + return -EFAULT; 1401 | + 1402 | + d = rcu_lock_domain_by_any_id(vgt_enable.domid); 1403 | + if ( d == NULL ) 1404 | + return -ESRCH; 1405 | + 1406 | + if ( !is_hvm_domain(d) ){ 1407 | + rc = -EINVAL; 1408 | + goto param_fail_vgt_enable; 1409 | + } 1410 | + 1411 | + rc = vgt_hvm_init(d); 1412 | + 1413 | + param_fail_vgt_enable: 1414 | + rcu_unlock_domain(d); 1415 | + 1416 | + break; 1417 | + } 1418 | + 1419 | case HVMOP_xentrace: { 1420 | xen_hvm_xentrace_t tr; 1421 | 1422 | diff --git a/xen/arch/x86/hvm/intercept.c b/xen/arch/x86/hvm/intercept.c 1423 | index 5bb1c17..d13cae4 100644 1424 | --- a/xen/arch/x86/hvm/intercept.c 1425 | +++ b/xen/arch/x86/hvm/intercept.c 1426 | @@ -22,6 +22,7 @@ 1427 | #include 1428 | #include 1429 | #include 1430 | +#include 1431 | #include 1432 | #include 1433 | #include 1434 | @@ -39,7 +40,9 @@ hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] = 1435 | &vlapic_mmio_handler, 1436 | &vioapic_mmio_handler, 1437 | &msixtbl_mmio_handler, 1438 | - &iommu_mmio_handler 1439 | + &iommu_mmio_handler, 1440 | + &writeprotection_handler, /* write protection to guest pages */ 1441 | + &vgt_mmio_handler 1442 | }; 1443 | 1444 | static int hvm_mmio_access(struct vcpu *v, 1445 | @@ -224,6 +227,12 @@ int hvm_io_intercept(ioreq_t *p, int type) 1446 | int i; 1447 | unsigned long addr, size; 1448 | 1449 | + /* 1450 | + * vGT memory access is high frequency, so it needs to be 1451 | + * in fast path. We add hook here before the io chain 1452 | + * walk. 
But 0xcf8/cfc is not in this list. 1453 | + */ 1454 | + 1455 | if ( type == HVM_PORTIO ) 1456 | { 1457 | int rc = dpci_ioport_intercept(p); 1458 | diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c 1459 | index 5f5009a..6d6d434 100644 1460 | --- a/xen/arch/x86/hvm/io.c 1461 | +++ b/xen/arch/x86/hvm/io.c 1462 | @@ -284,6 +284,15 @@ void hvm_io_assist(void) 1463 | memcpy(&guest_cpu_user_regs()->eax, 1464 | &p->data, vio->io_size); 1465 | break; 1466 | + case HVMIO_dispatched: 1467 | + if ( vio->mmio_split ) 1468 | + { 1469 | + /* Emulate and finish split MMIO write. */ 1470 | + vio->io_state = HVMIO_completed; 1471 | + vio->io_data = p->data; 1472 | + (void)handle_mmio(); 1473 | + } 1474 | + break; 1475 | default: 1476 | break; 1477 | } 1478 | diff --git a/xen/arch/x86/hvm/vgt.c b/xen/arch/x86/hvm/vgt.c 1479 | new file mode 100644 1480 | index 0000000..61cb09b 1481 | --- /dev/null 1482 | +++ b/xen/arch/x86/hvm/vgt.c 1483 | @@ -0,0 +1,234 @@ 1484 | +/* 1485 | + * vgt.c: code to trace the vGT MMIO access in HVM domain 1486 | + * 1487 | + * Copyright (c) 2011, Intel Corporation. 1488 | + * 1489 | + * This program is free software; you can redistribute it and/or modify it 1490 | + * under the terms and conditions of the GNU General Public License, 1491 | + * version 2, as published by the Free Software Foundation. 1492 | + * 1493 | + * This program is distributed in the hope it will be useful, but WITHOUT 1494 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1495 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 1496 | + * more details. 1497 | + * 1498 | + * You should have received a copy of the GNU General Public License along with 1499 | + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 1500 | + * Place - Suite 330, Boston, MA 02111-1307 USA. 
1501 | + */ 1502 | + 1503 | +#include 1504 | +#include 1505 | +#include 1506 | +#include 1507 | +#include 1508 | +#include 1509 | +#include 1510 | +#include 1511 | +#include 1512 | +#include 1513 | +#include 1514 | +#include 1515 | +#include 1516 | +#include 1517 | +#include 1518 | +#include 1519 | +#include 1520 | +#include 1521 | +#include 1522 | +#include 1523 | + 1524 | +static int vgt_mmio_range(struct vcpu *v, unsigned long addr) 1525 | +{ 1526 | + return vgt_enabled(v->domain) && 1527 | + rangeset_contains_singleton( 1528 | + v->domain->iomem_forward_caps, addr); 1529 | +} 1530 | + 1531 | +static int _vgt_mmio_read_write(struct vcpu *v) 1532 | +{ 1533 | + get_ioreq(v)->is_vgt = 1; 1534 | + 1535 | + return X86EMUL_UNHANDLEABLE; /* notify dom0 vGT driver to handle */ 1536 | +} 1537 | + 1538 | +static int vgt_mmio_read( 1539 | + struct vcpu *v, unsigned long gpa, 1540 | + unsigned long bytes, unsigned long *p_data) 1541 | +{ 1542 | + return _vgt_mmio_read_write(v); 1543 | +} 1544 | + 1545 | +static int vgt_mmio_write(struct vcpu *v, unsigned long gpa, 1546 | + unsigned long bytes, unsigned long data) 1547 | +{ 1548 | + return _vgt_mmio_read_write(v); 1549 | +} 1550 | + 1551 | +const struct hvm_mmio_handler vgt_mmio_handler = { 1552 | + .check_handler = vgt_mmio_range, 1553 | + .read_handler = vgt_mmio_read, 1554 | + .write_handler = vgt_mmio_write 1555 | +}; 1556 | + 1557 | +/* 1558 | + * Check the fault address to see if it is from writeprotection pages. 1559 | + * Return 1: Yes, 0: No. 
1560 | + */ 1561 | +static int writeprotecion_page(struct vcpu *v, unsigned long addr) 1562 | +{ 1563 | + unsigned long gpfn = addr >> PAGE_SHIFT; 1564 | + struct wp_hash_table *he; 1565 | + 1566 | + if (!vgt_enabled(v->domain)) 1567 | + return 0; 1568 | + 1569 | + spin_lock(&wp_htable_lock(v->domain)); 1570 | + he = lookup_wp_hash_table(wp_htable(v->domain), gpfn); 1571 | + spin_unlock(&wp_htable_lock(v->domain)); 1572 | + 1573 | + return (he != NULL); 1574 | +} 1575 | + 1576 | +const struct hvm_mmio_handler writeprotection_handler = { 1577 | + .check_handler = writeprotecion_page, 1578 | + .read_handler = vgt_mmio_read, 1579 | + .write_handler = vgt_mmio_write 1580 | +}; 1581 | + 1582 | + 1583 | +int vgt_wp_hash_add(struct wp_hash_table *wp_ht, unsigned long gpfn) 1584 | +{ 1585 | + int index = wp_hash(gpfn); 1586 | + struct wp_hash_table *ne; 1587 | + 1588 | + if ( lookup_wp_hash_table(wp_ht, gpfn) != NULL ) 1589 | + return -EINVAL; 1590 | + if (wp_ht[index].gpfn == WP_INVALID_GPFN) 1591 | + wp_ht[index].gpfn = gpfn; 1592 | + else { 1593 | + /* self-automicity */ 1594 | + ne = xmalloc_bytes(sizeof(wp_ht[0])); 1595 | + if ( ne == NULL ) 1596 | + return -ENOMEM; 1597 | + ne->next = wp_ht[index].next; 1598 | + ne->gpfn = gpfn; 1599 | + wp_ht[index].next = ne; 1600 | + } 1601 | + return 0; 1602 | +} 1603 | + 1604 | +int vgt_wp_hash_rem(struct wp_hash_table *wp_ht, unsigned long gpfn) 1605 | +{ 1606 | + int index = wp_hash(gpfn); 1607 | + struct wp_hash_table *next, *prev; 1608 | + 1609 | + if (wp_ht[index].gpfn == gpfn) { 1610 | + wp_ht[index].gpfn = WP_INVALID_GPFN; 1611 | + } 1612 | + else { 1613 | + prev = &wp_ht[index]; 1614 | + while (1) { 1615 | + next=prev->next; 1616 | + if ( next == NULL ) { 1617 | + printk("vgt_wp_hash_rem hash_table %p remove %lx not found\n", 1618 | + wp_ht, gpfn); 1619 | + return -EINVAL; 1620 | + } 1621 | + if (next->gpfn == gpfn) { 1622 | + /* self-automicity */ 1623 | + prev->next = next->next; 1624 | + xfree (next); 1625 | + break; 
1626 | + } 1627 | + prev = next; 1628 | + } 1629 | + } 1630 | + return 0; 1631 | +} 1632 | + 1633 | +struct wp_hash_table *lookup_wp_hash_table( 1634 | + struct wp_hash_table *wp_ht, 1635 | + unsigned long gpfn) 1636 | +{ 1637 | + int index = wp_hash(gpfn); 1638 | + struct wp_hash_table *entry; 1639 | + 1640 | + for ( entry=&wp_ht[index]; entry!=NULL; entry=entry->next) { 1641 | + if (entry->gpfn == gpfn) 1642 | + break; 1643 | + } 1644 | + return entry; 1645 | +} 1646 | + 1647 | +static void free_vgt_wp_hash_chain(struct wp_hash_table *chain) 1648 | +{ 1649 | + struct wp_hash_table *p = chain; 1650 | + struct wp_hash_table *n; 1651 | + 1652 | + while (p) { 1653 | + n = p->next; 1654 | + xfree (p); 1655 | + p = n; 1656 | + } 1657 | +} 1658 | + 1659 | +void free_vgt_wp_hash(struct wp_hash_table *wp_ht) 1660 | +{ 1661 | + int i; 1662 | + 1663 | + for ( i = 0; i < WP_HASH_SIZE; i++ ) 1664 | + if ( wp_ht[i].next != NULL ) 1665 | + free_vgt_wp_hash_chain (wp_ht[i].next); 1666 | +} 1667 | + 1668 | +/* ret: 1669 | + 0 - success 1670 | + negative - non success 1671 | +*/ 1672 | +int vgt_domctl_add_range(struct domain *d, unsigned long gfn, 1673 | + unsigned long mfn, unsigned long nr_mfns) 1674 | +{ 1675 | + unsigned long hva; 1676 | + int ret, i; 1677 | + 1678 | + printk("DPCI_ADD_MAPPING_VGT : domid=%d gfn=0x%lx mfn=0x%lx nr_mfns=0x%lx\n", 1679 | + d->domain_id, gfn, mfn, nr_mfns ); 1680 | + 1681 | + /* direct map without trap & emulation */ 1682 | + ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1); 1683 | + for ( i = 0; i < nr_mfns; i++ ) 1684 | + set_mmio_p2m_entry(d, gfn+i, _mfn(mfn+i)); 1685 | + 1686 | + hva = (unsigned long) mfn_to_virt(mfn); 1687 | + ret = map_pages_to_xen(hva, mfn, nr_mfns, PAGE_HYPERVISOR_NOCACHE); 1688 | + if (ret != 0) 1689 | + { 1690 | + printk("Warning: mapping domain page error\n"); 1691 | + domain_crash(current->domain); 1692 | + } 1693 | + 1694 | + return ret; 1695 | +} 1696 | + 1697 | +/* ret: 1698 | + 0 - success 1699 | + negative - 
failure 1700 | +*/ 1701 | +int vgt_domctl_remove_range(struct domain *d, unsigned long gfn, 1702 | + unsigned long mfn, unsigned long nr_mfns) 1703 | +{ 1704 | + int i, ret; 1705 | + /* 1706 | + remove the pfn and mfn record for MMIO trap and emulation 1707 | + */ 1708 | + printk("DPCI_REMOVE_MAPPING_VGT : domid=%d gfn=%lx mfn=%lx nr_mfns=%lx\n", 1709 | + d->domain_id, gfn, mfn, nr_mfns); 1710 | + 1711 | + for ( i = 0; i < nr_mfns; i++ ) 1712 | + clear_mmio_p2m_entry(d, gfn+i); 1713 | + ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1); 1714 | + destroy_xen_mappings( (unsigned long)mfn_to_virt(mfn), 1715 | + (unsigned long)mfn_to_virt(mfn + nr_mfns - 1) ); 1716 | + return ret; 1717 | +} 1718 | diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c 1719 | index e8aa61c..5225037 100644 1720 | --- a/xen/arch/x86/hvm/vmsi.c 1721 | +++ b/xen/arch/x86/hvm/vmsi.c 1722 | @@ -34,6 +34,7 @@ 1723 | #include 1724 | #include 1725 | #include 1726 | +#include 1727 | #include 1728 | #include 1729 | #include 1730 | diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c 1731 | index 2ed25c7..eec13cf 100644 1732 | --- a/xen/arch/x86/hvm/vmx/vmcs.c 1733 | +++ b/xen/arch/x86/hvm/vmx/vmcs.c 1734 | @@ -1273,7 +1273,8 @@ void vmx_do_resume(struct vcpu *v) 1735 | * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits. 1736 | * If VT-d engine can force snooping, we don't need to do these. 
1737 | */ 1738 | - if ( has_arch_pdevs(v->domain) && !iommu_snoop 1739 | + if ( ((has_arch_pdevs(v->domain) && !iommu_snoop) || 1740 | + vgt_enabled(v->domain)) 1741 | && !cpu_has_wbinvd_exiting ) 1742 | { 1743 | int cpu = v->arch.hvm_vmx.active_cpu; 1744 | diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c 1745 | index 0e5d3b4..6412e8c 100644 1746 | --- a/xen/arch/x86/hvm/vmx/vmx.c 1747 | +++ b/xen/arch/x86/hvm/vmx/vmx.c 1748 | @@ -2140,10 +2140,8 @@ static void wbinvd_ipi(void *info) 1749 | 1750 | static void vmx_wbinvd_intercept(void) 1751 | { 1752 | - if ( !has_arch_mmios(current->domain) ) 1753 | - return; 1754 | - 1755 | - if ( iommu_snoop ) 1756 | + if ( (!has_arch_mmios(current->domain) || iommu_snoop) && 1757 | + !vgt_enabled(current->domain) ) 1758 | return; 1759 | 1760 | if ( cpu_has_wbinvd_exiting ) 1761 | diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c 1762 | index 91d2c2b..3b907bd 100644 1763 | --- a/xen/arch/x86/mm.c 1764 | +++ b/xen/arch/x86/mm.c 1765 | @@ -1019,6 +1019,20 @@ get_page_from_l4e( 1766 | } while ( 0 ) 1767 | #endif 1768 | 1769 | +/* 1770 | + * No need to remove reserved bits when pte is marked non-present, 1771 | + * since a non-present to present change normally implicates a 1772 | + * completely new bit set 1773 | + */ 1774 | +#define adjust_guest_l1e_rsvd(pl1e, d, mfn) \ 1775 | + do { \ 1776 | + if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \ 1777 | + iomem_forward_permitted(d, mfn, mfn) ) \ 1778 | + { \ 1779 | + l1e_add_rsvd((pl1e), _PAGE_FORWARD); \ 1780 | + } \ 1781 | + } while ( 0 ) 1782 | + 1783 | #define adjust_guest_l2e(pl2e, d) \ 1784 | do { \ 1785 | if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \ 1786 | @@ -1211,7 +1225,14 @@ static int alloc_l1_table(struct page_info *page) 1787 | break; 1788 | } 1789 | 1790 | + if ( l1e_get_rsvd(pl1e[i]) & (_PAGE_FORWARD|_PAGE_PRESENT) ) 1791 | + { 1792 | + MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!"); 1793 | + goto 
fail; 1794 | + } 1795 | + 1796 | adjust_guest_l1e(pl1e[i], d); 1797 | + adjust_guest_l1e_rsvd(pl1e[i], d, l1e_get_pfn(pl1e[i])); 1798 | } 1799 | 1800 | unmap_domain_page(pl1e); 1801 | @@ -1702,10 +1723,37 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 1802 | return -EINVAL; 1803 | } 1804 | 1805 | + if ( l1e_has_changed_rsvd(ol1e, nl1e, _PAGE_FORWARD) ) 1806 | + { 1807 | + /* 1808 | + * Condition check whether guest itself uses reserved bit. 1809 | + * Suppose reserved bits are used only when P bit is cleared, 1810 | + * such as in swap purpose 1811 | + */ 1812 | + if ( !(l1e_get_rsvd(ol1e) & _PAGE_FORWARD) && 1813 | + (l1e_get_rsvd(nl1e) & _PAGE_FORWARD) ) 1814 | + { 1815 | + MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!"); 1816 | + return -EINVAL; 1817 | + } 1818 | + 1819 | + /* 1820 | + * Guest may not compose the new entry based on old content, 1821 | + * and thus it's possible to have Xen populated reserved bits lost. 1822 | + * Warn such case, but it should be fine, since we anyway adjust 1823 | + * l1e to enforce the forwarding requirement. 1824 | + */ 1825 | + if ( ((l1e_get_rsvd(ol1e) & (_PAGE_FORWARD|_PAGE_PRESENT)) == 1826 | + (_PAGE_FORWARD|_PAGE_PRESENT)) && 1827 | + !(l1e_get_rsvd(nl1e) & _PAGE_FORWARD) ) 1828 | + MEM_LOG("Guest tempts to clear forwarding bit set by Xen!"); 1829 | + } 1830 | + 1831 | /* Fast path for identical mapping, r/w and presence. 
*/ 1832 | if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) ) 1833 | { 1834 | adjust_guest_l1e(nl1e, pt_dom); 1835 | + adjust_guest_l1e_rsvd(nl1e, pt_dom, l1e_get_pfn(nl1e)); 1836 | if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, 1837 | preserve_ad) ) 1838 | { 1839 | @@ -1735,6 +1783,7 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e, 1840 | put_page(page); 1841 | 1842 | adjust_guest_l1e(nl1e, pt_dom); 1843 | + adjust_guest_l1e_rsvd(nl1e, pt_dom, l1e_get_pfn(nl1e)); 1844 | if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, 1845 | preserve_ad)) ) 1846 | { 1847 | @@ -4678,6 +4727,61 @@ static int xenmem_add_to_physmap(struct domain *d, 1848 | return xenmem_add_to_physmap_once(d, xatp); 1849 | } 1850 | 1851 | +static int get_mfn_from_pfn(XEN_GUEST_HANDLE(xen_get_mfn_from_pfn_t) arg) 1852 | +{ 1853 | + struct xen_get_mfn_from_pfn cmd_info; 1854 | + struct domain *d; 1855 | + int rc=0, i; 1856 | + xen_pfn_t *pfns = NULL; 1857 | + xen_pfn_t pfn; 1858 | + struct p2m_domain *p2m; 1859 | + p2m_type_t t; 1860 | + 1861 | + if ( !is_hardware_domain(current->domain) ) 1862 | + return -EPERM; 1863 | + 1864 | + if ( copy_from_guest(&cmd_info, arg, 1) ) 1865 | + return -EFAULT; 1866 | + 1867 | + d = rcu_lock_domain_by_any_id(cmd_info.domid); 1868 | + if ( d == NULL ) 1869 | + return -ESRCH; 1870 | + 1871 | + /* sanity check for security */ 1872 | + if (cmd_info.nr_pfns > 2048 ) 1873 | + return -ENOMEM; 1874 | + 1875 | + pfns = xmalloc_array(xen_pfn_t, cmd_info.nr_pfns); 1876 | + if (pfns == NULL) 1877 | + return -ENOMEM; 1878 | + 1879 | + if (copy_from_guest(pfns, cmd_info.pfn_list, cmd_info.nr_pfns)){ 1880 | + rc = -EFAULT; 1881 | + goto out; 1882 | + } 1883 | + 1884 | + p2m = p2m_get_hostp2m(d); 1885 | + for(i=0; i < cmd_info.nr_pfns; i++){ 1886 | + pfn = pfns[i]; 1887 | + pfns[i] = mfn_x(get_gfn_query(d, pfn, &t)); 1888 | + if(pfns[i] == INVALID_MFN){ 1889 | + rc = -EINVAL; 1890 | + goto out; 1891 | + } 1892 | + put_gfn(d, 
pfn); 1893 | + } 1894 | + 1895 | + if (copy_to_guest(cmd_info.pfn_list, pfns, cmd_info.nr_pfns)){ 1896 | + rc = -EFAULT; 1897 | + goto out; 1898 | + } 1899 | + 1900 | +out: 1901 | + rcu_unlock_domain(d); 1902 | + xfree(pfns); 1903 | + return rc; 1904 | +} 1905 | + 1906 | long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) 1907 | { 1908 | int rc; 1909 | @@ -4936,6 +5040,15 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE_PARAM(void) arg) 1910 | return rc; 1911 | } 1912 | 1913 | +#ifdef __x86_64__ 1914 | + case XENMEM_get_sharing_freed_pages: 1915 | + return mem_sharing_get_nr_saved_mfns(); 1916 | +#endif 1917 | + 1918 | + case XENMEM_get_mfn_from_pfn: 1919 | + rc = get_mfn_from_pfn(guest_handle_cast(arg, xen_get_mfn_from_pfn_t)); 1920 | + break; 1921 | + 1922 | default: 1923 | return subarch_memory_op(op, arg); 1924 | } 1925 | @@ -5063,7 +5176,28 @@ static int ptwr_emulated_update( 1926 | break; 1927 | } 1928 | 1929 | + if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) 1930 | + { 1931 | + ol1e = l1e_from_intpte(old); 1932 | + /* same check as the comment in mod_l1e_entry */ 1933 | + if ( l1e_has_changed_rsvd(ol1e, nl1e, _PAGE_FORWARD) ) 1934 | + { 1935 | + if ( !(l1e_get_rsvd(ol1e) & _PAGE_FORWARD) && 1936 | + (l1e_get_rsvd(nl1e) & _PAGE_FORWARD) ) 1937 | + { 1938 | + MEM_LOG("Captured guest reserved bit usage, which is conflicting with Xen!"); 1939 | + return X86EMUL_UNHANDLEABLE; 1940 | + } 1941 | + 1942 | + if ( ((l1e_get_rsvd(ol1e) & (_PAGE_FORWARD|_PAGE_PRESENT)) == 1943 | + (_PAGE_FORWARD|_PAGE_PRESENT)) && 1944 | + !(l1e_get_rsvd(nl1e) & _PAGE_FORWARD) ) 1945 | + MEM_LOG("Guest tempts to clear forwarding bit set by Xen!"); 1946 | + } 1947 | + } 1948 | + 1949 | adjust_guest_l1e(nl1e, d); 1950 | + adjust_guest_l1e_rsvd(nl1e, d, l1e_get_pfn(nl1e)); 1951 | 1952 | /* Checked successfully: do the update (write or cmpxchg). 
*/ 1953 | pl1e = map_domain_page(mfn); 1954 | diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c 1955 | index bff05d9..5b89c0c 100644 1956 | --- a/xen/arch/x86/mm/hap/hap.c 1957 | +++ b/xen/arch/x86/mm/hap/hap.c 1958 | @@ -208,6 +208,46 @@ void hap_logdirty_init(struct domain *d) 1959 | hap_clean_dirty_bitmap); 1960 | } 1961 | 1962 | +/* 1963 | + * Write-protect guest pages. 1964 | + * A guest page may be write-protected or unwrite-protected, 1965 | + * to track the guest write operations. 1966 | + * It is initially designed for GPU PPGTT page table pages. 1967 | + * gpfn: array base containing the gpfns for write-protection. 1968 | + * num: size of the array. 1969 | + * wr: 1: write-protection, 0: unwrite-protection. 1970 | + */ 1971 | +int hap_write_protect_pages(struct domain *d, 1972 | + unsigned long *gpfn, int num, int wr) 1973 | +{ 1974 | + int i; 1975 | + p2m_type_t ot, nt; 1976 | + int ret = 0; 1977 | + 1978 | + if ( wr ) { 1979 | + ot = p2m_ram_rw; 1980 | + nt = p2m_writeprotection; 1981 | + } else { 1982 | + ot = p2m_writeprotection; 1983 | + nt = p2m_ram_rw; 1984 | + } 1985 | + 1986 | + spin_lock(&wp_htable_lock(d)); 1987 | + 1988 | + for ( i=0; i < num; i++) { 1989 | + p2m_change_type_range(d, gpfn[i], gpfn[i] + 1, ot, nt); 1990 | + if (wr) 1991 | + ret = vgt_wp_hash_add(wp_htable(d), gpfn[i]); 1992 | + else 1993 | + ret = vgt_wp_hash_rem(wp_htable(d), gpfn[i]); 1994 | + } 1995 | + 1996 | + spin_unlock(&wp_htable_lock(d)); 1997 | + 1998 | + flush_tlb_mask(d->domain_dirty_cpumask); 1999 | + return ret; 2000 | +} 2001 | + 2002 | /************************************************/ 2003 | /* HAP SUPPORT FUNCTIONS */ 2004 | /************************************************/ 2005 | diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c 2006 | index 595c6e7..993e3a8 100644 2007 | --- a/xen/arch/x86/mm/p2m-ept.c 2008 | +++ b/xen/arch/x86/mm/p2m-ept.c 2009 | @@ -69,6 +69,7 @@ static void ept_p2m_type_to_flags(ept_entry_t *entry, 
p2m_type_t type, p2m_acces 2010 | entry->mfn); 2011 | break; 2012 | case p2m_ram_logdirty: 2013 | + case p2m_writeprotection: 2014 | case p2m_ram_ro: 2015 | case p2m_ram_shared: 2016 | entry->r = entry->x = 1; 2017 | diff --git a/xen/arch/x86/physdev.c b/xen/arch/x86/physdev.c 2018 | index 3733c7a..b27a5e4 100644 2019 | --- a/xen/arch/x86/physdev.c 2020 | +++ b/xen/arch/x86/physdev.c 2021 | @@ -478,6 +478,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) 2022 | ret = -EFAULT; 2023 | if ( copy_from_guest(&set_iopl, arg, 1) != 0 ) 2024 | break; 2025 | + printk("VGT: PHYSDEVOP_set_iopl iopl %x\n", set_iopl.iopl); 2026 | ret = -EINVAL; 2027 | if ( set_iopl.iopl > 3 ) 2028 | break; 2029 | @@ -488,9 +489,11 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg) 2030 | 2031 | case PHYSDEVOP_set_iobitmap: { 2032 | struct physdev_set_iobitmap set_iobitmap; 2033 | + 2034 | ret = -EFAULT; 2035 | if ( copy_from_guest(&set_iobitmap, arg, 1) != 0 ) 2036 | break; 2037 | + printk("VGT: PHYSDEVOP_set_iobitmap !!!\n"); 2038 | ret = -EINVAL; 2039 | if ( !guest_handle_okay(set_iobitmap.bitmap, IOBMP_BYTES) || 2040 | (set_iobitmap.nr_ports > 65536) ) 2041 | diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c 2042 | index 2162811..3e9a295 100644 2043 | --- a/xen/arch/x86/platform_hypercall.c 2044 | +++ b/xen/arch/x86/platform_hypercall.c 2045 | @@ -30,6 +30,7 @@ 2046 | #include 2047 | #include 2048 | #include 2049 | +#include 2050 | #include "cpu/mtrr/mtrr.h" 2051 | #include 2052 | 2053 | @@ -601,6 +602,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op) 2054 | } 2055 | break; 2056 | 2057 | + case XENPF_set_vgt_info: 2058 | + { 2059 | + ret = set_vgt_info(op->u.vgt_info.gen_dev_bdf, 2060 | + op->u.vgt_info.gen_dev_type); 2061 | + break; 2062 | + } 2063 | + break; 2064 | + 2065 | default: 2066 | ret = -ENOSYS; 2067 | break; 2068 | diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c 2069 | index 
adc5009..af51f9f 100644 2070 | --- a/xen/arch/x86/traps.c 2071 | +++ b/xen/arch/x86/traps.c 2072 | @@ -73,6 +73,7 @@ 2073 | #include 2074 | #include 2075 | #include 2076 | +#include 2077 | 2078 | /* 2079 | * opt_nmi: one of 'ignore', 'dom0', or 'fatal'. 2080 | @@ -424,7 +425,7 @@ void fatal_trap(int trapnr, struct cpu_user_regs *regs) 2081 | (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT"); 2082 | } 2083 | 2084 | -static void do_guest_trap( 2085 | +void do_guest_trap( 2086 | int trapnr, const struct cpu_user_regs *regs, int use_error_code) 2087 | { 2088 | struct vcpu *v = current; 2089 | @@ -1343,6 +1344,11 @@ static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs) 2090 | return ret; 2091 | } 2092 | 2093 | + /* we take some reserved bits to indicate a trapped MMIO access */ 2094 | + if ( ((regs->error_code & (PFEC_page_present | PFEC_reserved_bit)) == 2095 | + (PFEC_page_present | PFEC_reserved_bit)) && 2096 | + do_rsvd_page_fault(v, addr, regs) ) 2097 | + return EXCRET_fault_fixed; 2098 | return 0; 2099 | } 2100 | 2101 | @@ -1604,6 +1610,8 @@ static int guest_io_okay( 2102 | { 2103 | union { uint8_t bytes[2]; uint16_t mask; } x; 2104 | 2105 | + printk("VGT: =====: arch.pv_vcpu.iobmp_limit %x bitmap %lx\n", 2106 | + v->arch.pv_vcpu.iobmp_limit, (unsigned long)v->arch.pv_vcpu.iobmp.p); 2107 | /* 2108 | * Grab permission bytes from guest space. Inaccessible bytes are 2109 | * read as 0xff (no access allowed). 
2110 | @@ -1745,6 +1753,67 @@ static uint32_t guest_io_read( 2111 | return data; 2112 | } 2113 | 2114 | +static bool_t vgt_cfg_space(unsigned int port, unsigned int bytes, 2115 | + bool_t wr, struct vcpu *v, struct cpu_user_regs *regs) 2116 | +{ 2117 | + unsigned int _port; 2118 | + 2119 | + if ( v->domain != dom0 ) 2120 | + return 0; 2121 | + 2122 | + _port = port & ~3; 2123 | + if ( _port == 0xCF8 && wr && 2124 | + CF8_to_BDF(regs->eax) == vgt_bdf ) { 2125 | + 2126 | + ASSERT ((port & 3) == 0 && (bytes == 4)); 2127 | + 2128 | + /* VGT CFG space access, forward to vGT driver */ 2129 | + if ( ioports_forwarding_started(v->domain) ) { 2130 | + /* Forward only after the guest is ready to process. */ 2131 | + v->domain->arch.pci_cf8 = regs->eax; 2132 | + vgt_inject_virtual_exception (regs, 1); 2133 | + return 1; 2134 | + } 2135 | + } 2136 | + else if (_port == 0xCFC && 2137 | + CF8_to_BDF(v->domain->arch.pci_cf8) == vgt_bdf) { 2138 | + if ( ioports_forwarding_started(v->domain) ) { 2139 | + /* Forward only after the guest is ready to process. 
*/ 2140 | + vgt_inject_virtual_exception (regs, 1); 2141 | + return 1; 2142 | + } 2143 | + } 2144 | + return 0; 2145 | +} 2146 | + 2147 | +static bool_t pio_forwarding( 2148 | + unsigned int port, unsigned int bytes, int wr, 2149 | + struct vcpu *v, struct cpu_user_regs *regs) 2150 | +{ 2151 | + int perm, rc=0; 2152 | + 2153 | + if ( vgt_cfg_space(port, bytes, wr, v, regs) ) 2154 | + return 1; 2155 | + 2156 | + perm = ioports_forwarding_permitted(v->domain, port, port ) << 1; 2157 | + perm |= ioports_forwarding_permitted(v->domain, 2158 | + port + bytes - 1, port + bytes - 1); 2159 | + switch ( perm ) { 2160 | + case 0: 2161 | + break; 2162 | + case 3: /* all port are within forwarding list */ 2163 | + vgt_inject_virtual_exception (regs, 1); 2164 | + rc = 1; 2165 | + break; 2166 | + case 1: 2167 | + case 2: 2168 | + default: 2169 | + ASSERT (0); /* TODO */ 2170 | + break; 2171 | + } 2172 | + return rc; 2173 | +} 2174 | + 2175 | static void guest_io_write( 2176 | unsigned int port, unsigned int bytes, uint32_t data, 2177 | struct vcpu *v, struct cpu_user_regs *regs) 2178 | @@ -2027,6 +2096,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) 2179 | (rd_ad(edi) > (data_limit - (op_bytes - 1))) || 2180 | !guest_io_okay(port, op_bytes, v, regs) ) 2181 | goto fail; 2182 | + if ( pio_forwarding(port, op_bytes, 0, v, regs) ) 2183 | + goto skip; 2184 | data = guest_io_read(port, op_bytes, v, regs); 2185 | if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), 2186 | &data, op_bytes)) != 0 ) 2187 | @@ -2053,9 +2124,14 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) 2188 | + op_bytes - rc, 0); 2189 | return EXCRET_fault_fixed; 2190 | } 2191 | - guest_io_write(port, op_bytes, data, v, regs); 2192 | - wr_ad(esi, regs->esi + (int)((regs->eflags & X86_EFLAGS_DF) 2193 | + if ( pio_forwarding(port, op_bytes, 1, v, regs) ) 2194 | + goto skip; 2195 | + else 2196 | + { 2197 | + guest_io_write(port, op_bytes, data, v, regs); 2198 | + wr_ad(esi, regs->esi 
+ (int)((regs->eflags & X86_EFLAGS_DF) 2199 | ? -op_bytes : op_bytes)); 2200 | + } 2201 | break; 2202 | } 2203 | 2204 | @@ -2110,6 +2186,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) 2205 | exec_in: 2206 | if ( !guest_io_okay(port, op_bytes, v, regs) ) 2207 | goto fail; 2208 | + if ( pio_forwarding(port, op_bytes, 0, v, regs ) ) 2209 | + goto skip; 2210 | if ( admin_io_okay(port, op_bytes, v, regs) ) 2211 | { 2212 | mark_regs_dirty(regs); 2213 | @@ -2140,6 +2218,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) 2214 | exec_out: 2215 | if ( !guest_io_okay(port, op_bytes, v, regs) ) 2216 | goto fail; 2217 | + if ( pio_forwarding(port, op_bytes, 1, v, regs ) ) 2218 | + goto skip; 2219 | if ( admin_io_okay(port, op_bytes, v, regs) ) 2220 | { 2221 | mark_regs_dirty(regs); 2222 | diff --git a/xen/arch/x86/vgt.c b/xen/arch/x86/vgt.c 2223 | new file mode 100644 2224 | index 0000000..688620f 2225 | --- /dev/null 2226 | +++ b/xen/arch/x86/vgt.c 2227 | @@ -0,0 +1,853 @@ 2228 | +/* 2229 | + * vgt.c: core logic to handle mediated GT passthrough 2230 | + * Copyright (c) 2011, Intel Corporation. 2231 | + * 2232 | + * This program is free software; you can redistribute it and/or modify it 2233 | + * under the terms and conditions of the GNU General Public License, 2234 | + * version 2, as published by the Free Software Foundation. 2235 | + * 2236 | + * This program is distributed in the hope it will be useful, but WITHOUT 2237 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 2238 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 2239 | + * more details. 2240 | + * 2241 | + * You should have received a copy of the GNU General Public License along with 2242 | + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 2243 | + * Place - Suite 330, Boston, MA 02111-1307 USA. 
2244 | + */ 2245 | + 2246 | +#include 2247 | +#include 2248 | +#include 2249 | +#include 2250 | +#include 2251 | +#include 2252 | +#include 2253 | +#include 2254 | +#include 2255 | +#include 2256 | +#include 2257 | +#include 2258 | +#include 2259 | +#include 2260 | +#include 2261 | +#include 2262 | +#include 2263 | +#include 2264 | +#include 2265 | +#include 2266 | + 2267 | +#define VGT_LOG(_f, _a...) gdprintk(XENLOG_INFO, "vGT: " _f "\n" , ## _a) 2268 | +#define VGT_ERR(_f, _a...) gdprintk(XENLOG_ERR, "vGT: " _f "\n" , ## _a) 2269 | +//#define VGT_DEBUG 2270 | +#ifdef VGT_DEBUG 2271 | +#define VGT_DBG(_f, _a...) gdprintk(XENLOG_DEBUG, "vGT: " _f "\n" , ## _a) 2272 | +#else 2273 | +#define VGT_DBG(_f, _a...) ; 2274 | +#endif 2275 | + 2276 | +static unsigned int igd_gen = XEN_IGD_INVALID; 2277 | +unsigned long vgt_mmio_bar_start; 2278 | +char *vgt_mmio_hva; 2279 | +int gt_fifo_count; 2280 | + 2281 | +static enum { 2282 | + FORCE_WAKE_STATE_ON, 2283 | + FORCE_WAKE_STATE_OFF 2284 | +} force_wake_state = FORCE_WAKE_STATE_OFF; 2285 | + 2286 | +static DEFINE_SPINLOCK(vgt_io_lock); 2287 | + 2288 | +int (*force_wake_need)(unsigned long reg); 2289 | +void (*force_wake_get)(void); 2290 | +void (*force_wake_put)(void); 2291 | + 2292 | +#define VGT_REG_READ(offset) *((const volatile u32 *)(vgt_mmio_hva + offset)) 2293 | +#define VGT_REG_WRITE(offset, val) *((volatile u32 *)(vgt_mmio_hva + offset)) = (val) 2294 | + 2295 | +#define FORCEWAKE 0xA18C 2296 | +#define FORCEWAKE_ACK_HSW 0x130044 2297 | +#define FORCEWAKE_ACK 0x130090 2298 | +#define FORCEWAKE_MT 0xa188 /* multi-threaded */ 2299 | +#define FORCEWAKE_MT_ACK 0x130040 2300 | +#define ECOBUS 0xa180 2301 | +#define FORCEWAKE_MT_ENABLE (1<<5) 2302 | + 2303 | +#define GTFIFODBG 0x120000 2304 | +#define GT_FIFO_CPU_ERROR_MASK 7 2305 | +#define GT_FIFO_OVFERR (1<<2) 2306 | +#define GT_FIFO_IAWRERR (1<<1) 2307 | +#define GT_FIFO_IARDERR (1<<0) 2308 | + 2309 | +#define GT_FIFO_FREE_ENTRIES 0x120008 2310 | +#define 
GT_FIFO_NUM_RESERVED_ENTRIES 20 2311 | + 2312 | +#define GEN6_GT_THREAD_STATUS_REG 0x13805c 2313 | +#define GEN6_GT_THREAD_STATUS_CORE_MASK 0x7 2314 | +#define GEN6_GT_THREAD_STATUS_CORE_MASK_HSW (0x7 | (0x07 << 16)) 2315 | + 2316 | +#define _MASKED_BIT_ENABLE(a) (((a) << 16) | (a)) 2317 | +#define _MASKED_BIT_DISABLE(a) ((a) << 16) 2318 | + 2319 | +#define wait_for_atomic_us(COND, US) ({ \ 2320 | + int i, ret__ = -1; \ 2321 | + for (i = 0; i < (US); i++) { \ 2322 | + if ((COND)) { \ 2323 | + ret__ = 0; \ 2324 | + break; \ 2325 | + } \ 2326 | + udelay(1); \ 2327 | + } \ 2328 | + ret__; \ 2329 | + }) 2330 | + 2331 | +static int gen6_force_wake_need(unsigned long reg) 2332 | +{ 2333 | + reg -= vgt_mmio_bar_start; 2334 | + return reg < 0x40000 && reg != FORCEWAKE; 2335 | +} 2336 | + 2337 | +static const u32 gen8_shadowed_regs[] = { 2338 | + 0xa188, 2339 | + 0xa008, 2340 | + 0xa00c, 2341 | + 0x2030, 2342 | + 0x12030, 2343 | + 0x1a030, 2344 | + 0x22030, 2345 | +}; 2346 | + 2347 | +static int is_gen8_shadowed(u32 reg) 2348 | +{ 2349 | + int i; 2350 | + for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++) 2351 | + if (reg == gen8_shadowed_regs[i]) 2352 | + return 1; 2353 | + 2354 | + return 0; 2355 | +} 2356 | + 2357 | +static int gen8_force_wake_need(unsigned long reg) 2358 | +{ 2359 | + reg -= vgt_mmio_bar_start; 2360 | + return reg < 0x40000 && !is_gen8_shadowed(reg); 2361 | +} 2362 | + 2363 | +static void gen6_wait_for_thread_c0(void) 2364 | +{ 2365 | + u32 gt_thread_status_mask; 2366 | + 2367 | + WARN_ON(igd_gen == XEN_IGD_INVALID); 2368 | + 2369 | + if (igd_gen == XEN_IGD_HSW) 2370 | + gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW; 2371 | + else 2372 | + gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK; 2373 | + 2374 | + /* w/a for a sporadic read returning 0 2375 | + * by waiting for the GT thread to wake up. 
2376 | + */ 2377 | + if (wait_for_atomic_us((VGT_REG_READ(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500)) 2378 | + VGT_ERR("GT thread status wait timed out\n"); 2379 | +} 2380 | + 2381 | +void gen6_gt_check_fifodbg(void) 2382 | +{ 2383 | + u32 gtfifodbg; 2384 | + gtfifodbg = VGT_REG_READ(GTFIFODBG); 2385 | + if (gtfifodbg & GT_FIFO_CPU_ERROR_MASK) { 2386 | + VGT_ERR("MMIO read or write has been dropped %x\n", gtfifodbg); 2387 | + VGT_REG_WRITE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK); 2388 | + } 2389 | +} 2390 | + 2391 | + 2392 | +void gen6_force_wake_get (void) 2393 | +{ 2394 | + u32 forcewake_ack; 2395 | + 2396 | + WARN_ON(igd_gen == XEN_IGD_INVALID); 2397 | + 2398 | + if (igd_gen == XEN_IGD_HSW) 2399 | + forcewake_ack = FORCEWAKE_ACK_HSW; 2400 | + else 2401 | + forcewake_ack = FORCEWAKE_ACK; 2402 | + 2403 | + if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1) == 0, 500)) 2404 | + VGT_ERR("Force wake wait timed out\n"); 2405 | + 2406 | + VGT_REG_WRITE(FORCEWAKE, 1); 2407 | + VGT_REG_READ(ECOBUS); 2408 | + 2409 | + if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1), 500)) 2410 | + VGT_ERR("Force wake wait timed out\n"); 2411 | + 2412 | + gen6_wait_for_thread_c0(); 2413 | +} 2414 | + 2415 | +void gen6_force_wake_put (void) 2416 | +{ 2417 | + VGT_REG_WRITE(FORCEWAKE, 0); 2418 | + VGT_REG_READ(ECOBUS); 2419 | + gen6_gt_check_fifodbg(); 2420 | +} 2421 | + 2422 | +void gen7_force_wake_mt_get (void) 2423 | +{ 2424 | + u32 forcewake_ack; 2425 | + 2426 | + WARN_ON(igd_gen == XEN_IGD_INVALID); 2427 | + 2428 | + if (igd_gen == XEN_IGD_HSW || igd_gen == XEN_IGD_BDW) 2429 | + forcewake_ack = FORCEWAKE_ACK_HSW; 2430 | + else 2431 | + forcewake_ack = FORCEWAKE_MT_ACK; 2432 | + 2433 | + if (wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1) == 0, 500)) 2434 | + VGT_ERR("Force wake wait timed out\n"); 2435 | + 2436 | + VGT_REG_WRITE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(1)); 2437 | + VGT_REG_READ(ECOBUS); 2438 | + 2439 | + if 
(wait_for_atomic_us((VGT_REG_READ(forcewake_ack) & 1), 500)) 2440 | + VGT_ERR("Force wake wait timed out\n"); 2441 | + 2442 | + if (igd_gen != XEN_IGD_BDW) 2443 | + gen6_wait_for_thread_c0(); 2444 | +} 2445 | + 2446 | +void gen7_force_wake_mt_put (void) 2447 | +{ 2448 | + VGT_REG_WRITE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(1)); 2449 | + VGT_REG_READ(ECOBUS); 2450 | + gen6_gt_check_fifodbg(); 2451 | +} 2452 | + 2453 | +int gen6_gt_wait_for_fifo(void) 2454 | +{ 2455 | + int ret = 0; 2456 | + 2457 | + if (gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) { 2458 | + int loop = 500; 2459 | + u32 fifo = VGT_REG_READ(GT_FIFO_FREE_ENTRIES); 2460 | + while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) { 2461 | + udelay(10); 2462 | + fifo = VGT_REG_READ(GT_FIFO_FREE_ENTRIES); 2463 | + } 2464 | + if (loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES) 2465 | + ++ret; 2466 | + gt_fifo_count = fifo; 2467 | + } 2468 | + gt_fifo_count--; 2469 | + 2470 | + return ret; 2471 | +} 2472 | + 2473 | +int vgt_set_mmio_trap(struct domain *d, unsigned long mfn_start, unsigned long mfn_end) 2474 | +{ 2475 | + unsigned long hva; 2476 | + int rc; 2477 | + 2478 | + hva = (unsigned long) mfn_to_virt ( mfn_start ); 2479 | + rc = map_pages_to_xen ( hva, mfn_start, mfn_end - mfn_start + 1, PAGE_HYPERVISOR_NOCACHE ); 2480 | + VGT_LOG ( "Setup MMIO range [0x%lx - 0x%lx](hva 0x%lx) for trap. 
return %d\n", 2481 | + mfn_start << PAGE_SHIFT, mfn_end << PAGE_SHIFT, hva, rc ); 2482 | + 2483 | + if (rc) 2484 | + printk("vgt_set_mmio_trap failed %d\n", rc); 2485 | + else { 2486 | + vgt_mmio_bar_start = mfn_start << PAGE_SHIFT; 2487 | + vgt_mmio_hva = (char *)hva; 2488 | + VGT_LOG("vgt_mmio_bar_start: 0x%lx\n", vgt_mmio_bar_start); 2489 | + VGT_LOG("vgt_mmio_hva: 0x%lx\n", (unsigned long)vgt_mmio_hva); 2490 | + } 2491 | + return rc; 2492 | +} 2493 | + 2494 | +unsigned int vgt_bdf = -1; /* -1 means an invalid value */ 2495 | + 2496 | +int set_vgt_info(unsigned int gen_dev_bdf, unsigned int gen_dev_type) 2497 | +{ 2498 | + /* We only support GEN dev that's on bus 0 */ 2499 | + if ( PCI_BUS(gen_dev_bdf) != 0 ) 2500 | + return -EINVAL; 2501 | + 2502 | + if ( gen_dev_type > XEN_IGD_MAX ) 2503 | + return -EINVAL; 2504 | + 2505 | + vgt_bdf = gen_dev_bdf; 2506 | + igd_gen = gen_dev_type; 2507 | + VGT_LOG("vgt_bdf=0x%x, gen=%d\n", vgt_bdf, igd_gen); 2508 | + 2509 | + WARN_ON(igd_gen == XEN_IGD_INVALID); 2510 | + 2511 | + if (igd_gen == XEN_IGD_BDW) { 2512 | + VGT_LOG("Use MT force wake\n"); 2513 | + force_wake_need = gen8_force_wake_need; 2514 | + force_wake_get = gen7_force_wake_mt_get; 2515 | + force_wake_put = gen7_force_wake_mt_put; 2516 | + } else { 2517 | + VGT_LOG("Set default force wake\n"); 2518 | + force_wake_need = gen6_force_wake_need; 2519 | + force_wake_get = gen6_force_wake_get; 2520 | + force_wake_put = gen6_force_wake_put; 2521 | + 2522 | + if ( igd_gen == XEN_IGD_IVB || igd_gen == XEN_IGD_HSW ) { 2523 | + int ecobus; 2524 | + 2525 | + gen7_force_wake_mt_get(); 2526 | + ecobus = VGT_REG_READ(ECOBUS); 2527 | + gen7_force_wake_mt_put(); 2528 | + 2529 | + if (ecobus & FORCEWAKE_MT_ENABLE) 2530 | + { 2531 | + VGT_LOG("Use MT force wake\n"); 2532 | + force_wake_get = gen7_force_wake_mt_get; 2533 | + force_wake_put = gen7_force_wake_mt_put; 2534 | + } 2535 | + } 2536 | + } 2537 | + 2538 | + return 0; 2539 | +} 2540 | + 2541 | +static inline void 
store_gdt(struct desc_ptr *dtr) 2542 | +{ 2543 | + asm volatile("sgdt %0":"=m" (*dtr)); 2544 | +} 2545 | + 2546 | +static int hypercall_read_sysdata(struct vcpu *v, struct vcpu_sysdata_request *req) 2547 | +{ 2548 | + int bytes = req->ops.rd.bytes; 2549 | + 2550 | +#if 0 2551 | + printk("hypercall_read_sysdata: src %lx bytes %x\n", 2552 | + req->ops.rd.src_addr, req->ops.rd.bytes); 2553 | +#endif 2554 | + if (bytes > 8) 2555 | + bytes = 8; 2556 | + 2557 | + req->ops.rd.sys_data = 0; 2558 | + memcpy (&req->ops.rd.sys_data, (void*)req->ops.rd.src_addr, bytes); 2559 | + return 1; 2560 | +} 2561 | + 2562 | +static int hypercall_get_segment(struct vcpu *v, struct vcpu_sysdata_request *req) 2563 | +{ 2564 | + struct desc_ptr gdtr; 2565 | + struct desc_struct *desc_tbl, *pdesc; 2566 | + uint16_t sel = req->ops.seg.selector; 2567 | + 2568 | + req->ops.seg.xdt_desc[0] = 0; 2569 | + req->ops.seg.xdt_desc[1] = 0; 2570 | + if (sel & 4) { 2571 | + printk("hypercall_get_selector %x in LDT, " 2572 | + "not supported yet\n", sel); 2573 | + return 0; 2574 | + } 2575 | + store_gdt(&gdtr); 2576 | + desc_tbl = (struct desc_struct *) gdtr.base; 2577 | + if ( sel >= gdtr.limit ) { 2578 | + printk("hypercall_get_selector selector %x exceeds size" 2579 | + "gdtr base %lx size %x\n", sel, gdtr.base, gdtr.limit); 2580 | + return 0; 2581 | + } 2582 | + pdesc = desc_tbl + (sel>>3); 2583 | + req->ops.seg.xdt_desc[0] = *(uint64_t *)pdesc; 2584 | + if (!(pdesc->b & (1<<12))) /* S bit = 0: system descriptor */ 2585 | + { 2586 | + pdesc++; 2587 | + req->ops.seg.xdt_desc[1] = *(uint64_t *)pdesc; 2588 | + } 2589 | + return 1; 2590 | +} 2591 | + 2592 | +int hypercall_get_sysdata(struct vcpu *v, struct vcpu_sysdata_request *req) 2593 | +{ 2594 | + if (req->op_type == VCPUOP_sysdata_get_segment) 2595 | + return hypercall_get_segment (v, req); 2596 | + else if ( req->op_type == VCPUOP_sysdata_read ) 2597 | + { 2598 | + return hypercall_read_sysdata (v, req); 2599 | + } 2600 | + else { 2601 | + 
printk("Wrong hypercall_get_sysdata op_type %d\n", (int)req->op_type); 2602 | + return 0; 2603 | + } 2604 | +} 2605 | + 2606 | +/* 2607 | + * The service to finish I/O access on behave of the requester, in 2608 | + * case that direct access to said I/O resources is trapped. 2609 | + * 2610 | + * Do we need a lock here? Perhaps not, since it's guest driver's 2611 | + * responsibilty to avoid race contention on same MMIO... 2612 | + * 2613 | + * FIXME: though PIO logic is included here, we only trap MMIO for 2614 | + * current stage. Need to add back GP handler hook to support PIO 2615 | + * trap in the future if required. 2616 | + */ 2617 | +int hypercall_io_emulation(struct vcpu *v, struct vcpu_emul_ioreq *req) 2618 | +{ 2619 | + unsigned long data; 2620 | + int rc = 1; 2621 | + 2622 | + VGT_DBG("I/O request: %s, %lx\n", 2623 | + req->type == PV_IOREQ_TYPE_PIO ? "pio" : "mmio", req->addr); 2624 | + 2625 | + if ( req->type == PV_IOREQ_TYPE_PIO ) 2626 | + { 2627 | + ASSERT( req->size <= 4 ); 2628 | + if ( req->dir == PV_IOREQ_READ ) 2629 | + { 2630 | + if ( (req->addr & ~3) == 0xcfc ) { 2631 | + ASSERT (v->domain == dom0 ); 2632 | + ASSERT (CF8_to_BDF(v->domain->arch.pci_cf8) == vgt_bdf); 2633 | + data = pci_conf_read (v->domain->arch.pci_cf8, 2634 | + req->addr & 3, req->size); 2635 | + memcpy (&req->data, &data, req->size); 2636 | + return 1; 2637 | + } 2638 | + else if ( (req->addr & ~3) == 0xcf8 ) { 2639 | + printk("VGT: hypercall_io_emulation: read %x, " 2640 | + "unexpected to be here!!!\n", (unsigned int)req->addr); 2641 | + } 2642 | + switch (req->size) 2643 | + { 2644 | + case 1: req->data = inb ( req->addr ); break; 2645 | + case 2: req->data = inw ( req->addr ); break; 2646 | + case 4: req->data = inl ( req->addr ); break; 2647 | + default: ASSERT (0); break; 2648 | + } 2649 | + } // PV_IOREQ_READ 2650 | + else 2651 | + { 2652 | + if ( (req->addr & ~3) == 0xcfc ) { 2653 | + ASSERT (v->domain == dom0 ); 2654 | + ASSERT (CF8_to_BDF(v->domain->arch.pci_cf8) 
== vgt_bdf); 2655 | + pci_conf_write (v->domain->arch.pci_cf8, 2656 | + (uint8_t) req->addr & 3, 2657 | + (uint8_t)req->size, (uint32_t)req->data); 2658 | + return 1; 2659 | + } 2660 | + else if ( (req->addr & ~3) == 0xcf8 ) { 2661 | + printk("VGT: hypercall_io_emulation: write %x, " 2662 | + "unexpected to be here!!!\n", (unsigned int)req->addr); 2663 | + } 2664 | + switch ( req->size ) 2665 | + { 2666 | + case 1: 2667 | + outb ( (uint8_t)req->data, req->addr ); 2668 | + break; 2669 | + case 2: 2670 | + outw ( (uint16_t)req->data, req->addr ); 2671 | + break; 2672 | + case 4: 2673 | + outl ( req->data, req->addr ); 2674 | + break; 2675 | + default: 2676 | + ASSERT ( 0 ); 2677 | + break; 2678 | + } 2679 | + } // PV_IOREQ_WRITE 2680 | + } // PIO 2681 | + else if (req->type == PV_IOREQ_TYPE_COPY) /* MMIO */ 2682 | + { 2683 | + void *hva = maddr_to_virt ( req->addr ); 2684 | + int force_wake = 0; 2685 | + unsigned long flags; 2686 | + 2687 | + spin_lock_irqsave(&vgt_io_lock, flags); 2688 | + 2689 | + if ( req->dir == PV_IOREQ_READ ) 2690 | + { 2691 | + if (force_wake_need(req->addr) && (force_wake_state == FORCE_WAKE_STATE_OFF)) { 2692 | + force_wake = 1; 2693 | + force_wake_get(); 2694 | + } 2695 | + 2696 | + req->data = 0; 2697 | + switch ( req->size ) 2698 | + { 2699 | + case 1: 2700 | + req->data = *(uint8_t *)hva; 2701 | + break; 2702 | + case 2: 2703 | + req->data = *(uint16_t *)hva; 2704 | + break; 2705 | + case 4: 2706 | + req->data = *(uint32_t *)hva; 2707 | + break; 2708 | + case 8: 2709 | + req->data = *(uint64_t *)hva; 2710 | + break; 2711 | + default: 2712 | + ASSERT ( 0 ); 2713 | + break; 2714 | + } 2715 | + 2716 | + if (force_wake) { 2717 | + force_wake_put(); 2718 | + } 2719 | + } // read 2720 | + else 2721 | + { 2722 | + int fifo_ret = 0; 2723 | + if (igd_gen == XEN_IGD_BDW) { 2724 | + if (force_wake_need(req->addr) 2725 | + && force_wake_state == FORCE_WAKE_STATE_OFF) { 2726 | + force_wake = 1; 2727 | + force_wake_get(); 2728 | + } 2729 | + } else if 
(force_wake_need(req->addr)) { 2730 | + fifo_ret = gen6_gt_wait_for_fifo(); 2731 | + } 2732 | + 2733 | + switch ( req->size ) 2734 | + { 2735 | + case 1: 2736 | + *(uint8_t *)hva = req->data; 2737 | + break; 2738 | + case 2: 2739 | + *(uint16_t *)hva = req->data; 2740 | + break; 2741 | + case 4: *(uint32_t *)hva = req->data; 2742 | + break; 2743 | + case 8: *(uint64_t *)hva = req->data; 2744 | + break; 2745 | + default: 2746 | + ASSERT ( 0 ); 2747 | + break; 2748 | + } 2749 | + if (force_wake) { 2750 | + force_wake_put(); 2751 | + } else if (fifo_ret) { 2752 | + gen6_gt_check_fifodbg(); 2753 | + } 2754 | + } // write 2755 | + 2756 | + spin_unlock_irqrestore(&vgt_io_lock, flags); 2757 | + 2758 | + } // mmio 2759 | + else if (req->type == PV_IOREQ_TYPE_CTRL) 2760 | + { 2761 | + unsigned long flags; 2762 | + 2763 | + spin_lock_irqsave(&vgt_io_lock, flags); 2764 | + 2765 | + switch(req->addr) 2766 | + { 2767 | + case VGT_CTRL_FORCEWAKE_PUT: 2768 | + if (force_wake_state == FORCE_WAKE_STATE_ON){ 2769 | + force_wake_put(); 2770 | + force_wake_state = FORCE_WAKE_STATE_OFF; 2771 | + } 2772 | + break; 2773 | + case VGT_CTRL_FORCEWAKE_GET: 2774 | + if (force_wake_state == FORCE_WAKE_STATE_OFF){ 2775 | + force_wake_get(); 2776 | + force_wake_state = FORCE_WAKE_STATE_ON; 2777 | + } 2778 | + break; 2779 | + default: 2780 | + rc = 0; 2781 | + ASSERT(0); 2782 | + break; 2783 | + } 2784 | + 2785 | + spin_unlock_irqrestore(&vgt_io_lock, flags); 2786 | + } 2787 | + else { 2788 | + printk("%s: invalid type\n", __func__); 2789 | + rc = 0; 2790 | + } 2791 | + 2792 | + return rc; 2793 | +} 2794 | + 2795 | +/* Without this, after Dom0 S3, MMIO read to IGD can always return zero */ 2796 | +void vgt_resume(void) 2797 | +{ 2798 | + if (force_wake_state == FORCE_WAKE_STATE_ON) 2799 | + force_wake_get(); 2800 | +} 2801 | + 2802 | +/* 2803 | + * Take use of hypervisor selector in error code to indicate a 2804 | + * vGT related event. 2805 | + * 2806 | + * P.S. 
will this cause any backward compatibility issue when 2807 | + * __HYPERVISOR_CS is changed someday? 2808 | + */ 2809 | +void vgt_inject_virtual_exception(struct cpu_user_regs *regs, int pio) 2810 | +{ 2811 | + if (pio) 2812 | + regs->error_code = __HYPERVISOR_CS + 4; 2813 | + else 2814 | + regs->error_code = __HYPERVISOR_CS; 2815 | + do_guest_trap(TRAP_gp_fault, regs, 1); 2816 | +} 2817 | + 2818 | +int do_rsvd_page_fault(struct vcpu *v, unsigned long addr, 2819 | + struct cpu_user_regs *regs) 2820 | +{ 2821 | + l1_pgentry_t pte; 2822 | + int flags; 2823 | + 2824 | + ASSERT ( v->domain == dom0 ); 2825 | + /* Only handle the fault triggered by delibrate reserved bit */ 2826 | + ASSERT ( (regs->error_code & (PFEC_page_present | PFEC_reserved_bit)) == 2827 | + (PFEC_page_present | PFEC_reserved_bit) ); 2828 | + 2829 | + /* Attempt to read the PTE that maps the VA being accessed. */ 2830 | + guest_get_eff_l1e ( v, addr, &pte ); 2831 | + 2832 | +#if 1 2833 | +{ 2834 | + static long cnt = 0; 2835 | + if (cnt++ < 1000) 2836 | + VGT_DBG("vGT: captured %ldth rsvd fault (%lx, %lx)\n", cnt, addr, (l1e_get_pfn(pte) << PAGE_SHIFT) | (addr & ~PAGE_MASK)); 2837 | +} 2838 | +#endif 2839 | + 2840 | + /* We're only looking for page fault purely caused by Xen */ 2841 | + flags = l1e_get_flags(pte); 2842 | + if ( (regs->error_code & PFEC_insn_fetch) || 2843 | + ((regs->error_code & PFEC_write_access) && !(flags & _PAGE_RW)) || 2844 | + ((regs->error_code & PFEC_user_mode) && !(flags & _PAGE_USER)) ) 2845 | + { 2846 | + VGT_LOG("vGT: return guest fault instead on %lx (%x, %x)\n", addr, regs->error_code, flags); 2847 | + regs->error_code &= ~PFEC_reserved_bit; 2848 | + goto bail; 2849 | + } 2850 | + 2851 | + vgt_inject_virtual_exception(regs, 0); 2852 | + return EXCRET_fault_fixed; 2853 | + 2854 | + bail: 2855 | + return 0; 2856 | +} 2857 | + 2858 | +/* FIXME: change to static */ 2859 | +int vgt_hvm_intercept_io(ioreq_t *p) 2860 | +{ 2861 | + p->is_vgt = 1; 2862 | + 2863 | + /* return 
X86EMUL_UNHANDLEABLE to forward to dom0 vGT driver */ 2864 | + return X86EMUL_UNHANDLEABLE; 2865 | +} 2866 | + 2867 | +static int vgt_intercept_cf8_cfc( 2868 | + int dir, uint32_t port, uint32_t bytes, uint32_t *val) 2869 | +{ 2870 | + struct domain *d = current->domain; 2871 | + ioreq_t *p = get_ioreq(current); 2872 | + 2873 | + if ( (port & ~3) == 0xcf8 ) 2874 | + { 2875 | + if (bytes != 4){ 2876 | + printk("VGT_WARNING: vgt_intercept_cf8_cfc bytes=%d not 4 bytes\n", bytes); 2877 | + return X86EMUL_OKAY; 2878 | + } 2879 | + if ((port & 3) != 0){ 2880 | + printk("VGT_WARNING: vgt_intercept_cf8_cfc port=0x%x not aligned\n", bytes); 2881 | + return X86EMUL_OKAY; 2882 | + } 2883 | + if (dir == IOREQ_WRITE) { 2884 | + /* Write */ 2885 | + d->arch.pci_cf8 = *val; 2886 | + if ( CF8_to_BDF(d->arch.pci_cf8) == vgt_bdf ) 2887 | + return vgt_hvm_intercept_io(p); 2888 | + else 2889 | + return X86EMUL_UNHANDLEABLE; 2890 | + } 2891 | + else { 2892 | + /* Read */ 2893 | + *val = d->arch.pci_cf8; 2894 | + return X86EMUL_OKAY; 2895 | + } 2896 | + } 2897 | + else { 2898 | + /* CFC access */ 2899 | + if ( CF8_to_BDF(d->arch.pci_cf8) == vgt_bdf ) 2900 | + { 2901 | + /* To vGT device */ 2902 | + return vgt_hvm_intercept_io(p); 2903 | + } 2904 | + /* To traditional Device Model */ 2905 | + return X86EMUL_UNHANDLEABLE; 2906 | + } 2907 | +} 2908 | + 2909 | +int vgt_hvm_init(struct domain *d) 2910 | +{ 2911 | + /* register vGT PIO & MMIO handler */ 2912 | + 2913 | + ASSERT( is_hvm_domain(d) ); 2914 | + 2915 | + if ( vgt_enabled(d) ) 2916 | + return 0; 2917 | + 2918 | + printk("Enable vGT for domain %d\n", d->domain_id); 2919 | + 2920 | + wp_htable(d) = xmalloc_bytes(WP_HASH_ENTRY_SIZE * WP_HASH_SIZE); 2921 | + if ( wp_htable(d) == NULL ) 2922 | + return -ENOMEM; 2923 | + 2924 | + d->arch.hvm_domain.vgt_enabled = 1; 2925 | + 2926 | + register_portio_handler(d, 0xcf8, 8, vgt_intercept_cf8_cfc); 2927 | + 2928 | + memset (wp_htable(d), 0, WP_HASH_ENTRY_SIZE * WP_HASH_SIZE); 2929 | + 2930 | + 
/* FIXME: get the PIO & MMIO base&size */ 2931 | +// register_portio_handler(d, vgt_pio_base, vgt_pio_size, vgt_intercept_io); 2932 | + 2933 | + return 0; 2934 | +} 2935 | + 2936 | + 2937 | +void vgt_hvm_deinit(struct domain *d) 2938 | +{ 2939 | + if (!vgt_enabled(d)) 2940 | + return; 2941 | + 2942 | + rangeset_destroy(d->iomem_forward_caps); 2943 | + free_vgt_wp_hash( wp_htable(d) ); 2944 | + xfree ( wp_htable(d) ); 2945 | +} 2946 | + 2947 | +int vgt_io_trap(struct domain *d, struct xen_domctl_vgt_io_trap *info) 2948 | +{ 2949 | + struct rangeset *rs; 2950 | + 2951 | + void *range; 2952 | + 2953 | + int r; 2954 | + int i; 2955 | + 2956 | + if (!d) 2957 | + d = dom0; 2958 | + 2959 | + /* Process PIO trap range. */ 2960 | + rs = d->arch.ioport_forwarding_caps; 2961 | + 2962 | + if (info->n_pio) { 2963 | + /* Add range. */ 2964 | + for (i = 0; i < info->n_pio; i++) { 2965 | + r = rangeset_add_range(rs, info->pio[i].s, info->pio[i].e); 2966 | + if (r) { 2967 | + printk("VGT(%d): fail to add PIO range [0x%"PRIx64" - 0x%"PRIx64"].\n", 2968 | + d->domain_id, info->pio[i].s, info->pio[i].e); 2969 | + goto err; 2970 | + } 2971 | + } 2972 | + 2973 | + printk("DOM %d IO rangeset:\n", d->domain_id); 2974 | + 2975 | + rangeset_printk(rs); 2976 | + 2977 | + printk("\n"); 2978 | + } else { 2979 | + /* Query rangeset. */ 2980 | + range = first_range_ex(rs); 2981 | + 2982 | + for (i = 0; range && i < MAX_VGT_IO_TRAP_INFO; i++) 2983 | + range = get_range(rs, range, &info->pio[i].s, &info->pio[i].e); 2984 | + 2985 | + info->n_pio = i; 2986 | + } 2987 | + 2988 | + /* Process MMIO trap range. */ 2989 | + rs = d->iomem_forward_caps; 2990 | + 2991 | + if (info->n_mmio) { 2992 | + /* Add MMIO range. */ 2993 | + 2994 | + /* XXX: remove the old trap info. This is necessary since vGT 2995 | + * doesn't emulate the 4M MMIO BAR of GEN dev correctly now: 2996 | + * vGT should be improved to use untrap-then-trap logic when 2997 | + * the BAR is updated. 
2998 | + */ 2999 | + rangeset_destroy(d->iomem_forward_caps); 3000 | + d->iomem_forward_caps = rangeset_new(d, "I/O Memory Forwarding", 3001 | + RANGESETF_prettyprint_hex); 3002 | + 3003 | + if (!d->iomem_forward_caps) { 3004 | + printk("VGT(%d): fail to allocate rangeset for io trap.\n", 3005 | + d->domain_id); 3006 | + return -ENOMEM; 3007 | + } 3008 | + 3009 | + rs = d->iomem_forward_caps; 3010 | + 3011 | + for (i = 0; i < info->n_mmio; i++) { 3012 | + printk("VGT(%d): Add MMIO range [0x%"PRIx64" - 0x%"PRIx64"].\n", 3013 | + d->domain_id, info->mmio[i].s, info->mmio[i].e); 3014 | + 3015 | + if (d == dom0) { 3016 | + if (info->mmio[i].s & ~PAGE_MASK 3017 | + || info->mmio[i].e & ~PAGE_MASK) { 3018 | + printk("VGT(%d): MMIO range is not page-aligned.\n", 3019 | + d->domain_id); 3020 | + 3021 | + r = -EINVAL; 3022 | + goto err; 3023 | + } 3024 | + 3025 | + /* DOM0 use MFN. */ 3026 | + info->mmio[i].s >>= PAGE_SHIFT; 3027 | + info->mmio[i].e >>= PAGE_SHIFT; 3028 | + } 3029 | + 3030 | + r = rangeset_add_range(rs, info->mmio[i].s, info->mmio[i].e); 3031 | + if (r) { 3032 | + printk("VGT(%d): fail to add MMIO range", 3033 | + d->domain_id); 3034 | + 3035 | + goto err; 3036 | + } 3037 | + 3038 | + if (d == dom0) { 3039 | + /* 3040 | + * Map MMIO range into XEN, 3041 | + * because we will access some GFX registers in XEN. 3042 | + */ 3043 | + r = vgt_set_mmio_trap(d, info->mmio[i].s, info->mmio[i].e); 3044 | + if (r) { 3045 | + printk("VGT(%d): fail to map MMIO range.\n", 3046 | + d->domain_id); 3047 | + 3048 | + goto err; 3049 | + } 3050 | + } 3051 | + } 3052 | + 3053 | + printk("DOM %d MMIO rangeset:\n", d->domain_id); 3054 | + 3055 | + rangeset_printk(rs); 3056 | + 3057 | + printk("\n"); 3058 | + 3059 | + } else { 3060 | + /* Query MMIO range. 
*/ 3061 | + range = first_range_ex(rs); 3062 | + 3063 | + for (i = 0; range && i < MAX_VGT_IO_TRAP_INFO; i++) { 3064 | + range = get_range(rs, range, &info->mmio[i].s, &info->mmio[i].e); 3065 | + 3066 | + if (d == dom0) { 3067 | + info->mmio[i].s <<= PAGE_SHIFT; 3068 | + info->mmio[i].e <<= PAGE_SHIFT; 3069 | + } 3070 | + } 3071 | + 3072 | + info->n_mmio = i; 3073 | + } 3074 | + 3075 | + return 0; 3076 | + 3077 | +err: 3078 | + info->n_pio = info->n_mmio = 0; 3079 | + return r; 3080 | +} 3081 | diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c 3082 | index e390c2a..932a104 100644 3083 | --- a/xen/arch/x86/x86_emulate/x86_emulate.c 3084 | +++ b/xen/arch/x86/x86_emulate/x86_emulate.c 3085 | @@ -4196,7 +4196,9 @@ x86_emulate( 3086 | if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) ) 3087 | rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, 3088 | ea.bytes, ctxt); 3089 | - goto done; 3090 | + if (rc) 3091 | + goto done; 3092 | + break; 3093 | } 3094 | 3095 | case 0x80 ... 
0x8f: /* jcc (near) */ { 3096 | diff --git a/xen/common/domain.c b/xen/common/domain.c 3097 | index fac3470..317386f 100644 3098 | --- a/xen/common/domain.c 3099 | +++ b/xen/common/domain.c 3100 | @@ -248,8 +248,11 @@ struct domain *domain_create( 3101 | init_status |= INIT_rangeset; 3102 | 3103 | d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex); 3104 | + d->iomem_forward_caps = rangeset_new(d, "I/O Memory Forwarding", 3105 | + RANGESETF_prettyprint_hex); 3106 | d->irq_caps = rangeset_new(d, "Interrupts", 0); 3107 | - if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) ) 3108 | + if ( (d->iomem_caps == NULL) || (d->iomem_forward_caps == NULL) 3109 | + || (d->irq_caps == NULL) ) 3110 | goto fail; 3111 | 3112 | if ( domcr_flags & DOMCRF_dummy ) 3113 | diff --git a/xen/common/domctl.c b/xen/common/domctl.c 3114 | index 9bd8f80..e056c1b 100644 3115 | --- a/xen/common/domctl.c 3116 | +++ b/xen/common/domctl.c 3117 | @@ -298,6 +298,11 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) 3118 | case XEN_DOMCTL_test_assign_device: 3119 | d = NULL; 3120 | break; 3121 | + case XEN_DOMCTL_vgt_io_trap: 3122 | + if (!op->domain) { 3123 | + d = NULL; 3124 | + break; 3125 | + } 3126 | default: 3127 | d = rcu_lock_domain_by_id(op->domain); 3128 | if ( d == NULL ) 3129 | diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c 3130 | index 64c976b..fdfafd5 100644 3131 | --- a/xen/common/event_channel.c 3132 | +++ b/xen/common/event_channel.c 3133 | @@ -1140,20 +1140,14 @@ void free_xen_event_channel( 3134 | (void)__evtchn_close(d, port); 3135 | } 3136 | 3137 | - 3138 | -void notify_via_xen_event_channel(struct domain *ld, int lport) 3139 | +void __notify_via_xen_event_channel(struct domain *ld, int lport) 3140 | { 3141 | struct evtchn *lchn, *rchn; 3142 | struct domain *rd; 3143 | int rport; 3144 | 3145 | - spin_lock(&ld->event_lock); 3146 | - 3147 | if ( unlikely(ld->is_dying) ) 3148 | - { 3149 | - spin_unlock(&ld->event_lock); 
3150 | return; 3151 | - } 3152 | 3153 | ASSERT(port_is_valid(ld, lport)); 3154 | lchn = evtchn_from_port(ld, lport); 3155 | @@ -1166,6 +1160,14 @@ void notify_via_xen_event_channel(struct domain *ld, int lport) 3156 | rchn = evtchn_from_port(rd, rport); 3157 | evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport); 3158 | } 3159 | +} 3160 | + 3161 | + 3162 | +void notify_via_xen_event_channel(struct domain *ld, int lport) 3163 | +{ 3164 | + spin_lock(&ld->event_lock); 3165 | + 3166 | + __notify_via_xen_event_channel(ld, lport); 3167 | 3168 | spin_unlock(&ld->event_lock); 3169 | } 3170 | diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c 3171 | index 5072133..26d8dae 100644 3172 | --- a/xen/common/keyhandler.c 3173 | +++ b/xen/common/keyhandler.c 3174 | @@ -20,6 +20,7 @@ 3175 | #include 3176 | #include 3177 | #include 3178 | +#include 3179 | 3180 | static struct keyhandler *key_table[256]; 3181 | static unsigned char keypress_key; 3182 | diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c 3183 | index f09c0c4..050a775 100644 3184 | --- a/xen/common/rangeset.c 3185 | +++ b/xen/common/rangeset.c 3186 | @@ -438,3 +438,18 @@ void rangeset_domain_printk( 3187 | 3188 | spin_unlock(&d->rangesets_lock); 3189 | } 3190 | + 3191 | +void *first_range_ex(struct rangeset *r) 3192 | +{ 3193 | + return first_range(r); 3194 | +} 3195 | + 3196 | +void *get_range(struct rangeset *r, void *x, 3197 | + unsigned long *s, unsigned long *e) 3198 | +{ 3199 | + struct range *y = x; 3200 | + 3201 | + *s = y->s; 3202 | + *e = y->e; 3203 | + return next_range(r, y); 3204 | +} 3205 | diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h 3206 | index 909f449..dfe15b4 100644 3207 | --- a/xen/include/asm-x86/domain.h 3208 | +++ b/xen/include/asm-x86/domain.h 3209 | @@ -214,7 +214,14 @@ struct paging_vcpu { 3210 | #define MAX_CPUID_INPUT 40 3211 | typedef xen_domctl_cpuid_t cpuid_input_t; 3212 | 3213 | -#define MAX_NESTEDP2M 10 3214 | +/* 3215 | + * Adding 
new data structure into struct domain exceeds the PAGE_SIZE 3216 | + * limitation, and extending to multiple page may be risky. 3217 | + * We simply borrow space from some unused data structures for now, 3218 | + * and revisit some time later. 3219 | + */ 3220 | +//#define MAX_NESTEDP2M 10 3221 | +#define MAX_NESTEDP2M 1 3222 | struct p2m_domain; 3223 | struct time_scale { 3224 | int shift; 3225 | @@ -251,6 +258,7 @@ struct arch_domain 3226 | 3227 | /* I/O-port admin-specified access capabilities. */ 3228 | struct rangeset *ioport_caps; 3229 | + struct rangeset *ioport_forwarding_caps; 3230 | uint32_t pci_cf8; 3231 | uint8_t cmos_idx; 3232 | 3233 | diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h 3234 | index 27b3de5..74375a8 100644 3235 | --- a/xen/include/asm-x86/hvm/domain.h 3236 | +++ b/xen/include/asm-x86/hvm/domain.h 3237 | @@ -41,6 +41,24 @@ struct hvm_ioreq_page { 3238 | void *va; 3239 | }; 3240 | 3241 | +struct wp_hash_table { 3242 | + struct wp_hash_table *next; 3243 | + unsigned long gpfn; 3244 | +}; 3245 | +#define WP_HASH_SIZE_SHIFT 8 3246 | +#define WP_HASH_SIZE (1 << WP_HASH_SIZE_SHIFT) 3247 | +#define wp_hash(x) ((x) % WP_HASH_SIZE) 3248 | +#define WP_INVALID_GPFN 0 3249 | +#define WP_HASH_ENTRY_SIZE sizeof(struct wp_hash_table) 3250 | +#define wp_htable(d) (d->arch.hvm_domain.vgt_wp_hash_base) 3251 | +#define wp_htable_lock(d) (d->arch.hvm_domain.vgt_wp_hash_lock) 3252 | +int vgt_wp_hash_add(struct wp_hash_table *wp_ht, unsigned long gpfn); 3253 | +int vgt_wp_hash_rem(struct wp_hash_table *wp_ht, unsigned long gpfn); 3254 | +struct wp_hash_table *lookup_wp_hash_table( 3255 | + struct wp_hash_table *wp_ht, 3256 | + unsigned long gpfn); 3257 | +void free_vgt_wp_hash(struct wp_hash_table *wp_ht); 3258 | + 3259 | struct hvm_domain { 3260 | struct hvm_ioreq_page ioreq; 3261 | struct hvm_ioreq_page buf_ioreq; 3262 | @@ -100,9 +118,13 @@ struct hvm_domain { 3263 | struct vmx_domain vmx; 3264 | struct svm_domain svm; 3265 
| }; 3266 | + bool_t vgt_enabled; 3267 | + struct wp_hash_table *vgt_wp_hash_base; 3268 | + spinlock_t vgt_wp_hash_lock; 3269 | }; 3270 | 3271 | #define hap_enabled(d) ((d)->arch.hvm_domain.hap_enabled) 3272 | +#define vgt_enabled(d) ((d)->arch.hvm_domain.vgt_enabled) 3273 | 3274 | #endif /* __ASM_X86_HVM_DOMAIN_H__ */ 3275 | 3276 | diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h 3277 | index 4d303e6..151d24a 100644 3278 | --- a/xen/include/asm-x86/hvm/hvm.h 3279 | +++ b/xen/include/asm-x86/hvm/hvm.h 3280 | @@ -194,6 +194,11 @@ struct hvm_function_table { 3281 | bool_t access_w, bool_t access_x); 3282 | }; 3283 | 3284 | +int vgt_domctl_add_range(struct domain *d, unsigned long gfn, 3285 | + unsigned long mfn, unsigned long nr_mfns); 3286 | +int vgt_domctl_remove_range(struct domain *d, unsigned long gfn, 3287 | + unsigned long mfn, unsigned long nr_mfns); 3288 | + 3289 | extern struct hvm_function_table hvm_funcs; 3290 | extern bool_t hvm_enabled; 3291 | extern bool_t cpu_has_lmsl; 3292 | diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h 3293 | index 410a5f6..a237c71 100644 3294 | --- a/xen/include/asm-x86/hvm/io.h 3295 | +++ b/xen/include/asm-x86/hvm/io.h 3296 | @@ -70,8 +70,10 @@ extern const struct hvm_mmio_handler vlapic_mmio_handler; 3297 | extern const struct hvm_mmio_handler vioapic_mmio_handler; 3298 | extern const struct hvm_mmio_handler msixtbl_mmio_handler; 3299 | extern const struct hvm_mmio_handler iommu_mmio_handler; 3300 | +extern const struct hvm_mmio_handler vgt_mmio_handler; 3301 | +extern const struct hvm_mmio_handler writeprotection_handler; 3302 | 3303 | -#define HVM_MMIO_HANDLER_NR 5 3304 | +#define HVM_MMIO_HANDLER_NR 7 3305 | 3306 | int hvm_io_intercept(ioreq_t *p, int type); 3307 | void register_io_handler( 3308 | diff --git a/xen/include/asm-x86/hvm/vcpu.h b/xen/include/asm-x86/hvm/vcpu.h 3309 | index e8b8cd7..871998a 100644 3310 | --- a/xen/include/asm-x86/hvm/vcpu.h 3311 | +++ 
b/xen/include/asm-x86/hvm/vcpu.h 3312 | @@ -44,12 +44,23 @@ struct hvm_vcpu_asid { 3313 | uint32_t asid; 3314 | }; 3315 | 3316 | +/* Max MMIO read or write size in one instruction emulation */ 3317 | +#define MAX_INS_EMULATE_MMIO_SIZE 32 3318 | + 3319 | struct hvm_vcpu_io { 3320 | /* I/O request in flight to device model. */ 3321 | enum hvm_io_state io_state; 3322 | unsigned long io_data; 3323 | int io_size; 3324 | 3325 | + int mmio_split; 3326 | + int mmio_split_size; 3327 | + int mmio_split_dir; 3328 | + 3329 | + paddr_t mmio_split_pa; 3330 | + unsigned int mmio_split_done_size; 3331 | + uint8_t mmio_split_buf[MAX_INS_EMULATE_MMIO_SIZE]; 3332 | + 3333 | /* 3334 | * HVM emulation: 3335 | * Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn. 3336 | @@ -170,6 +181,7 @@ struct hvm_vcpu { 3337 | struct hvm_trap inject_trap; 3338 | 3339 | struct viridian_vcpu viridian; 3340 | + int vgt_port; /* event channel port to notify dom0 vGT driver */ 3341 | }; 3342 | 3343 | #endif /* __ASM_X86_HVM_VCPU_H__ */ 3344 | diff --git a/xen/include/asm-x86/iocap.h b/xen/include/asm-x86/iocap.h 3345 | index 591ae17..dfb7958 100644 3346 | --- a/xen/include/asm-x86/iocap.h 3347 | +++ b/xen/include/asm-x86/iocap.h 3348 | @@ -14,6 +14,14 @@ 3349 | #define ioports_access_permitted(d, s, e) \ 3350 | rangeset_contains_range((d)->arch.ioport_caps, s, e) 3351 | 3352 | +#define ioports_permit_forwarding(d, s, e) \ 3353 | + rangeset_add_range((d)->arch.ioport_forwarding_caps, s, e) 3354 | +#define ioports_forwarding_permitted(d, s, e) \ 3355 | + rangeset_contains_range((d)->arch.ioport_forwarding_caps, s, e) 3356 | +#define ioports_forwarding_started(d) \ 3357 | + (!rangeset_is_empty((d)->arch.ioport_forwarding_caps)) 3358 | + 3359 | + 3360 | #define cache_flush_permitted(d) \ 3361 | (!rangeset_is_empty((d)->iomem_caps) || \ 3362 | !rangeset_is_empty((d)->arch.ioport_caps)) 3363 | diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h 3364 | index 43583b2..2656de0 100644 
3365 | --- a/xen/include/asm-x86/p2m.h 3366 | +++ b/xen/include/asm-x86/p2m.h 3367 | @@ -70,6 +70,7 @@ typedef enum { 3368 | p2m_ram_paging_in = 11, /* Memory that is being paged in */ 3369 | p2m_ram_shared = 12, /* Shared or sharable memory */ 3370 | p2m_ram_broken = 13, /* Broken page, access cause domain crash */ 3371 | + p2m_writeprotection = 14, /* write-protection guest PPGTT PT pages */ 3372 | } p2m_type_t; 3373 | 3374 | /* 3375 | @@ -656,6 +657,8 @@ void p2m_flush_nestedp2m(struct domain *d); 3376 | void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, 3377 | l1_pgentry_t *p, mfn_t table_mfn, l1_pgentry_t new, unsigned int level); 3378 | 3379 | +int hap_write_protect_pages(struct domain *d, 3380 | + unsigned long *gpfn, int num, int wr); 3381 | #endif /* _XEN_P2M_H */ 3382 | 3383 | /* 3384 | diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h 3385 | index 2c3cc2d..e2d3e86 100644 3386 | --- a/xen/include/asm-x86/page.h 3387 | +++ b/xen/include/asm-x86/page.h 3388 | @@ -71,6 +71,10 @@ 3389 | #define l4e_get_pfn(x) \ 3390 | ((unsigned long)(((x).l4 & (PADDR_MASK&PAGE_MASK)) >> PAGE_SHIFT)) 3391 | 3392 | +/* Get reserved bits mapped by pte(unsigned long) */ 3393 | +#define l1e_get_rsvd(x) \ 3394 | + ((unsigned long)(((x).l1 & (PRSVD_MASK&(~PADDR_MASK))))) 3395 | + 3396 | /* Get physical address of page mapped by pte (paddr_t). 
*/ 3397 | #define l1e_get_paddr(x) \ 3398 | ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) 3399 | @@ -157,6 +161,12 @@ static inline l4_pgentry_t l4e_from_paddr(paddr_t pa, unsigned int flags) 3400 | #define l3e_remove_flags(x, flags) ((x).l3 &= ~put_pte_flags(flags)) 3401 | #define l4e_remove_flags(x, flags) ((x).l4 &= ~put_pte_flags(flags)) 3402 | 3403 | +/* Add extra reserved bits to an existing pte */ 3404 | +#define l1e_add_rsvd(x, bits) ((x).l1 |= (bits)) 3405 | + 3406 | +/* Remove reserved bits from an existing pte */ 3407 | +#define l1e_remove_rsvd(x, bits) ((x).l1 &= ~(bits)) 3408 | + 3409 | /* Check if a pte's page mapping or significant access flags have changed. */ 3410 | #define l1e_has_changed(x,y,flags) \ 3411 | ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) 3412 | @@ -167,6 +177,10 @@ static inline l4_pgentry_t l4e_from_paddr(paddr_t pa, unsigned int flags) 3413 | #define l4e_has_changed(x,y,flags) \ 3414 | ( !!(((x).l4 ^ (y).l4) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) ) 3415 | 3416 | +/* Check if a pte's reserved bits have changed */ 3417 | +#define l1e_has_changed_rsvd(x,y,bits) \ 3418 | + ( !!(((x).l1 ^ (y).l1) & ((PRSVD_MASK&(~PADDR_MASK))|(bits))) ) 3419 | + 3420 | /* Pagetable walking. 
*/ 3421 | #define l2e_to_l1e(x) ((l1_pgentry_t *)__va(l2e_get_paddr(x))) 3422 | #define l3e_to_l2e(x) ((l2_pgentry_t *)__va(l3e_get_paddr(x))) 3423 | diff --git a/xen/include/asm-x86/traps.h b/xen/include/asm-x86/traps.h 3424 | index 82cbcee..5420920 100644 3425 | --- a/xen/include/asm-x86/traps.h 3426 | +++ b/xen/include/asm-x86/traps.h 3427 | @@ -32,6 +32,8 @@ extern void machine_check_vector(struct cpu_user_regs *regs, long error_code); 3428 | 3429 | void async_exception_cleanup(struct vcpu *); 3430 | 3431 | +extern void do_guest_trap( 3432 | + int trapnr, const struct cpu_user_regs *regs, int use_error_code); 3433 | /** 3434 | * guest_has_trap_callback 3435 | * 3436 | diff --git a/xen/include/asm-x86/vgt.h b/xen/include/asm-x86/vgt.h 3437 | new file mode 100644 3438 | index 0000000..2e0afa8 3439 | --- /dev/null 3440 | +++ b/xen/include/asm-x86/vgt.h 3441 | @@ -0,0 +1,43 @@ 3442 | +/* 3443 | + * vgt.h: vGT related definitions 3444 | + * Copyright (c) 2011, Intel Corporation. 3445 | + * 3446 | + * This program is free software; you can redistribute it and/or modify it 3447 | + * under the terms and conditions of the GNU General Public License, 3448 | + * version 2, as published by the Free Software Foundation. 3449 | + * 3450 | + * This program is distributed in the hope it will be useful, but WITHOUT 3451 | + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 3452 | + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 3453 | + * more details. 3454 | + * 3455 | + * You should have received a copy of the GNU General Public License along with 3456 | + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 3457 | + * Place - Suite 330, Boston, MA 02111-1307 USA. 
3458 | + * 3459 | + */ 3460 | +#ifndef __ASM_X86_VGT_H__ 3461 | +#define __ASM_X86_VGT_H__ 3462 | + 3463 | +extern int vgt_io_trap(struct domain *d, struct xen_domctl_vgt_io_trap *info); 3464 | +extern int set_vgt_info(unsigned int gen_dev_bdf, unsigned int gen_dev_type); 3465 | +extern int vgt_hvm_init(struct domain *d); 3466 | +extern void vgt_hvm_deinit(struct domain *d); 3467 | +extern int hypercall_io_emulation(struct vcpu *v, struct vcpu_emul_ioreq *req); 3468 | +extern int hypercall_get_sysdata(struct vcpu *v, 3469 | + struct vcpu_sysdata_request *req); 3470 | +extern int do_rsvd_page_fault(struct vcpu *v, unsigned long addr, 3471 | + struct cpu_user_regs *regs); 3472 | +extern void vgt_inject_virtual_exception(struct cpu_user_regs *regs, int pio); 3473 | +extern int vgt_set_mmio_trap(struct domain *d, unsigned long mfn_start, unsigned long mfn_end); 3474 | + 3475 | +int vgt_hvm_intercept_io(ioreq_t *p); 3476 | + 3477 | +#define CF8_to_BDF(cf8) (((unsigned int)(cf8) >> 8) & 0xffff) 3478 | +extern unsigned int vgt_bdf; 3479 | + 3480 | +void vgt_resume(void); 3481 | + 3482 | +#endif // __ASM_X86_VGT_H__ 3483 | + 3484 | + 3485 | diff --git a/xen/include/asm-x86/x86_64/page.h b/xen/include/asm-x86/x86_64/page.h 3486 | index c193c88..5ee47fd 100644 3487 | --- a/xen/include/asm-x86/x86_64/page.h 3488 | +++ b/xen/include/asm-x86/x86_64/page.h 3489 | @@ -22,10 +22,24 @@ 3490 | #define __PAGE_OFFSET DIRECTMAP_VIRT_START 3491 | #define __XEN_VIRT_START XEN_VIRT_START 3492 | 3493 | -/* These are architectural limits. Current CPUs support only 40-bit phys. */ 3494 | -#define PADDR_BITS 52 3495 | +/* 3496 | + * These are architectural limits. Current CPUs support only 40-bit phys. 3497 | + * 3498 | + * There's a requirement to trap-and-emulate MMIO from PV guest (such as 3499 | + * in vGT), so we adopt an approach by using reserved bit to trap concerned 3500 | + * MMIO ranges. 
That means we need reserve some highest bits under 3501 | + * architectural limit for that purpose. Currently only one reserved bit is 3502 | + * stolen here, and it should work well on client platforms for quite a long 3503 | + * time. It should be enough to come up other architectural innovation before 3504 | + * all bits are supported by CPUs, if this usage continues to be appealing. 3505 | + * :-) 3506 | + */ 3507 | +#define PADDR_ARCH_BITS 52 3508 | +#define PRSVD_BITS PADDR_ARCH_BITS 3509 | +#define PADDR_BITS 51 3510 | #define VADDR_BITS 48 3511 | #define PADDR_MASK ((1UL << PADDR_BITS)-1) 3512 | +#define PRSVD_MASK ((1UL << PRSVD_BITS)-1) 3513 | #define VADDR_MASK ((1UL << VADDR_BITS)-1) 3514 | 3515 | #define is_canonical_address(x) (((long)(x) >> 47) == ((long)(x) >> 63)) 3516 | @@ -161,6 +175,9 @@ typedef l4_pgentry_t root_pgentry_t; 3517 | /* Bit 22 of a 24-bit flag mask. This corresponds to bit 62 of a pte.*/ 3518 | #define _PAGE_GNTTAB (1U<<22) 3519 | 3520 | +/* Bit 51 of the pte, as the indicator for MMIO trap-and-emulation */ 3521 | +#define _PAGE_FORWARD (1UL<<51) 3522 | + 3523 | #define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL) 3524 | #define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL) 3525 | 3526 | diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h 3527 | index 4c5b2bb..869f381 100644 3528 | --- a/xen/include/public/domctl.h 3529 | +++ b/xen/include/public/domctl.h 3530 | @@ -537,6 +537,24 @@ struct xen_domctl_ioport_mapping { 3531 | typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; 3532 | DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); 3533 | 3534 | +#define MAX_VGT_IO_TRAP_INFO 4 3535 | + 3536 | +struct vgt_io_trap_info { 3537 | + uint64_t s; 3538 | + uint64_t e; 3539 | +}; 3540 | + 3541 | +struct xen_domctl_vgt_io_trap { 3542 | + uint32_t n_pio; 3543 | + struct vgt_io_trap_info pio[MAX_VGT_IO_TRAP_INFO]; 3544 | + 3545 | + uint32_t n_mmio; 3546 | + struct 
vgt_io_trap_info mmio[MAX_VGT_IO_TRAP_INFO]; 3547 | +}; 3548 | + 3549 | +typedef struct xen_domctl_vgt_io_trap xen_domctl_vgt_io_trap_t; 3550 | +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vgt_io_trap_t); 3551 | + 3552 | 3553 | /* 3554 | * Pin caching type of RAM space for x86 HVM domU. 3555 | @@ -924,6 +942,9 @@ struct xen_domctl { 3556 | #define XEN_DOMCTL_gdbsx_pausevcpu 1001 3557 | #define XEN_DOMCTL_gdbsx_unpausevcpu 1002 3558 | #define XEN_DOMCTL_gdbsx_domstatus 1003 3559 | + 3560 | +#define XEN_DOMCTL_vgt_io_trap 700 3561 | + 3562 | uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ 3563 | domid_t domain; 3564 | union { 3565 | @@ -979,7 +1000,8 @@ struct xen_domctl { 3566 | struct xen_domctl_set_broken_page_p2m set_broken_page_p2m; 3567 | struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; 3568 | struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; 3569 | - uint8_t pad[128]; 3570 | + struct xen_domctl_vgt_io_trap vgt_io_trap; 3571 | + uint8_t pad[256]; 3572 | } u; 3573 | }; 3574 | typedef struct xen_domctl xen_domctl_t; 3575 | diff --git a/xen/include/public/hvm/e820.h b/xen/include/public/hvm/e820.h 3576 | index 5bdc227..683f091 100644 3577 | --- a/xen/include/public/hvm/e820.h 3578 | +++ b/xen/include/public/hvm/e820.h 3579 | @@ -27,7 +27,7 @@ 3580 | #define HVM_E820_NR_OFFSET 0x000001E8 3581 | #define HVM_E820_OFFSET 0x000002D0 3582 | 3583 | -#define HVM_BELOW_4G_RAM_END 0xF0000000 3584 | +#define HVM_BELOW_4G_RAM_END 0xC0000000 3585 | #define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END 3586 | #define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) 3587 | 3588 | diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h 3589 | index a9aab4b..4bfdd85 100644 3590 | --- a/xen/include/public/hvm/hvm_op.h 3591 | +++ b/xen/include/public/hvm/hvm_op.h 3592 | @@ -270,6 +270,36 @@ struct xen_hvm_inject_msi { 3593 | typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; 3594 | 
DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); 3595 | 3596 | +#define HVMOP_vgt_map_mmio 18 3597 | +struct xen_hvm_vgt_map_mmio { 3598 | + uint16_t domid; 3599 | + uint16_t map; /* 1: Map, 0: Unmap */ 3600 | + uint32_t nr_mfns; 3601 | + uint64_t first_gfn; 3602 | + uint64_t first_mfn; 3603 | +}; 3604 | +typedef struct xen_hvm_vgt_map_mmio xen_hvm_vgt_map_mmio_t; 3605 | +DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_map_mmio_t); 3606 | + 3607 | +#define HVMOP_vgt_enable 19 3608 | +struct xen_hvm_vgt_enable { 3609 | + uint16_t domid; 3610 | +}; 3611 | +typedef struct xen_hvm_vgt_enable xen_hvm_vgt_enable_t; 3612 | +DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_enable_t); 3613 | + 3614 | +#define HVMOP_vgt_wp_pages 20 /* writeprotection to guest pages */ 3615 | +struct xen_hvm_vgt_wp_pages { 3616 | +#define MAX_WP_BATCH_PAGES 128 3617 | + domid_t domid; 3618 | + uint16_t set; /* 1: set WP, 0: remove WP */ 3619 | + uint16_t nr_pages; 3620 | + unsigned long wp_pages[MAX_WP_BATCH_PAGES]; 3621 | +}; 3622 | +typedef struct xen_hvm_vgt_wp_pages xen_hvm_vgt_wp_pages_t; 3623 | +DEFINE_XEN_GUEST_HANDLE(xen_hvm_vgt_wp_pages_t); 3624 | + 3625 | + 3626 | #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ 3627 | 3628 | #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ 3629 | diff --git a/xen/include/public/hvm/ioreq.h b/xen/include/public/hvm/ioreq.h 3630 | index f05d130..c1a215a 100644 3631 | --- a/xen/include/public/hvm/ioreq.h 3632 | +++ b/xen/include/public/hvm/ioreq.h 3633 | @@ -54,8 +54,9 @@ struct ioreq { 3634 | * of the real data to use. 
*/ 3635 | uint8_t dir:1; /* 1=read, 0=write */ 3636 | uint8_t df:1; 3637 | - uint8_t _pad1:1; 3638 | + uint8_t is_vgt:1; /* if 1, it is vGT I/O request */ 3639 | uint8_t type; /* I/O type */ 3640 | + uint32_t vgt_eport; /* evtchn for notification to/from vGT driver */ 3641 | }; 3642 | typedef struct ioreq ioreq_t; 3643 | 3644 | diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h 3645 | index 7a26dee..a3d3611 100644 3646 | --- a/xen/include/public/memory.h 3647 | +++ b/xen/include/public/memory.h 3648 | @@ -461,6 +461,29 @@ DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); 3649 | 3650 | #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ 3651 | 3652 | +/* 3653 | + * Translate the given guest PFNs to MFNs 3654 | + */ 3655 | +#define XENMEM_get_mfn_from_pfn 25 3656 | +struct xen_get_mfn_from_pfn { 3657 | + /* 3658 | + * Pointer to buffer to fill with list of pfn. 3659 | + * for IN, it contains the guest PFN that need to translated 3660 | + * for OUT, it contains the translated MFN. or INVALID_MFN if no valid translation 3661 | + */ 3662 | + XEN_GUEST_HANDLE(xen_pfn_t) pfn_list; 3663 | + 3664 | + /* 3665 | + * IN: Size of the pfn_array. 
3666 | + */ 3667 | + unsigned int nr_pfns; 3668 | + 3669 | + /* IN: which domain */ 3670 | + domid_t domid; 3671 | +}; 3672 | +typedef struct xen_get_mfn_from_pfn xen_get_mfn_from_pfn_t; 3673 | +DEFINE_XEN_GUEST_HANDLE(xen_get_mfn_from_pfn_t); 3674 | + 3675 | #endif /* __XEN_PUBLIC_MEMORY_H__ */ 3676 | 3677 | /* 3678 | diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h 3679 | index 4341f54..295ee43 100644 3680 | --- a/xen/include/public/platform.h 3681 | +++ b/xen/include/public/platform.h 3682 | @@ -527,6 +527,21 @@ struct xenpf_core_parking { 3683 | typedef struct xenpf_core_parking xenpf_core_parking_t; 3684 | DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); 3685 | 3686 | +#define XENPF_set_vgt_info 61 3687 | + 3688 | +#define XEN_IGD_INVALID 0 3689 | +#define XEN_IGD_SNB 1 3690 | +#define XEN_IGD_IVB 2 3691 | +#define XEN_IGD_HSW 3 3692 | +#define XEN_IGD_BDW 4 3693 | +#define XEN_IGD_MAX 4 /* the max GEN dev type supported */ 3694 | +struct xenpf_vgt_info { 3695 | + unsigned int gen_dev_bdf; 3696 | + unsigned int gen_dev_type; 3697 | +}; 3698 | +typedef struct xenpf_vgt_info xenpf_vgt_info_t; 3699 | +DEFINE_XEN_GUEST_HANDLE(xenpf_vgt_info_t); 3700 | + 3701 | /* 3702 | * ` enum neg_errnoval 3703 | * ` HYPERVISOR_platform_op(const struct xen_platform_op*); 3704 | @@ -553,6 +568,7 @@ struct xen_platform_op { 3705 | struct xenpf_cpu_hotadd cpu_add; 3706 | struct xenpf_mem_hotadd mem_add; 3707 | struct xenpf_core_parking core_parking; 3708 | + struct xenpf_vgt_info vgt_info; 3709 | uint8_t pad[128]; 3710 | } u; 3711 | }; 3712 | diff --git a/xen/include/public/vcpu.h b/xen/include/public/vcpu.h 3713 | index e888daf..5fa5066 100644 3714 | --- a/xen/include/public/vcpu.h 3715 | +++ b/xen/include/public/vcpu.h 3716 | @@ -227,6 +227,57 @@ struct vcpu_register_time_memory_area { 3717 | typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; 3718 | DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); 3719 | 3720 | +/* 
Request an I/O emulation for the specified VCPU. */ 3721 | +#define VCPUOP_request_io_emulation 14 3722 | + 3723 | +#define PV_IOREQ_READ 1 3724 | +#define PV_IOREQ_WRITE 0 3725 | + 3726 | +#define PV_IOREQ_TYPE_PIO 0 /* pio */ 3727 | +#define PV_IOREQ_TYPE_COPY 1 /* mmio ops */ 3728 | +#define PV_IOREQ_TYPE_CTRL 2 /* vGT control ops */ 3729 | + 3730 | +/* for "addr" field , when "type" is PV_IOREQ_TYPE_CTRL */ 3731 | +#define VGT_CTRL_FORCEWAKE_GET 0 3732 | +#define VGT_CTRL_FORCEWAKE_PUT 1 3733 | + 3734 | +struct vcpu_emul_ioreq { 3735 | + uint64_t addr; /* physical address */ 3736 | + uint64_t data; /* data (or paddr of data) */ 3737 | + uint64_t count; /* for rep prefixes */ 3738 | + uint32_t size; /* size in bytes */ 3739 | + uint16_t _pad0; 3740 | + uint8_t state:4; 3741 | + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr 3742 | + * of the real data to use. */ 3743 | + uint8_t dir:1; /* 1=read, 0=write */ 3744 | + uint8_t df:1; 3745 | + uint8_t _pad1:1; 3746 | + uint8_t type; /* I/O type */ 3747 | +}; 3748 | +typedef struct vcpu_emul_ioreq vcpu_emul_ioreq_t; 3749 | +DEFINE_XEN_GUEST_HANDLE(vcpu_emul_ioreq_t); 3750 | + 3751 | +#define VCPUOP_get_sysdata 16 3752 | +/* sub operations */ 3753 | +#define VCPUOP_sysdata_get_segment 0 3754 | +#define VCPUOP_sysdata_read 1 3755 | +struct vcpu_sysdata_request { 3756 | + uint64_t op_type; 3757 | + union { 3758 | + struct { 3759 | + uint32_t selector; 3760 | + uint32_t pad1; 3761 | + uint64_t xdt_desc[2]; 3762 | + /* System descriptors uses 2 descriptors in IA32e */ 3763 | + } seg; 3764 | + struct { 3765 | + uint64_t src_addr; /* linear address */ 3766 | + uint64_t sys_data; 3767 | + uint32_t bytes; 3768 | + } rd; 3769 | + } ops; 3770 | +}; 3771 | #endif /* __XEN_PUBLIC_VCPU_H__ */ 3772 | 3773 | /* 3774 | diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h 3775 | index fe179b9..4c6021a 100644 3776 | --- a/xen/include/public/xen.h 3777 | +++ b/xen/include/public/xen.h 3778 | @@ -789,6 +789,7 @@ 
struct xen_multiboot_mod_list 3779 | /* Unused, must be zero */ 3780 | uint32_t pad; 3781 | }; 3782 | + 3783 | /* 3784 | * `incontents 200 startofday_dom0_console Dom0_console 3785 | * 3786 | diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h 3787 | index 4ac39ad..3d80a9d 100644 3788 | --- a/xen/include/xen/event.h 3789 | +++ b/xen/include/xen/event.h 3790 | @@ -66,6 +66,9 @@ void free_xen_event_channel( 3791 | /* Query if event channel is in use by the guest */ 3792 | int guest_enabled_event(struct vcpu *v, uint32_t virq); 3793 | 3794 | +/* Notify remote end of a Xen-attached event channel - no event lock held. */ 3795 | +void __notify_via_xen_event_channel(struct domain *ld, int lport); 3796 | + 3797 | /* Notify remote end of a Xen-attached event channel.*/ 3798 | void notify_via_xen_event_channel(struct domain *ld, int lport); 3799 | 3800 | diff --git a/xen/include/xen/iocap.h b/xen/include/xen/iocap.h 3801 | index b755ecb..16a88be 100644 3802 | --- a/xen/include/xen/iocap.h 3803 | +++ b/xen/include/xen/iocap.h 3804 | @@ -17,6 +17,13 @@ 3805 | #define iomem_access_permitted(d, s, e) \ 3806 | rangeset_contains_range((d)->iomem_caps, s, e) 3807 | 3808 | +#define iomem_permit_forward(d, s, e) \ 3809 | + rangeset_add_range((d)->iomem_forward_caps, s, e) 3810 | +#define iomem_deny_forward(d, s, e) \ 3811 | + rangeset_remove_range((d)->iomem_forward_caps, s, e) 3812 | +#define iomem_forward_permitted(d, s, e) \ 3813 | + rangeset_contains_range((d)->iomem_forward_caps, s, e) 3814 | + 3815 | #define irq_permit_access(d, i) \ 3816 | rangeset_add_singleton((d)->irq_caps, i) 3817 | #define irq_deny_access(d, i) \ 3818 | diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h 3819 | index 1e16a6b..9d51aa4 100644 3820 | --- a/xen/include/xen/rangeset.h 3821 | +++ b/xen/include/xen/rangeset.h 3822 | @@ -73,4 +73,8 @@ void rangeset_printk( 3823 | void rangeset_domain_printk( 3824 | struct domain *d); 3825 | 3826 | +void *first_range_ex(struct rangeset 
*r); 3827 | +void *get_range(struct rangeset *r, void *y, 3828 | + unsigned long *s, unsigned long *e); 3829 | + 3830 | #endif /* __XEN_RANGESET_H__ */ 3831 | diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h 3832 | index ae6a3b8..8f7b255 100644 3833 | --- a/xen/include/xen/sched.h 3834 | +++ b/xen/include/xen/sched.h 3835 | @@ -284,6 +284,7 @@ struct domain 3836 | /* I/O capabilities (access to IRQs and memory-mapped I/O). */ 3837 | struct rangeset *iomem_caps; 3838 | struct rangeset *irq_caps; 3839 | + struct rangeset *iomem_forward_caps; 3840 | 3841 | /* Is this an HVM guest? */ 3842 | bool_t is_hvm; 3843 | --------------------------------------------------------------------------------