├── .github └── workflows │ ├── reviewdog.yml │ └── spellcheck.yml ├── .gitignore ├── .spellcheck.yaml ├── .wordlist.txt ├── Makefile ├── README.md └── p2pmem_pci.c /.github/workflows/reviewdog.yml: -------------------------------------------------------------------------------- 1 | name: reviewdog 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | shellcheck: 11 | name: runner / shellcheck 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v1 15 | - name: shellcheck 16 | uses: reviewdog/action-shellcheck@v1 17 | with: 18 | github_token: ${{ secrets.github_token }} 19 | reporter: github-pr-review 20 | pattern: "*" 21 | exclude: "./*.*" 22 | misspell: 23 | name: runner / misspell 24 | runs-on: ubuntu-latest 25 | steps: 26 | - name: Check out code. 27 | uses: actions/checkout@v1 28 | - name: misspell 29 | uses: reviewdog/action-misspell@v1 30 | with: 31 | github_token: ${{ secrets.github_token }} 32 | locale: "CA" 33 | -------------------------------------------------------------------------------- /.github/workflows/spellcheck.yml: -------------------------------------------------------------------------------- 1 | name: spellcheck 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | spellcheck: 11 | name: runner / spellcheck 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out code. 
15 | uses: actions/checkout@v2 16 | - name: Run PySpelling as a GitHub action 17 | uses: sbates130272/spellcheck-github-actions@v0.5.0 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.cmd 2 | .tmp_versions/ 3 | Module.symvers 4 | *.ko 5 | *.mod.c 6 | *~ 7 | modules.order 8 | -------------------------------------------------------------------------------- /.spellcheck.yaml: -------------------------------------------------------------------------------- 1 | spellchecker: aspell 2 | matrix: 3 | - name: markdown 4 | aspell: 5 | lang: en 6 | sources: 7 | - '**/*.md' 8 | pipeline: 9 | - pyspelling.filters.markdown 10 | dictionary: 11 | wordlists: 12 | - wordlist.txt 13 | output: wordlist.dic 14 | encoding: utf-8 15 | default_encoding: utf-8 16 | expect_match: true 17 | -------------------------------------------------------------------------------- /.wordlist.txt: -------------------------------------------------------------------------------- 1 | li 2 | ol 3 | 4 | CMBs 5 | Collaterals 6 | KDIR 7 | Maier 8 | NVM 9 | NVMF 10 | PCIe 11 | PMEM 12 | 13 | api 14 | config 15 | dev 16 | distclean 17 | flashmemorysummit 18 | github 19 | href 20 | html 21 | https 22 | kbuild 23 | ko 24 | mmap 25 | mmapping 26 | modversions 27 | pci 28 | pdf 29 | pdma 30 | pmem 31 | pmemX 32 | sbates 33 | standlone 34 | symvers 35 | txt 36 | userspace 37 | www 38 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # By default, the build is done against the running linux kernel source. 
3 | # To build against a different kernel source tree, set KDIR: 4 | # 5 | # make KDIR=/path/to/kernel/source 6 | 7 | ifdef KDIR 8 | KERNEL_SOURCES = $(KDIR) 9 | else 10 | KERNEL_UNAME := $(shell uname -r) 11 | KERNEL_SOURCES = /lib/modules/$(KERNEL_UNAME)/build 12 | endif 13 | 14 | default: modules 15 | .PHONY: default 16 | 17 | obj-m += p2pmem_pci.o 18 | 19 | %:: 20 | $(MAKE) -C $(KERNEL_SOURCES) M=$$PWD $@ 21 | 22 | p2pmem-pci.ko: 23 | 24 | install: modules_install 25 | .PHONY: install 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # P2PMEM PCIe Linux Driver 2 | 3 | ## Disclaimer 4 | 5 | This driver exposes p2pmem to userspace without taking appropriate 6 | safety measures to ensure it is used correctly. If you do certain 7 | things with the pointers obtained by mmapping the /dev/p2pmemX exposed 8 | by this driver bad things will probably happen. Consider yourself 9 | warned. 10 | 11 | ## Introduction 12 | 13 | This is a standalone PCIe driver that ties into the [p2pmem][1] 14 | framework. It can be used for any PCIe end-point devices that have 15 | registered one or more PCIe BAR(s) with the p2pdma framework (e.g. [NVM 16 | Express CMBs][2]). 17 | 18 | ## Build and Install 19 | 20 | To build the module as a .ko for manual installation run: 21 | 22 | ``` 23 | make KDIR=/path/to/kernel/source 24 | ``` 25 | 26 | Note the resultant kernel module is called p2pmem_pci.ko. To install 27 | the module in the current kernel's module tree run: 28 | 29 | ``` 30 | make install 31 | ``` 32 | 33 | Note that for this to work you either need to have module signing set 34 | up or turned off on your machine. Also this code will only work for >= 35 | 4.20.x kernels. Please look for the largest tag that is less than or 36 | equal to your kernel version and use that tag. 
37 | 38 | Often you don't want to compile against the kernel installed on your 39 | host so you can use the instructions [here][3] that describe how to 40 | prepare a kernel tree for out-of-tree module compilation. The 41 | following process seems to work well (in the top-level kernel source 42 | folder): 43 | 44 | 1. ```make distclean```. 45 | 2. Set up your .config (don't forget you will need p2pdma enabled). 46 | 3. ```make modules_prepare```. 47 | 48 | You can now go back to your p2pmem-pci repository and run the make 49 | command as noted previously. Note you *might* get the following 50 | warning when you build the module. 51 | 52 | ``` 53 | WARNING: Symbol version dump ./Module.symvers 54 | is missing; modules will have no dependencies and modversions. 55 | ``` 56 | This behaviour is [expected][3]. 57 | 58 | ## Usage 59 | 60 | Once this module has been inserted you should see a /dev/p2pmemX for 61 | each of the p2pmem regions in your system. You can then use mmap() to 62 | obtain virtual address pointers backed by memory on the PCIe BAR(s) 63 | associated with /dev/p2pmemX. You can then pass these pointers into 64 | library functions like write() and read() *as long as you use 65 | O_DIRECT*. 66 | 67 | An example of how to use /dev/p2pmemX is via [p2pmem-test][4] which 68 | also has more information on setting up p2pdma enabled kernels and a 69 | p2pdma capable system. 
70 | 71 | [1]: https://www.kernel.org/doc/html/latest/driver-api/pci/p2pdma.html 72 | [2]: https://www.flashmemorysummit.com/English/Collaterals/Proceedings/2018/20180809_NVMF-301-1_Maier.pdf 73 | [3]: https://www.kernel.org/doc/Documentation/kbuild/modules.txt 74 | [4]: https://github.com/sbates130272/p2pmem-test 75 | -------------------------------------------------------------------------------- /p2pmem_pci.c: -------------------------------------------------------------------------------- 1 | /* 2 | * P2PMEM PCI EP Device Driver 3 | * Copyright (c) 2017, Eideticom 4 | * 5 | * This program is free software; you can redistribute it and/or modify it 6 | * under the terms and conditions of the GNU General Public License, 7 | * version 2, as published by the Free Software Foundation. 8 | * 9 | * This program is distributed in the hope it will be useful, but WITHOUT 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 | * more details. 
13 | * 14 | * Copyright (C) 2017 Eideitcom 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define PCI_VENDOR_EIDETICOM 0x1de5 25 | #define PCI_VENDOR_MICROSEMI 0x11f8 26 | #define PCI_MTRAMON_DEV_ID 0xf117 27 | 28 | MODULE_LICENSE("GPL"); 29 | MODULE_AUTHOR("Stephen Bates vm_private_data; 76 | 77 | atomic_inc(&pv->mmap_count); 78 | } 79 | 80 | static void p2pmem_vma_free_pages(struct vm_area_struct *vma) 81 | { 82 | int i; 83 | struct p2pmem_vma *pv = vma->vm_private_data; 84 | 85 | mutex_lock(&pv->mutex); 86 | 87 | for (i = 0; i < pv->nr_pages; i++) { 88 | if (pv->used_pages[i]) { 89 | pci_free_p2pmem(pv->p2pmem_dev->pdev, 90 | page_to_virt(pv->used_pages[i]), 91 | PAGE_SIZE); 92 | pv->used_pages[i] = NULL; 93 | } 94 | } 95 | 96 | mutex_unlock(&pv->mutex); 97 | } 98 | 99 | static void p2pmem_vma_close(struct vm_area_struct *vma) 100 | { 101 | struct p2pmem_vma *pv = vma->vm_private_data; 102 | 103 | if (!atomic_dec_and_test(&pv->mmap_count)) 104 | return; 105 | 106 | p2pmem_vma_free_pages(vma); 107 | 108 | dev_dbg(&pv->p2pmem_dev->dev, "vma close"); 109 | kfree(pv); 110 | } 111 | 112 | static vm_fault_t p2pmem_vma_fault(struct vm_fault *vmf) 113 | { 114 | struct p2pmem_vma *pv = vmf->vma->vm_private_data; 115 | unsigned int pg_idx; 116 | struct page *pg; 117 | pfn_t pfn; 118 | vm_fault_t rc; 119 | 120 | pg_idx = (vmf->address - vmf->vma->vm_start) / PAGE_SIZE; 121 | 122 | mutex_lock(&pv->mutex); 123 | 124 | if (pv->used_pages[pg_idx]) 125 | pg = pv->used_pages[pg_idx]; 126 | else 127 | pg = virt_to_page(pci_alloc_p2pmem(pv->p2pmem_dev->pdev, 128 | PAGE_SIZE)); 129 | 130 | if (!pg) { 131 | mutex_unlock(&pv->mutex); 132 | return VM_FAULT_OOM; 133 | } 134 | 135 | pv->used_pages[pg_idx] = pg; 136 | 137 | pfn = phys_to_pfn_t(page_to_phys(pg), PFN_DEV | PFN_MAP); 138 | rc = vmf_insert_mixed(vmf->vma, vmf->address, pfn); 139 | 140 | mutex_unlock(&pv->mutex); 141 | 142 | return rc; 143 | } 144 | 145 | const 
struct vm_operations_struct p2pmem_vmops = { 146 | .open = p2pmem_vma_open, 147 | .close = p2pmem_vma_close, 148 | .fault = p2pmem_vma_fault, 149 | }; 150 | 151 | static int p2pmem_open(struct inode *inode, struct file *filp) 152 | { 153 | struct p2pmem_dev *p; 154 | 155 | p = container_of(inode->i_cdev, struct p2pmem_dev, cdev); 156 | filp->private_data = p; 157 | 158 | return 0; 159 | } 160 | 161 | static int p2pmem_mmap(struct file *filp, struct vm_area_struct *vma) 162 | { 163 | struct p2pmem_dev *p = filp->private_data; 164 | struct p2pmem_vma *pv; 165 | size_t nr_pages = (vma->vm_end - vma->vm_start) / PAGE_SIZE; 166 | 167 | if ((vma->vm_flags & VM_MAYSHARE) != VM_MAYSHARE) { 168 | dev_warn(&p->dev, "mmap failed: can't create private mapping\n"); 169 | return -EINVAL; 170 | } 171 | 172 | dev_dbg(&p->dev, "Allocating mmap with %zd pages.\n", nr_pages); 173 | 174 | pv = kzalloc(sizeof(*pv) + sizeof(pv->used_pages[0]) * nr_pages, 175 | GFP_KERNEL); 176 | if (!pv) 177 | return -ENOMEM; 178 | 179 | mutex_init(&pv->mutex); 180 | pv->nr_pages = nr_pages; 181 | pv->p2pmem_dev = p; 182 | atomic_set(&pv->mmap_count, 1); 183 | 184 | vma->vm_private_data = pv; 185 | vma->vm_ops = &p2pmem_vmops; 186 | vma->vm_flags |= VM_MIXEDMAP; 187 | 188 | return 0; 189 | } 190 | 191 | static const struct file_operations p2pmem_fops = { 192 | .owner = THIS_MODULE, 193 | .open = p2pmem_open, 194 | .mmap = p2pmem_mmap, 195 | }; 196 | 197 | static int p2pmem_test_page_mappings(struct p2pmem_dev *p) 198 | { 199 | void *addr; 200 | int err = 0; 201 | struct page *page; 202 | struct pci_bus_region bus_region; 203 | struct resource res; 204 | phys_addr_t pa; 205 | 206 | addr = pci_alloc_p2pmem(p->pdev, PAGE_SIZE); 207 | if (!addr) 208 | return -ENOMEM; 209 | 210 | page = virt_to_page(addr); 211 | if (!is_zone_device_page(page)) { 212 | dev_err(&p->dev, 213 | "ERROR: kernel virt_to_page does not point to a ZONE_DEVICE page!"); 214 | err = -EFAULT; 215 | goto out; 216 | } 217 | 218 | 
bus_region.start = pci_p2pmem_virt_to_bus(p->pdev, addr); 219 | bus_region.end = bus_region.start + PAGE_SIZE; 220 | 221 | pcibios_bus_to_resource(p->pdev->bus, &res, &bus_region); 222 | 223 | pa = page_to_phys(page); 224 | if (pa != res.start) { 225 | dev_err(&p->dev, 226 | "ERROR: page_to_phys does not map to the BAR address!" 227 | " %pa[p] != %pa[p]", &pa, &res.start); 228 | err = -EFAULT; 229 | goto out; 230 | } 231 | 232 | pa = virt_to_phys(addr); 233 | if (pa != res.start) { 234 | dev_err(&p->dev, 235 | "ERROR: virt_to_phys does not map to the BAR address!" 236 | " %pa[p] != %pa[p]", &pa, &res.start); 237 | err = -EFAULT; 238 | goto out; 239 | } 240 | 241 | if (page_to_virt(page) != addr) { 242 | dev_err(&p->dev, 243 | "ERROR: page_to_virt does not map to the correct address!"); 244 | err = -EFAULT; 245 | goto out; 246 | } 247 | 248 | out: 249 | if (err == 0) 250 | dev_info(&p->dev, "kernel page mappings seem sane."); 251 | 252 | pci_free_p2pmem(p->pdev, addr, PAGE_SIZE); 253 | return err; 254 | } 255 | 256 | static int p2pmem_test_p2p_access(struct p2pmem_dev *p) 257 | { 258 | u32 *addr; 259 | const u32 test_value = 0x11223344; 260 | int err = 0; 261 | 262 | addr = pci_alloc_p2pmem(p->pdev, PAGE_SIZE); 263 | if (!addr) 264 | return -ENOMEM; 265 | 266 | WRITE_ONCE(addr[0], 0); 267 | if (READ_ONCE(addr[0]) != 0) { 268 | err = -EFAULT; 269 | goto out; 270 | } 271 | 272 | WRITE_ONCE(addr[0], test_value); 273 | if (READ_ONCE(addr[0]) != test_value) { 274 | err = -EFAULT; 275 | goto out; 276 | } 277 | 278 | out: 279 | if (err == 0) 280 | dev_info(&p->dev, "kernel can access p2p memory."); 281 | else 282 | dev_err(&p->dev, "ERROR: kernel can't access p2p memory!"); 283 | 284 | pci_free_p2pmem(p->pdev, addr, PAGE_SIZE); 285 | return err; 286 | } 287 | 288 | static int p2pmem_test(struct p2pmem_dev *p) 289 | { 290 | int err; 291 | 292 | err = p2pmem_test_page_mappings(p); 293 | if (err) 294 | return err; 295 | 296 | return p2pmem_test_p2p_access(p); 297 | } 298 | 
299 | static void p2pmem_release(struct device *dev) 300 | { 301 | struct p2pmem_dev *p = to_p2pmem(dev); 302 | 303 | kfree(p); 304 | } 305 | 306 | static struct p2pmem_dev *p2pmem_create(struct pci_dev *pdev) 307 | { 308 | struct p2pmem_dev *p; 309 | int err; 310 | 311 | p = kzalloc(sizeof(*p), GFP_KERNEL); 312 | if (!p) 313 | return ERR_PTR(-ENOMEM); 314 | 315 | p->pdev = pdev; 316 | 317 | device_initialize(&p->dev); 318 | p->dev.class = p2pmem_class; 319 | p->dev.parent = &pdev->dev; 320 | p->dev.release = p2pmem_release; 321 | 322 | p->id = ida_simple_get(&p2pmem_ida, 0, 0, GFP_KERNEL); 323 | if (p->id < 0) { 324 | err = p->id; 325 | goto out_free; 326 | } 327 | 328 | dev_set_name(&p->dev, "p2pmem%d", p->id); 329 | p->dev.devt = MKDEV(MAJOR(p2pmem_devt), p->id); 330 | 331 | cdev_init(&p->cdev, &p2pmem_fops); 332 | p->cdev.owner = THIS_MODULE; 333 | 334 | err = cdev_device_add(&p->cdev, &p->dev); 335 | if (err) 336 | goto out_ida; 337 | 338 | dev_info(&p->dev, "registered"); 339 | 340 | p2pmem_test(p); 341 | 342 | return p; 343 | 344 | out_ida: 345 | ida_simple_remove(&p2pmem_ida, p->id); 346 | out_free: 347 | kfree(p); 348 | return ERR_PTR(err); 349 | } 350 | 351 | void p2pmem_destroy(struct p2pmem_dev *p) 352 | { 353 | dev_info(&p->dev, "unregistered"); 354 | cdev_device_del(&p->cdev, &p->dev); 355 | ida_simple_remove(&p2pmem_ida, p->id); 356 | put_device(&p->dev); 357 | } 358 | 359 | static int p2pmem_pci_probe(struct pci_dev *pdev, 360 | const struct pci_device_id *id) 361 | { 362 | struct p2pmem_dev *p; 363 | int err = 0; 364 | 365 | if (pci_enable_device_mem(pdev) < 0) { 366 | dev_err(&pdev->dev, "unable to enable device!\n"); 367 | goto out; 368 | } 369 | 370 | err = pci_p2pdma_add_resource(pdev, id->driver_data, 0, 0); 371 | if (err) { 372 | dev_err(&pdev->dev, "unable to add p2p resource"); 373 | goto out_disable_device; 374 | } 375 | 376 | pci_p2pmem_publish(pdev, true); 377 | 378 | p = p2pmem_create(pdev); 379 | if (IS_ERR(p)) 380 | goto 
out_disable_device; 381 | 382 | pci_set_drvdata(pdev, p); 383 | 384 | return 0; 385 | 386 | out_disable_device: 387 | pci_disable_device(pdev); 388 | out: 389 | return err; 390 | } 391 | 392 | static void p2pmem_pci_remove(struct pci_dev *pdev) 393 | { 394 | struct p2pmem_dev *p = pci_get_drvdata(pdev); 395 | 396 | p2pmem_destroy(p); 397 | } 398 | 399 | static struct pci_driver p2pmem_pci_driver = { 400 | .name = "p2pmem_pci", 401 | .id_table = p2pmem_pci_id_table, 402 | .probe = p2pmem_pci_probe, 403 | .remove = p2pmem_pci_remove, 404 | }; 405 | 406 | static void ugly_mtramon_hack_init(void) 407 | { 408 | struct pci_dev *pdev = NULL; 409 | struct p2pmem_dev *p; 410 | int err; 411 | 412 | while ((pdev = pci_get_device(PCI_VENDOR_MICROSEMI, 413 | PCI_MTRAMON_DEV_ID, 414 | pdev))) { 415 | // If there's no driver it can be handled by the regular 416 | // pci driver case 417 | if (!pdev->driver) 418 | continue; 419 | 420 | // The NVME driver already handled it 421 | if (pdev->p2pdma) 422 | continue; 423 | 424 | if (!pdev->p2pdma) { 425 | err = pci_p2pdma_add_resource(pdev, MTRAMON_BAR, 0, 0); 426 | if (err) { 427 | dev_err(&pdev->dev, 428 | "unable to add p2p resource"); 429 | continue; 430 | } 431 | } 432 | 433 | p = p2pmem_create(pdev); 434 | if (!p) 435 | continue; 436 | 437 | p->created_by_hack = true; 438 | } 439 | } 440 | 441 | static void ugly_hack_to_create_p2pmem_devs_for_other_devices(void) 442 | { 443 | struct pci_dev *pdev = NULL; 444 | struct p2pmem_dev *p; 445 | 446 | while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { 447 | if (!pdev->p2pdma) 448 | continue; 449 | 450 | p = p2pmem_create(pdev); 451 | if (!p) 452 | continue; 453 | 454 | p->created_by_hack = true; 455 | } 456 | } 457 | 458 | static void ugly_hack_deinit(void) 459 | { 460 | struct class_dev_iter iter; 461 | struct device *dev; 462 | struct p2pmem_dev *p; 463 | 464 | class_dev_iter_init(&iter, p2pmem_class, NULL, NULL); 465 | while ((dev = class_dev_iter_next(&iter))) { 466 | p = 
to_p2pmem(dev); 467 | if (p->created_by_hack) 468 | p2pmem_destroy(p); 469 | } 470 | class_dev_iter_exit(&iter); 471 | } 472 | 473 | static int __init p2pmem_pci_init(void) 474 | { 475 | int rc; 476 | 477 | p2pmem_class = class_create(THIS_MODULE, "p2pmem_device"); 478 | if (IS_ERR(p2pmem_class)) 479 | return PTR_ERR(p2pmem_class); 480 | 481 | rc = alloc_chrdev_region(&p2pmem_devt, 0, max_devices, "p2pmem"); 482 | if (rc) 483 | goto err_class; 484 | 485 | ugly_hack_to_create_p2pmem_devs_for_other_devices(); 486 | ugly_mtramon_hack_init(); 487 | 488 | rc = pci_register_driver(&p2pmem_pci_driver); 489 | if (rc) 490 | goto err_chdev; 491 | 492 | pr_info(KBUILD_MODNAME ": module loaded\n"); 493 | 494 | return 0; 495 | err_chdev: 496 | unregister_chrdev_region(p2pmem_devt, max_devices); 497 | err_class: 498 | class_destroy(p2pmem_class); 499 | return rc; 500 | } 501 | 502 | static void __exit p2pmem_pci_cleanup(void) 503 | { 504 | pci_unregister_driver(&p2pmem_pci_driver); 505 | ugly_hack_deinit(); 506 | unregister_chrdev_region(p2pmem_devt, max_devices); 507 | class_destroy(p2pmem_class); 508 | pr_info(KBUILD_MODNAME ": module unloaded\n"); 509 | } 510 | 511 | late_initcall(p2pmem_pci_init); 512 | module_exit(p2pmem_pci_cleanup); 513 | --------------------------------------------------------------------------------