├── Makefile
├── README.md
├── build_module.sh
├── build_release.sh
├── compat_nv-p2p.h
├── create_nv.symvers.sh
├── debian
    ├── .gitignore
    ├── changelog
    ├── compat
    ├── control
    ├── nvidia-peer-memory-dkms.dkms
    ├── nvidia-peer-memory-dkms.install
    ├── nvidia-peer-memory-dkms.postinst
    ├── nvidia-peer-memory.install
    ├── nvidia-peer-memory.postinst
    ├── nvidia-peer-memory.prerm
    ├── patches
    │   ├── dkms_name.patch
    │   └── series
    ├── rules
    ├── source
    │   └── format
    └── updateInit.sh
├── dkms.conf
├── nv_peer_mem
├── nv_peer_mem.c
├── nv_peer_mem.conf
├── nv_peer_mem.upstart
└── nvidia_peer_memory.spec


/Makefile:
--------------------------------------------------------------------------------
  1 | obj-m += nv_peer_mem.o
  2 | # Command-style targets (none names a file this Makefile produces); keep every such target listed here.
  3 | PHONY += all clean install install-dkms install-utils uninstall gen_nv_symvers
  4 | .PHONY: $(PHONY)
  5 | 
  6 | KVER := $(shell uname -r)
  7 | OFA_ARCH := $(shell uname -m)
  8 | OFA_DIR ?= /usr/src/ofa_kernel
  9 | OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KVER) $(OFA_DIR)/$(KVER) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
 10 | OFA_KERNEL ?= $(shell for d in $(OFA_CANDIDATES); do if [ -d "$$d" ]; then echo "$$d"; exit 0; fi; done; echo $(OFA_DIR))
 11 | 
 12 | ifneq ($(shell test -d $(OFA_KERNEL) && echo "true" || echo "" ),)
 13 | $(info INFO: Building with MLNX_OFED from: $(OFA_KERNEL))
 14 | ccflags-y += -I$(OFA_KERNEL)/include/ -I$(OFA_KERNEL)/include/rdma
 15 | else
 16 | $(info INFO: Building with Inbox InfiniBand Stack)
 17 | $(warning "WARNING: Compilation might fail against Inbox InfiniBand Stack as it might lack needed support; in such cases you need to install MLNX_OFED package first.")
 18 | endif
 19 | 
 20 | PWD  := $(shell pwd)
 21 | MODULES_DIR := /lib/modules/$(KVER)
 22 | KDIR := $(MODULES_DIR)/build
 23 | MODULE_DESTDIR := $(MODULES_DIR)/extra/
 24 | DEPMOD := /sbin/depmod
 25 | 
 26 | MOD_NAME := nv_peer_mem
 27 | MOD_VERSION = $(shell awk '/^Version:/ {print $$2}' nvidia_peer_memory.spec)
 28 | DKMS_SRC_DIR = /usr/src/$(MOD_NAME)-$(MOD_VERSION)
 29 | SOURCE_FILES := Makefile compat_nv-p2p.h nv_peer_mem.c \
 30 |   create_nv.symvers.sh dkms.conf
 31 | 
 32 | # GCC earlier than 4.6.0 will build modules which require 'mcount',
 33 | # and this symbol will not be available in the kernel if the kernel was
 34 | # compiled with GCC 4.6.0 and above.
 35 | # therefore, to prevent unknown symbol issues we disable function tracing.
 36 | #
 37 | CC  = $(CROSS_COMPILE)gcc
 38 | CPP = $(CC) -E
 39 | 
 40 | CPP_MAJOR := $(shell $(CPP) -dumpversion 2>&1 | cut -d'.' -f1)
 41 | CPP_MINOR := $(shell $(CPP) -dumpversion 2>&1 | cut -d'.' -f2)
 42 | CPP_PATCH := $(shell $(CPP) -dumpversion 2>&1 | cut -d'.' -f3)
 43 | # Assumes that major, minor, and patch cannot exceed 999
 44 | CPP_VERS  := $(shell expr 0$(CPP_MAJOR) \* 1000000 + 0$(CPP_MINOR) \* 1000 + 0$(CPP_PATCH))
 45 | compile_h=$(shell /bin/ls -1 $(KDIR)/include/*/compile.h 2> /dev/null | head -1)
 46 | ifneq ($(compile_h),)
 47 | KERNEL_GCC_MAJOR := $(shell grep LINUX_COMPILER $(compile_h) | sed -r -e 's/.*gcc \S+ ([0-9\.\-]*) .*/\1/g' | cut -d'.' -f1)
 48 | KERNEL_GCC_MINOR := $(shell grep LINUX_COMPILER $(compile_h) | sed -r -e 's/.*gcc \S+ ([0-9\.\-]*) .*/\1/g' | cut -d'.' -f2)
 49 | KERNEL_GCC_PATCH := $(shell grep LINUX_COMPILER $(compile_h) | sed -r -e 's/.*gcc \S+ ([0-9\.\-]*) .*/\1/g' | cut -d'.' -f3)
 50 | KERNEL_GCC_VER  := $(shell expr 0$(KERNEL_GCC_MAJOR) \* 1000000 + 0$(KERNEL_GCC_MINOR) \* 1000 + 0$(KERNEL_GCC_PATCH))
 51 | ifneq ($(shell if [ $(CPP_VERS) -lt 4006000 ] && [ $(KERNEL_GCC_VER) -ge 4006000 ]; then \
 52 |                              echo "YES"; else echo ""; fi),)
 53 | $(info Warning: The kernel was compiled with GCC newer than 4.6.0, while the current GCC is older than 4.6.0, Disabling function tracing to prevent unknown symbol issues...)
 54 | override MAKE_PARAMS += CONFIG_FUNCTION_TRACER= CONFIG_HAVE_FENTRY=
 55 | endif
 56 | endif
 57 | 
 58 | #
 59 | # Get nv-p2p.h header file of the currently installed CUDA version.
 60 | # Try to get it based on available nvidia module version (just in case there are sources for couple of versions)
 61 | nv_version=$(shell /sbin/modinfo -F version -k $(KVER) nvidia 2>/dev/null)
 62 | nv_sources=$(shell /bin/ls -d /usr/src/nvidia-$(nv_version)/ 2>/dev/null)
 63 | ifneq ($(shell test -d "$(nv_sources)" && echo "true" || echo "" ),)
 64 | NV_P2P_H=$(shell /bin/ls -1 $(nv_sources)/nvidia/nv-p2p.h 2>/dev/null | tail -1)
 65 | else
 66 | NV_P2P_H=$(shell /bin/ls -1 /usr/src/nvidia-*/nvidia/nv-p2p.h 2>/dev/null | tail -1)
 67 | endif
 68 | # Build nv_peer_mem.ko: stage nv-p2p.h and my.symvers, then run kbuild through $(MAKE) so -j/-n reach the sub-make.
 69 | all: gen_nv_symvers
 70 | ifneq ($(shell test -e "$(NV_P2P_H)" && echo "true" || echo "" ),)
 71 | 	$(info Found $(NV_P2P_H))
 72 | 	/bin/cp -f $(NV_P2P_H) $(PWD)/nv-p2p.h
 73 | else
 74 | 	$(info Warning: nv-p2p.h was not found on the system, going to use compat_nv-p2p.h)
 75 | 	/bin/cp -f $(PWD)/compat_nv-p2p.h $(PWD)/nv-p2p.h
 76 | endif
 77 | 	echo -n "" > my.symvers
 78 | ifneq ($(shell test -d $(OFA_KERNEL) && echo "true" || echo "" ),)
 79 | 	# get OFED symbols when building with MLNX_OFED
 80 | 	/bin/cp -f $(OFA_KERNEL)/Module.symvers my.symvers
 81 | endif
 82 | 	cat nv.symvers >> my.symvers
 83 | 	$(MAKE) -C $(KDIR) $(MAKE_PARAMS) M=$(PWD) KBUILD_EXTRA_SYMBOLS="$(PWD)/my.symvers" modules
 84 | # Remove kbuild outputs (via the kernel tree's own clean, invoked with $(MAKE)) and the files generated by `all`.
 85 | clean:
 86 | 	$(MAKE) -C $(KDIR)  M=$(PWD) clean
 87 | 	/bin/rm -f nv.symvers my.symvers nv-p2p.h
 88 | # Stage nv_peer_mem.ko under $(DESTDIR)/$(MODULE_DESTDIR); depmod runs only when DESTDIR is empty.
 89 | install:
 90 | 	mkdir -p $(DESTDIR)/$(MODULE_DESTDIR)
 91 | 	/bin/cp -f $(PWD)/nv_peer_mem.ko $(DESTDIR)/$(MODULE_DESTDIR)
 92 | 	test -n "$(DESTDIR)" || $(DEPMOD) -r -ae $(KVER)
 93 | # Copy the DKMS source set listed in $(SOURCE_FILES) into $(DESTDIR)$(DKMS_SRC_DIR).
 94 | install-dkms: $(SOURCE_FILES)
 95 | 	install -d $(DESTDIR)$(DKMS_SRC_DIR)
 96 | 	cp -a $^ $(DESTDIR)$(DKMS_SRC_DIR)
 97 | # Install nv_peer_mem.conf (/etc/infiniband), the init script (/etc/init.d) and the upstart file (/etc/init) under $(DESTDIR).
 98 | install-utils:
 99 | 	install -d $(DESTDIR)/etc/infiniband
100 | 	install -d $(DESTDIR)/etc/init.d
101 | 	install -d $(DESTDIR)/etc/init
102 | 	install -m 0644 nv_peer_mem.conf	$(DESTDIR)/etc/infiniband/nv_peer_mem.conf
103 | 	install -m 0755 nv_peer_mem		$(DESTDIR)/etc/init.d/nv_peer_mem
104 | 	install -m 0644 nv_peer_mem.upstart	$(DESTDIR)/etc/init/nv_peer_mem.conf
105 | # Delete the installed module; for a live system (empty DESTDIR) re-run depmod afterwards.
106 | uninstall:
107 | 	/bin/rm -f $(DESTDIR)/$(MODULE_DESTDIR)/nv_peer_mem.ko
108 | 	[ -n "$(DESTDIR)" ] || $(DEPMOD) -r -ae $(KVER)
109 | # Run create_nv.symvers.sh for $(KVER); the `all` recipe appends the nv.symvers it writes to my.symvers.
110 | gen_nv_symvers:
111 | 	$(PWD)/create_nv.symvers.sh $(KVER)
112 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GPUDirect RDMA
 2 | 
 3 | The latest advancement in GPU-GPU communications is GPUDirect RDMA. This technology provides a direct P2P (Peer-to-Peer) data path between GPU memory and the NVIDIA HCA/NIC devices. This significantly decreases GPU-GPU communication latency and completely offloads the CPU, removing it from all GPU-GPU communications across the network.
 4 | 
 5 | [Mellanox Product Family](http://www.mellanox.com/page/products_dyn?product_family=116)
 6 | 
 7 | General
 8 | -----------
 9 | MLNX_OFED 2.1 introduces an API between IB CORE and peer memory clients (e.g. GPU cards such as NVIDIA Kepler class GPUs), also known as GPUDirect RDMA.  It provides access for the HCA to read/write peer memory data buffers; as a result, it allows RDMA-based applications to use the peer device computing power with the RDMA interconnect without the need for copying data to host memory.
10 | 
11 | This capability is supported with Mellanox ConnectX-3 VPI or Connect-IB InfiniBand adapters.  It will also seamlessly work using RoCE technology with the Mellanox ConnectX-3 VPI adapters.
12 | 
13 | This README describes the steps required to complete the installation of the NVIDIA peer memory client with Mellanox OFED.
14 | 
15 | A kernel module with comparable functionalities has been integrated into the GPU driver, starting from the release R470, under the name nvidia-peermem.
16 | 
17 | Installation
18 | -------------
19 | Starting from version 1.2, nv_peer_mem requires a MLNX_OFED containing a fix for "Peer-direct patch may cause deadlock due to lock inversion" (tracked by the Internal Ref. #2696789).
20 | 
21 | nv_peer_mem version 1.1 is the last one to support MLNX_OFED LTS 4.9.
22 | 
23 | Pre-requisites:
24 | 1) NVIDIA compatible driver is installed and up.
25 | 2) MLNX_OFED 5.1 or newer (with the fix of bug #2696789) is installed and up.
26 | 
27 | Failure to have the proper configuration (as described above) will result in build failure.
28 | 
29 | For the required NVIDIA driver and other relevant details in that area
30 | please check with NVIDIA support.
31 | 
32 | To build source packages (src.rpm for RPM based OS and tarball for DEB based OS), use the build_module.sh script.
33 | 
34 | 
35 | For example, to build on RPM based OS:
36 | 
37 |     $ ./build_module.sh
38 |     Building source rpm for nvidia_peer_memory...
39 |     
40 |     Built: /tmp/nvidia_peer_memory-1.3-0.src.rpm
41 |     
42 |     To install run on RPM based OS:
43 |     # rpmbuild --rebuild /tmp/nvidia_peer_memory-1.3-0.src.rpm
44 |     # rpm -ivh <path to generated binary rpm file>
45 | 
46 | To build on DEB based OS:
47 | 
48 |     Building debian tarball for nvidia-peer-memory...
49 |     
50 |     Built: /tmp/nvidia-peer-memory_1.3.orig.tar.gz
51 | 
52 |     To install on DEB based OS:
53 |     # cd /tmp
54 |     # tar xzf /tmp/nvidia-peer-memory_1.3.orig.tar.gz
55 |     # cd nvidia-peer-memory-1.3
56 |     # dpkg-buildpackage -us -uc
57 |     # dpkg -i <path to generated deb files>            
58 | 
59 | To install run (excluding Ubuntu):
60 | 
61 |     rpmbuild --rebuild <path to srpm>.
62 |     rpm -ivh <path to generated binary rpm file.> [On SLES add --nodeps].
63 | 
64 | To install on Ubuntu run:
65 | 
66 |     dpkg-buildpackage -us -uc
67 |     dpkg -i <path to generated deb files.>
68 | 
69 |     (e.g. dpkg -i nvidia-peer-memory_1.3-0_all.deb
70 |           dpkg -i nvidia-peer-memory-dkms_1.3-0_all.deb)
71 | 
72 | After successful installation:
73 | 1)	nv_peer_mem.ko is installed
74 | 2)	service file /etc/init.d/nv_peer_mem to be used for start/stop/status
75 | 	for that kernel module was added.
76 | 3)	/etc/infiniband/nv_peer_mem.conf to control whether kernel module will be loaded on boot
77 | 	(default is YES) was added.
78 | 
79 | Notes
80 | ------
81 | 
82 | To achieve good performance, both the NIC and the GPU must physically sit on the same I/O root complex;
83 | use lspci -tv to make sure that this is the case.
84 | 


--------------------------------------------------------------------------------
/build_module.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # vim: ts=4 sw=4 expandtab
 3 | #
 4 | # Author: Feras Daoud <ferasda@mellanox.com>
 5 | #
 6 | 
 7 | ex()
 8 | {
 9 |     # Evaluate the arguments as one shell command; report it and abort the script if it fails.
10 |     eval "$@" && return 0
11 |     echo "Failed to execute: $@" >&2
12 |     exit 1
13 | }
14 | 
15 | tmpdir=`mktemp -d /tmp/nv.XXXXXX`
16 | if [ ! -d "$tmpdir" ]; then
17 |     echo "Failed to create a temp directory!" >&2
18 |     exit 1
19 | fi
20 | 
21 | dirname=`basename "$PWD"`
22 | VERSION=`grep Version: *.spec | cut -d : -f 2 | sed -e 's@\s@@g'`
23 | RELEASE=`grep "define _release" *.spec | cut -d" " -f"4"| sed -r -e 's/}//'`
24 | if [ "X$VERSION" == "X" ] || [ "X$RELEASE" == "X" ]; then
25 |     echo "Failed to get version numbers!" >&2
26 |     exit 1
27 | fi
28 | 
29 | ex cp -r . $tmpdir/nvidia_peer_memory-$VERSION
30 | pushd $tmpdir > /dev/null
31 | ex tar czf nvidia_peer_memory-$VERSION.tar.gz  --exclude='.*' --exclude=build_release.sh nvidia_peer_memory-$VERSION
32 | popd > /dev/null
33 | 
34 | if [ -f /etc/debian_version ]; then
35 |     echo
36 |     echo "Building debian tarball for nvidia-peer-memory..."
37 |     ex mv $tmpdir/nvidia_peer_memory-$VERSION $tmpdir/nvidia-peer-memory-$VERSION
38 |     pushd $tmpdir > /dev/null
39 |     ex tar czf nvidia-peer-memory_$VERSION.orig.tar.gz --exclude='.*' --exclude=build_release.sh nvidia-peer-memory-$VERSION
40 |     ex mv nvidia-peer-memory_$VERSION.orig.tar.gz /tmp
41 |     popd > /dev/null
42 | 
43 |     echo ""
44 |     echo Built: /tmp/nvidia-peer-memory_$VERSION.orig.tar.gz
45 |     echo ""
46 |     echo "To install on DEB based OS:"
47 |     echo "    # cd /tmp"
48 |     echo "    # tar xzf /tmp/nvidia-peer-memory_$VERSION.orig.tar.gz"
49 |     echo "    # cd nvidia-peer-memory-$VERSION"
50 |     echo "    # dpkg-buildpackage -us -uc"
51 |     echo "    # dpkg -i <path to generated deb files>"
52 |     echo ""
53 | else
54 |     echo
55 |     echo "Building source rpm for nvidia_peer_memory..."
56 |     mkdir -p $tmpdir/topdir/{SRPMS,RPMS,SPECS,BUILD}
57 |     ex "rpmbuild -ts --nodeps --define '_topdir $tmpdir/topdir' --define 'dist %{nil}' --define '_source_filedigest_algorithm md5' --define '_binary_filedigest_algorithm md5' $tmpdir/nvidia_peer_memory-$VERSION.tar.gz >/dev/null"
58 |     srpm=`ls -1 $tmpdir/topdir/SRPMS/`
59 |     mv $tmpdir/topdir/SRPMS/$srpm /tmp
60 | 
61 |     echo ""
62 |     echo Built: /tmp/$srpm
63 |     echo ""
64 |     echo "To install run on RPM based OS:"
65 |     echo "    # rpmbuild --rebuild /tmp/$srpm"
66 |     echo "    # rpm -ivh <path to generated binary rpm file>" 
67 |     echo ""
68 | fi
69 | 
70 | /bin/rm -rf $tmpdir
71 | 
72 | 


--------------------------------------------------------------------------------
/build_release.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # vim: ts=4 sw=4 expandtab
 3 | #
 4 | # Author: Alaa Hleihel <alaa@mellanox.com>
 5 | #
 6 | 
 7 | GIT_URL=${GIT_URL:-"https://github.com/Mellanox/nv_peer_memory.git"}
 8 | 
 9 | GIT_BRANCH=${GIT_BRANCH:-"master"}
10 | ex()
11 | {
12 |     # Run "$@" through eval; on a non-zero status print the command to stderr and exit 1.
13 |     eval "$@" || {
14 |         echo "Failed to execute: $@" >&2; exit 1
15 |     }
16 | }
17 | 
18 | tmpdir=`mktemp -d /tmp/nv.XXXXXX`
19 | if [ ! -d "$tmpdir" ]; then
20 |     echo "Failed to create a temp directory!" >&2
21 |     exit 1
22 | fi
23 | echo "Working in $tmpdir ..."
24 | cd $tmpdir
25 | 
26 | #clone
27 | echo "Cloning from $GIT_URL ..."
28 | ex git clone $GIT_URL >/dev/null
29 | dirname=`ls -1`
30 | cd $dirname
31 | echo "Checking out branch: $GIT_BRANCH ..."
32 | ex git checkout $GIT_BRANCH >/dev/null
33 | VERSION=`grep Version: *.spec | cut -d : -f 2 | sed -e 's@\s@@g'`
34 | RELEASE=`grep "define _release" *.spec | cut -d" " -f"4"| sed -r -e 's/}//'`
35 | if [ "X$VERSION" == "X" ] || [ "X$RELEASE" == "X" ]; then
36 |     echo "Failed to get version numbers!" >&2
37 |     exit 1
38 | fi
39 | # Pack the source tarball; tar applies --exclude only to names that follow it, so the excludes precede the directory.
40 | cd $tmpdir
41 | ex mv $dirname nvidia_peer_memory-$VERSION
42 | ex tar czf nvidia_peer_memory-$VERSION.tar.gz --exclude='.*' --exclude=build_release.sh nvidia_peer_memory-$VERSION
43 | 
44 | echo
45 | echo "Building source rpm for nvidia_peer_memory..."
46 | mkdir -p $tmpdir/topdir/{SRPMS,RPMS,SPECS,BUILD}
47 | ex "rpmbuild -ts --nodeps --define '_topdir $tmpdir/topdir' --define 'dist %{nil}' --define '_source_filedigest_algorithm md5' --define '_binary_filedigest_algorithm md5' nvidia_peer_memory-$VERSION.tar.gz >/dev/null"
48 | srpm=`ls -1 $tmpdir/topdir/SRPMS/`
49 | mv $tmpdir/topdir/SRPMS/$srpm /tmp
50 | # Debian tarball: rename the tree, stamp the changelog, then pack with the excludes ahead of the path (tar treats them as positional).
51 | echo "Building debian tarball for nvidia-peer-memory..."
52 | ex mv nvidia_peer_memory-$VERSION nvidia-peer-memory-$VERSION
53 | # update version in changelog
54 | sed -i -r "0,/^(.*) \(([a-zA-Z0-9.-]+)\) (.*)/s//\1 \(${VERSION}-${RELEASE}\) \3/" nvidia-peer-memory-${VERSION}/debian/changelog
55 | ex tar czf nvidia-peer-memory_$VERSION.orig.tar.gz --exclude='.*' --exclude=build_release.sh nvidia-peer-memory-$VERSION
56 | ex mv nvidia-peer-memory_$VERSION.orig.tar.gz /tmp
57 | 
58 | /bin/rm -rf $tmpdir
59 | 
60 | echo ""
61 | echo Built: /tmp/$srpm
62 | echo Built: /tmp/nvidia-peer-memory_$VERSION.orig.tar.gz
63 | echo ""
64 | echo "To install run on RPM based OS:"
65 | echo "    # rpmbuild --rebuild /tmp/$srpm"
66 | echo "    # rpm -ivh <path to generated binary rpm file>" 
67 | echo ""
68 | echo "To install on DEB based OS:"
69 | echo "    # cd /tmp"
70 | echo "    # tar xzf /tmp/nvidia-peer-memory_$VERSION.orig.tar.gz"
71 | echo "    # cd nvidia-peer-memory-$VERSION"
72 | echo "    # dpkg-buildpackage -us -uc"
73 | echo "    # dpkg -i <path to generated deb files>"
74 | echo ""
75 | 


--------------------------------------------------------------------------------
/compat_nv-p2p.h:
--------------------------------------------------------------------------------
  1 | /* _NVRM_COPYRIGHT_BEGIN_
  2 |  *
  3 |  * Copyright 2011 by NVIDIA Corporation.  All rights reserved.  All
  4 |  * information contained herein is proprietary and confidential to NVIDIA
  5 |  * Corporation.  Any use, reproduction, or disclosure without the written
  6 |  * permission of NVIDIA Corporation is prohibited.
  7 |  *
  8 |  * _NVRM_COPYRIGHT_END_
  9 |  */
 10 | 
 11 | #ifndef _NV_P2P_H_
 12 | #define _NV_P2P_H_
 13 | 
 14 | enum {
 15 |     NVIDIA_P2P_ARCHITECTURE_TESLA = 0,
 16 |     NVIDIA_P2P_ARCHITECTURE_FERMI,
 17 |     NVIDIA_P2P_ARCHITECTURE_CURRENT = NVIDIA_P2P_ARCHITECTURE_FERMI
 18 | };
 19 | 
 20 | #define NVIDIA_P2P_PARAMS_VERSION   0x00010001
 21 | 
 22 | enum {
 23 |     NVIDIA_P2P_PARAMS_ADDRESS_INDEX_GPU = 0,
 24 |     NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE,
 25 |     NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX = \
 26 |         NVIDIA_P2P_PARAMS_ADDRESS_INDEX_THIRD_PARTY_DEVICE
 27 | };
 28 | 
 29 | typedef
 30 | struct nvidia_p2p_params {
 31 |     uint32_t version;
 32 |     uint32_t architecture;
 33 |     union nvidia_p2p_mailbox_addresses {
 34 |         struct {
 35 |             uint64_t wmb_addr;
 36 |             uint64_t wmb_data;
 37 |             uint64_t rreq_addr;
 38 |             uint64_t rcomp_addr;
 39 |             uint64_t reserved[2];
 40 |         } fermi;
 41 |     } addresses[NVIDIA_P2P_PARAMS_ADDRESS_INDEX_MAX+1];
 42 | } nvidia_p2p_params_t;
 43 | 
 44 | /*
 45 |  * @brief
 46 |  *   Initializes a third-party P2P mapping between an NVIDIA
 47 |  *   GPU and a third-party device.
 48 |  *
 49 |  * @param[in]     p2p_token
 50 |  *   A token that uniquely identifies the P2P mapping.
 51 |  * @param[in,out] params
 52 |  *   A pointer to a structure with P2P mapping parameters.
 53 |  * @param[in]     destroy_callback
 54 |  *   A pointer to the function to be invoked when the P2P mapping
 55 |  *   is destroyed implicitly.
 56 |  * @param[in]     data
 57 |  *   An opaque pointer to private data to be passed to the
 58 |  *   callback function.
 59 |  *
 60 |  * @return
 61 |  *    0           upon successful completion.
 62 |  *   -EINVAL      if an invalid argument was supplied.
 63 |  *   -ENOTSUPP    if the requested configuration is not supported.
 64 |  *   -ENOMEM      if the driver failed to allocate memory.
 65 |  *   -EBUSY       if the mapping has already been initialized.
 66 |  *   -EIO         if an unknown error occurred.
 67 |  */
 68 | int nvidia_p2p_init_mapping(uint64_t p2p_token,
 69 |         struct nvidia_p2p_params *params,
 70 |         void (*destroy_callback)(void *data),
 71 |         void *data);
 72 | 
 73 | /*
 74 |  * @brief
 75 |  *   Tear down a previously initialized third-party P2P mapping.
 76 |  *
 77 |  * @param[in]     p2p_token
 78 |  *   A token that uniquely identifies the mapping.
 79 |  *
 80 |  * @return
 81 |  *    0           upon successful completion.
 82 |  *   -EINVAL      if an invalid argument was supplied.
 83 |  *   -ENOTSUPP    if the requested configuration is not supported.
 84 |  *   -ENOMEM      if the driver failed to allocate memory.
 85 |  */
 86 | int nvidia_p2p_destroy_mapping(uint64_t p2p_token);
 87 | 
 88 | enum {
 89 |     NVIDIA_P2P_PAGE_SIZE_4KB = 0,
 90 |     NVIDIA_P2P_PAGE_SIZE_64KB,
 91 |     NVIDIA_P2P_PAGE_SIZE_128KB
 92 | };
 93 | 
 94 | typedef
 95 | struct nvidia_p2p_page {
 96 |     uint64_t physical_address;
 97 |     union nvidia_p2p_request_registers {
 98 |         struct {
 99 |             uint32_t wreqmb_h;
100 |             uint32_t rreqmb_h;
101 |             uint32_t rreqmb_0;
102 |             uint32_t reserved[3];
103 |         } fermi;
104 |     } registers;
105 | } nvidia_p2p_page_t;
106 | 
107 | #define NVIDIA_P2P_PAGE_TABLE_VERSION   0x00010001
108 | 
109 | typedef
110 | struct nvidia_p2p_page_table {
111 |     uint32_t version;
112 |     uint32_t page_size;
113 |     struct nvidia_p2p_page **pages;
114 |     uint32_t entries;
115 | } nvidia_p2p_page_table_t;
116 | 
117 | /*
118 |  * @brief
119 |  *   Make the pages underlying a range of GPU virtual memory
120 |  *   accessible to a third-party device.
121 |  *
122 |  * @param[in]     p2p_token
123 |  *   A token that uniquely identifies the P2P mapping.
124 |  * @param[in]     va_space
125 |  *   A GPU virtual address space qualifier.
126 |  * @param[in]     virtual_address
127 |  *   The start address in the specified virtual address space.
128 |  * @param[in]     length
129 |  *   The length of the requested P2P mapping.
130 |  * @param[out]    page_table
131 |  *   A pointer to an array of structures with P2P PTEs.
132 |  * @param[in]     free_callback
133 |  *   A pointer to the function to be invoked when the pages
134 |  *   underlying the virtual address range are freed
135 |  *   implicitly.
136 |  * @param[in]     data
137 |  *   An opaque pointer to private data to be passed to the
138 |  *   callback function.
139 |  *
140 |  * @return
141 |  *    0           upon successful completion.
142 |  *   -EINVAL      if an invalid argument was supplied.
143 |  *   -ENOTSUPP    if the requested operation is not supported.
144 |  *   -ENOMEM      if the driver failed to allocate memory or if
145 |  *     insufficient resources were available to complete the operation.
146 |  *   -EIO         if an unknown error occurred.
147 |  */
148 | int nvidia_p2p_get_pages(uint64_t p2p_token, uint32_t va_space,
149 |         uint64_t virtual_address,
150 |         uint64_t length,
151 |         struct nvidia_p2p_page_table **page_table,
152 |         void (*free_callback)(void *data),
153 |         void *data);
154 | 
155 | /*
156 |  * @brief
157 |  *   Release a set of pages previously made accessible to
158 |  *   a third-party device.
159 |  *
160 |  * @param[in]     p2p_token
161 |  *   A token that uniquely identifies the P2P mapping.
162 |  * @param[in]     va_space
163 |  *   A GPU virtual address space qualifier.
164 |  * @param[in]     virtual_address
165 |  *   The start address in the specified virtual address space.
166 |  * @param[in]     page_table
167 |  *   A pointer to the array of structures with P2P PTEs.
168 |  *
169 |  * @return
170 |  *    0           upon successful completion.
171 |  *   -EINVAL      if an invalid argument was supplied.
172 |  *   -EIO         if an unknown error occurred.
173 |  */
174 | int nvidia_p2p_put_pages(uint64_t p2p_token, uint32_t va_space,
175 |         uint64_t virtual_address,
176 |         struct nvidia_p2p_page_table *page_table);
177 | 
178 | /*
179 |  * @brief
180 |  *   Free a third-party P2P page table.
181 |  *
182 |  * @param[in]     page_table
183 |  *   A pointer to the array of structures with P2P PTEs.
184 |  *
185 |  * @return
186 |  *    0           upon successful completion.
187 |  *   -EINVAL      if an invalid argument was supplied.
188 |  */
189 | int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table);
190 | 
191 | #endif /* _NV_P2P_H_ */
192 | 


--------------------------------------------------------------------------------
/create_nv.symvers.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # Copyright (c) 2016 Mellanox Technologies. All rights reserved.
  4 | #
  5 | # This Software is licensed under one of the following licenses:
  6 | #
  7 | # 1) under the terms of the "Common Public License 1.0" a copy of which is
  8 | #    available from the Open Source Initiative, see
  9 | #    http://www.opensource.org/licenses/cpl.php.
 10 | #
 11 | # 2) under the terms of the "The BSD License" a copy of which is
 12 | #    available from the Open Source Initiative, see
 13 | #    http://www.opensource.org/licenses/bsd-license.php.
 14 | #
 15 | # 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 16 | #    copy of which is available from the Open Source Initiative, see
 17 | #    http://www.opensource.org/licenses/gpl-license.php.
 18 | #
 19 | # Licensee has the right to choose one of the above licenses.
 20 | #
 21 | # Redistributions of source code must retain the above copyright
 22 | # notice and one of the license notices.
 23 | #
 24 | # Redistributions in binary form must reproduce both the above copyright
 25 | # notice, one of the license notices in the documentation
 26 | # and/or other materials provided with the distribution.
 27 | #
 28 | SCRIPTPATH=$(cd `dirname "${BASH_SOURCE[0]}"` && pwd)
 29 | 
 30 | MOD_SYMVERS=${SCRIPTPATH}/nv.symvers
 31 | KVER=${1:-$(uname -r)}
 32 | 
 33 | # Create empty symvers file
 34 | echo -n "" > $MOD_SYMVERS
 35 | 
 36 | try_compile_nvidia_sources()
 37 | {
 38 | 	# Rebuild the nvidia driver from /usr/src/nvidia-* in a scratch directory and
 39 | 	# write its nvidia_p2p_* entries from Module*.symvers into $MOD_SYMVERS.
 40 | 	# $1: module name or path handed to modinfo to pick the matching source tree.
 41 | 	# Returns quietly, leaving $MOD_SYMVERS as it was, when no sources exist or the build fails.
 42 | 	local mod=$1; shift
 43 | 	nv_version=$(/sbin/modinfo -F version -k "$KVER" "$mod" 2>/dev/null)
 44 | 	nv_sources=$(/bin/ls -d /usr/src/nvidia-${nv_version}/ 2>/dev/null)
 45 | 	if [ "X${nv_sources}" == "X" ]; then
 46 | 		nv_sources=$(/bin/ls -1d /usr/src/nvidia-* 2>/dev/null | tail -1)
 47 | 	fi
 48 | 	if [ "X${nv_sources}" == "X" ]; then
 49 | 		return
 50 | 	fi
 51 | 
 52 | 	echo
 53 | 	echo "Attempting to compile nvidia from $nv_sources sources to build Module.symvers..."
 54 | 	echo
 55 | 	local tmpdir=`mktemp -d /tmp/nv.XXXXXX`
 56 | 	if [ ! -d "$tmpdir" ]; then
 57 | 		echo "-E- Failed to create a temp directory!" >&2
 58 | 		exit 1
 59 | 	fi
 60 | 	/bin/cp -a $nv_sources $tmpdir
 61 | 	# Build inside a subshell: a failed step then neither leaves the caller
 62 | 	# sitting in the scratch tree nor skips the cleanup below.
 63 | 	(
 64 | 		cd $tmpdir/* || exit 1
 65 | 		make -j8 NV_EXCLUDE_BUILD_MODULES='' KERNEL_UNAME=$KVER clean || exit 1
 66 | 		make -j8 NV_EXCLUDE_BUILD_MODULES='' KERNEL_UNAME=$KVER modules || exit 1
 67 | 		grep "nvidia_p2p_" Module*.symvers > ${MOD_SYMVERS}
 68 | 		echo "Created: ${MOD_SYMVERS}"
 69 | 	)
 70 | 	/bin/rm -rf $tmpdir
 71 | }
 72 | 
 73 | nvidia_mod=
 74 | crc_found=0
 75 | crc_mod_str="__crc_nvidia_p2p_"
 76 | modules_pat="$crc_mod_str|T nvidia_p2p_"
 77 | for mod in nvidia $(find /lib/modules/$KVER -name "nvidia*.ko*" 2>/dev/null)
 78 | do
 79 | 	nvidia_mod=$(/sbin/modinfo -F filename -k "$KVER" $mod 2>/dev/null)
 80 | 	if [ ! -e "$nvidia_mod" ]; then
 81 | 		continue
 82 | 	fi
 83 | 
 84 | 	# WA for nm: nvidia.ko.xz: File format not recognized
 85 | 	case "$nvidia_mod" in
 86 | 		*ko.xz)
 87 | 			/bin/cp -fv $nvidia_mod .
 88 | 			nvidia_mod=$(basename $nvidia_mod | sed -e "s/.xz//g")
 89 | 			xz -d ${nvidia_mod}.xz
 90 | 			;;
 91 | 	esac
 92 | 
 93 | 	if ! (nm -o $nvidia_mod | grep -q -E "$modules_pat"); then
 94 | 		continue
 95 | 	fi
 96 | 
 97 | 	# On some PPC kernels we might have relative CRCs, so we can't build symvers based on nm output.
 98 | 	# In that case try to recompile the nvidia driver from source code and get the needed
 99 | 	# nvidia_p2p_* symbols from the generated Module.symvers file.
100 | 	# If we fail to generate Module.symvers, then just build the nv_peer_mem without
101 | 	# specifying the nvidia_p2p_ symbol versions.
102 | 	if (nm -o $nvidia_mod | grep "$crc_mod_str" | grep -qe "\sR\s*__crc"); then
103 | 		echo "-W- Module $nvidia_mod contains relative CRCs, cannot get symbols from it!" >&2
104 | 		try_compile_nvidia_sources $nvidia_mod
105 | 		break
106 | 	fi
107 | 	# Emit one symvers row per matching nm line; once a __crc_ line has been seen, lines without that prefix are skipped.
108 | 	echo "Getting symbol versions from $nvidia_mod ..."
109 | 	while read -r line
110 | 	do
111 | 		if echo "$line" | grep -q "$crc_mod_str"; then
112 | 			crc_found=1
113 | 		else
114 | 			if [ "$crc_found" != 0 ]; then
115 | 				continue
116 | 			fi
117 | 		fi
118 | 		file=$(echo $line | cut -f1 -d: | sed -r -e 's@\./@@' -e 's@.ko(\S)*@@' -e "s@$PWD/@@")
119 | 		crc=$(echo $line | cut -f2 -d: | cut -f1 -d" ")
120 | 		sym=$(echo $line | cut -f2 -d: | cut -f3 -d" " | sed -e 's/__crc_//g')
121 | 		echo -e "0x$crc\t$sym\t$file\tEXPORT_SYMBOL\t" >> $MOD_SYMVERS
122 | 	done < <(nm -o $nvidia_mod | grep -E "$modules_pat")
123 | 
124 | 	echo "Created: ${MOD_SYMVERS}"
125 | 	exit 0
126 | done
127 | 
128 | if [ ! -e "$nvidia_mod" ]; then
129 | 	echo "-E- Cannot locate nvidia modules!" >&2
130 | 	echo "CUDA driver must be installed before installing this package!" >&2
131 | 	exit 1
132 | fi
133 | 
134 | if [ ! -s "$MOD_SYMVERS" ]; then
135 | 	echo "-W- Could not get list of nvidia symbols." >&2
136 | fi
137 | 


--------------------------------------------------------------------------------
/debian/.gitignore:
--------------------------------------------------------------------------------
1 | *.substvars
2 | *.debhelper
3 | *.debhelper.log
4 | .debhelper/
5 | files
6 | nvidia-peer-memory-dkms/
7 | nvidia-peer-memory/
8 | tmp
9 | 


--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
 1 | nvidia-peer-memory (1.2-0) unstable; urgency=low
 2 | 
 3 |   *  nv_peer_mem.c: avoid deadlocks by registering as new style client
 4 | 
 5 |  -- Feras Daoud <ferasda@nvidia.com>  Mon, 27 Jul 2020 16:01:20 +0200
 6 | 
 7 | nvidia-peer-memory (1.1-0) unstable; urgency=low
 8 | 
 9 |   *  Makefile: Support compiling without MLNX_OFED
10 |   *  Makefile: Improve parsing /proc/version
11 |   *  Add EXPORT_SYMBOL to symver
12 | 
13 |  -- Feras Daoud <ferasda@nvidia.com>  Mon, 27 Jul 2020 16:01:20 +0200
14 | 
15 | nvidia-peer-memory (1.0-9) unstable; urgency=low
16 | 
17 |   *  create_nv.symvers.sh: avoid non-crc syms on old
18 |   *  build_module.sh: Reorder parameters of tar
19 |   *  deb: simpler dkms script and more
20 |   *  create_syms_fix.sh: also look for nvidia_p2p_* symbols
21 | 
22 |  -- Feras Daoud <ferasda@mellanox.com>  Tue, 05 May 2020 19:43:30 +0200
23 | 
24 | nvidia-peer-memory (1.0-8) unstable; urgency=low
25 | 
26 |   *  create_nv.symvers.sh: Support compressed modules
27 | 
28 |  -- Alaa Hleihel <alaa@mellanox.com>  Thu, 06 Sep 2018 15:45:30 +0200
29 | 
30 | nvidia-peer-memory (1.0-7) unstable; urgency=low
31 | 
32 |   *  Replace ACCESS_ONCE() by READ_ONCE() and WRITE_ONCE()
33 |   *  Revert "debian: Add dependency on cuda"
34 | 
35 |  -- Alaa Hleihel <alaa@mellanox.com>  Thu, 03 May 2018 15:45:30 +0200
36 | 
37 | nvidia-peer-memory (1.0-6) unstable; urgency=low
38 | 
39 |   *  debian: Add dependency on cuda
40 |   *  debian: Add dependency on mlnx-ofed-kernel
41 |   *  create_nv.symvers.sh: W/A for PPC kernels with relative CRCs
42 |   *  Makefile: Try to get nv-p2p.h based on available nvidia module version
43 |   *  Makefile: Fix getting ofa_kernel path
44 | 
45 |  -- Alaa Hleihel <alaa@mellanox.com>  Wed, 10 Jan 2018 15:45:30 +0200
46 | 
47 | nvidia-peer-memory (1.0-5) unstable; urgency=low
48 | 
49 |   *  Support kernels with CONFIG_MODVERSIONS unset
50 | 
51 |  -- Feras Daoud <ferasda@mellanox.com>  Sun, 20 Aug 2017 15:45:30 +0200
52 | 
53 | nvidia-peer-memory (1.0-4) unstable; urgency=low
54 | 
55 |   *  Stable NV_DMA_MAPPING
56 | 
57 |  -- Feras Daoud <ferasda@mellanox.com>  Wed, 15 May 2017 11:20:20 +0200
58 | 
59 | nvidia-peer-memory (1.0-3) unstable; urgency=low
60 | 
61 |   *  Bar address re-mapping for POWER
62 | 
63 |  -- Feras Daoud <ferasda@mellanox.com>  Wed, 04 Jan 2017 13:00:23 +0200
64 | 
65 | nvidia-peer-memory (1.0-2) unstable; urgency=low
66 | 
67 |   * Build infrastructure improvements
68 | 
69 |  -- Yishai Hadas <yishaih@mellanox.com>  Thu, 03 Nov 2016 15:00:53 +0200
70 | 
71 | nvidia-peer-memory (1.0-1) unstable; urgency=low
72 | 
73 |   * Makefile: EXTRA_CFLAGS was deprecated [1] in newer kernels.
74 | 
75 |  -- Yishai Hadas <yishaih@mellanox.com>  Wed, 17 Aug 2016 15:00:53 +0200
76 | 
77 | nvidia-peer-memory (1.0-0) unstable; urgency=low
78 | 
79 |   * Initial release. Closes: #358673
80 | 
81 |  -- Alaa Hleihel <alaa@mellanox.com>  Thu, 12 Dec 2013 15:00:53 +0200
82 | 


--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 9
2 | 


--------------------------------------------------------------------------------
/debian/control:
--------------------------------------------------------------------------------
 1 | Source: nvidia-peer-memory
 2 | Section: kernel
 3 | Priority: optional
 4 | Maintainer: Yishai Hadas <yishaih@mellanox.com>
 5 | Build-Depends: debhelper (>= 9.0), autotools-dev, bzip2, dkms
 6 | Standards-Version: 1.0
 7 | Homepage: http://www.mellanox.com
 8 | 
 9 | Package: nvidia-peer-memory
10 | Architecture: all
11 | Depends: ${misc:Depends}
12 | Description: nvidia peer memory kernel module.
13 | 
14 | Package: nvidia-peer-memory-dkms
15 | Section: kernel
16 | Architecture: all
17 | Depends: dkms, make, mlnx-ofed-kernel-dkms | mlnx-ofed-kernel-modules, ${misc:Depends}
18 | Recommends: linux-headers-arm64 | linux-headers-powerpc | linux-headers-ppc64 | linux-headers-ppc64le | linux-headers-amd64 | linux-headers-generic | linux-headers
19 | Description: DKMS support for nvidia-peer-memory kernel modules
20 |  This package provides integration with the DKMS infrastructure for automatically building out of tree kernel modules.
21 |  .
22 |  This package contains the source to be built with dkms.
23 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory-dkms.dkms:
--------------------------------------------------------------------------------
1 | dkms.conf
2 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory-dkms.install:
--------------------------------------------------------------------------------
1 | usr/src/nv_peer_mem-*/*
2 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory-dkms.postinst:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | set -e
 4 | 
 5 | #DEBHELPER#
 6 | 
 7 | module="nv_peer_mem"
 8 | # Reload the module: unload any running copy (best effort), then load the installed build
 9 | if lsmod | grep -q -w "$module"; then
10 | 	rmmod "$module" 2>/dev/null || :
11 | fi
12 | if modinfo -n "$module" >/dev/null 2>&1; then
13 | 	modprobe "$module"
14 | fi
15 | 
16 | exit 0
17 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory.install:
--------------------------------------------------------------------------------
1 | etc/infiniband/nv_peer_mem.conf
2 | etc/init.d/nv_peer_mem
3 | etc/init/nv_peer_mem.conf
4 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory.postinst:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Post-install hook: registers the nv_peer_mem init script with update-rc.d on non-Ubuntu systems.
 3 | dist=`lsb_release -s -i | tr '[:upper:]' '[:lower:]'`
 4 | dist_ver=`lsb_release -s -r`
 5 | # $dist is quoted: it is empty when lsb_release prints nothing, which made the unquoted test malformed.
 6 | if [ "$dist" != "ubuntu" ]; then
 7 | 	/usr/sbin/update-rc.d nv_peer_mem defaults > /dev/null 2>&1
 8 | fi
 9 | 
10 | exit 0
11 | 


--------------------------------------------------------------------------------
/debian/nvidia-peer-memory.prerm:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # Pre-removal hook: removes the nv_peer_mem init script links with update-rc.d on non-Ubuntu systems.
 3 | dist=`lsb_release -s -i | tr '[:upper:]' '[:lower:]'`
 4 | dist_ver=`lsb_release -s -r`
 5 | # $dist is quoted: it is empty when lsb_release prints nothing, which made the unquoted test malformed.
 6 | if [ "$dist" != "ubuntu" ]; then
 7 | 	/usr/sbin/update-rc.d -f nv_peer_mem remove > /dev/null 2>&1
 8 | fi
 9 | 
10 | exit 0
11 | 


--------------------------------------------------------------------------------
/debian/patches/dkms_name.patch:
--------------------------------------------------------------------------------
 1 | From: Tzafrir Cohen <mellanox@cohens.org.il>
 2 | Subject: Name of dkms module to match install path
 3 | 
 4 | diff --git a/dkms.conf b/dkms.conf
 5 | index 7315b92..0b966e1 100644
 6 | --- a/dkms.conf
 7 | +++ b/dkms.conf
 8 | @@ -1,6 +1,6 @@
 9 |  
10 |  # DKMS module name and version
11 | -PACKAGE_NAME="nv_peer_mem"
12 | +PACKAGE_NAME="nvidia-peer-memory"
13 |  PACKAGE_VERSION="1.3"
14 |  
15 |  kernelver=${kernelver:-$(uname -r)}
16 | 


--------------------------------------------------------------------------------
/debian/patches/series:
--------------------------------------------------------------------------------
1 | dkms_name.patch
2 | 


--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/make -f
 2 | 
 3 | # Uncomment this to turn on verbose mode.
 4 | #export DH_VERBOSE=1
 5 | # pversion must be assigned before dkms_pkg: ':=' expands its right-hand side immediately.
 6 | pname:=nvidia-peer-memory
 7 | pdkms:=$(pname)-dkms
 8 | pversion := $(shell dpkg-parsechangelog | sed -n 's/^Version: *\([^-]\+\)-.\+/\1/p')
 9 | dkms_pkg:=nv_peer_mem-$(pversion)
10 | 
11 | %:
12 | 	dh $@ --with dkms
13 | 
14 | override_dh_auto_clean:
15 | 
16 | override_dh_auto_configure:
17 | 
18 | override_dh_auto_build:
19 | 
20 | override_dh_auto_test:
21 | 
22 | override_dh_auto_install:
23 | 	$(MAKE) DESTDIR=$(CURDIR)/debian/tmp install-dkms install-utils
24 | 	$(CURDIR)/debian/updateInit.sh debian/tmp/etc/init.d/nv_peer_mem
25 | 
26 | override_dh_installinit:
27 | 


--------------------------------------------------------------------------------
/debian/source/format:
--------------------------------------------------------------------------------
1 | 3.0 (native)
2 | 


--------------------------------------------------------------------------------
/debian/updateInit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | file=$1
 3 | shift
 4 | # Rewrite "$file" in place: its '#!/bin/bash' line is replaced by the same line plus an LSB INIT INFO header.
 5 | perl -i -ne 'if (m@^#!/bin/bash@) {
 6 |         print q@#!/bin/bash
 7 | #
 8 | ### BEGIN INIT INFO
 9 | # Provides:       nv_peer_mem
10 | # Required-Start: openibd
11 | # Required-Stop:
12 | # Default-Start: 2 3 4 5
13 | # Default-Stop: 0 1 6
14 | # Description:    Activates/Deactivates nv_peer_mem to \
15 | #                 start at boot time.
16 | ### END INIT INFO
17 | @;
18 |                  } else {
19 |                      print;
20 |                  }' "${file}"
21 | 


--------------------------------------------------------------------------------
/dkms.conf:
--------------------------------------------------------------------------------
 1 | 
 2 | # DKMS module name and version
 3 | PACKAGE_NAME="nv_peer_mem"
 4 | PACKAGE_VERSION="1.3"
 5 | 
 6 | kernelver=${kernelver:-$(uname -r)}
 7 | kernel_source_dir=${kernel_source_dir:-/lib/modules/$kernelver/build}
 8 | 
 9 | # Module name, source and destination directories, and build command-line
10 | BUILT_MODULE_NAME[0]="nv_peer_mem"
11 | BUILT_MODULE_LOCATION[0]="./"
12 | DEST_MODULE_LOCATION[0]="/kernel/../updates/"
13 | BUILD_DEPENDS[0]="mlnx-ofed-kernel"
14 | MAKE="make all KVER=$kernelver KDIR=$kernel_source_dir"
15 | 
16 | # Cleanup command-line
17 | CLEAN="make clean"
18 | 
19 | # Rebuild and autoinstall automatically when dkms_autoinstaller runs for a new kernel
20 | AUTOINSTALL="yes"
21 | 


--------------------------------------------------------------------------------
/nv_peer_mem:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | #
  4 | # Copyright (c) 2013 Mellanox Technologies. All rights reserved.
  5 | #
  6 | 
  7 | # config: /etc/infiniband/nv_peer_mem.conf
  8 | CONFIG=${CONFIG:-"/etc/infiniband/nv_peer_mem.conf"}
  9 | 
 10 | modname=nv_peer_mem
 11 | reqmods="ib_core nvidia"
 12 | # CONFIG may be overridden from the environment, so it is quoted wherever it is expanded.
 13 | if [ ! -f "$CONFIG" ]; then
 14 |     echo No configuration file found for $modname
 15 |     exit 0
 16 | fi
 17 | 
 18 | . "$CONFIG"
 19 | 
 20 | CWD=`pwd`
 21 | cd /etc/infiniband
 22 | WD=`pwd`
 23 | 
 24 | # Allow unsupported modules, if disallowed by current configuration
 25 | modprobe=/sbin/modprobe
 26 | if ${modprobe} -c | grep -q '^allow_unsupported_modules  *0'; then
 27 |     modprobe="${modprobe} --allow-unsupported-modules"
 28 | fi
 29 | 
 30 | ACTION=$1
 31 | shift
 32 | 
 33 | # Unless ONBOOT=yes is configured, exit successfully without handling any action (start, stop, status, ...)
 34 | if [ "X${ONBOOT}" != "Xyes" ]; then
 35 |     exit 0
 36 | fi
 37 | # is_module NAME: exit status of 'grep -w NAME' over the lsmod listing (0 when NAME appears as a word).
 38 | is_module()
 39 | {
 40 |     local RC
 41 | 
 42 |     /sbin/lsmod | grep -w "$1" > /dev/null 2>&1
 43 |     RC=$?
 44 | 
 45 |     return $RC
 46 | }
 47 | # log_msg TEXT...: send the arguments to syslog through logger as one message prefixed with the module name.
 48 | log_msg()
 49 | {
 50 |     logger -i "$modname: $*"
 51 | }
 52 | # load_module NAME: run ${modprobe} on NAME; returns 1 (and logs) when modinfo reports no file for it.
 53 | load_module()
 54 | {
 55 |     local module=$1 filename
 56 |     filename=`modinfo "$module" | grep filename | awk '{print $NF}'`
 57 | 
 58 |     if [ ! -n "$filename" ]; then
 59 |         echo "Module $module does not exist"
 60 |         log_msg "Error: Module $module does not exist"
 61 |         return 1
 62 |     fi
 63 |     # ${modprobe} stays unquoted on purpose: it may carry the --allow-unsupported-modules option.
 64 |     ${modprobe} "$module" 2>&1
 65 | }
 66 | # start: load $modname once every module listed in $reqmods is loaded; returns load_module's status.
 67 | start()
 68 | {
 69 |     local RC=0
 70 |     echo -n "starting... "
 71 | 
 72 |     # Check if required modules are loaded; a missing one ends the whole script with exit 1
 73 |     for mod in $reqmods
 74 |     do
 75 |         if is_module $mod ; then
 76 |             continue
 77 |         else
 78 |             echo "Error: the following required module is not loaded: $mod"
 79 |             log_msg "Error: the following required module is not loaded: $mod"
 80 |             exit 1
 81 |         fi
 82 |     done
 83 | 
 84 |     load_module $modname
 85 |     RC=$?
 86 |     if [ $RC -eq 0 ];then
 87 |         echo "OK"
 88 |         log_msg "$modname loaded."
 89 |     else
 90 |         echo "Failed to load $modname"
 91 |         log_msg "Failed to load $modname"
 92 |     fi
 93 | 
 94 |     return $RC
 95 | }
 96 | # stop: rmmod $modname when it is loaded; returns rmmod's status, or 0 when the module was not loaded.
 97 | stop()
 98 | {
 99 |     local RC=0
100 |     echo -n "stopping... "
101 |     if is_module $modname ; then
102 |         /sbin/rmmod $modname 2>&1
103 | 	RC=$?
104 |         if [ $RC -eq 0 ];then
105 |             echo "OK"
106 |             log_msg "$modname unloaded."
107 |         else
108 |             echo "Failed to unload $modname"
109 |             log_msg "Failed to unload $modname"
110 |         fi
111 |     else
112 |         echo "OK"
113 |     fi
114 | 
115 |     return $RC
116 | }
117 | # status: print whether $modname currently appears in the lsmod listing.
118 | status()
119 | {
120 |     if is_module $modname ; then
121 |         echo "$modname module is loaded."
122 |     else
123 |         echo "$modname module is NOT loaded."
124 |     fi
125 | }
126 | # Dispatch on $ACTION; the script exits with the status of the last command the case statement ran.
127 | RC=0
128 | case $ACTION in
129 |         start)
130 |                 start
131 |                 ;;
132 |         stop)
133 |                 stop
134 |                 ;;
135 |         restart)
136 |                 stop
137 |                 start
138 |                 ;;
139 |         status)
140 |                 status
141 |                 ;;
142 |         *)
143 |                 echo
144 |                 echo "Usage: `basename $0` {start|stop|restart|status}"
145 |                 echo
146 |                 exit 1
147 |                 ;;
148 | esac
149 | 
150 | RC=$?
151 | exit $RC
152 | 


--------------------------------------------------------------------------------
/nv_peer_mem.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
  3 |  * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
  4 |  *
  5 |  * This software is available to you under a choice of one of two
  6 |  * licenses.  You may choose to be licensed under the terms of the GNU
  7 |  * General Public License (GPL) Version 2, available from the file
  8 |  * COPYING in the main directory of this source tree, or the
  9 |  * OpenIB.org BSD license below:
 10 |  *
 11 |  *     Redistribution and use in source and binary forms, with or
 12 |  *     without modification, are permitted provided that the following
 13 |  *     conditions are met:
 14 |  *
 15 |  *      - Redistributions of source code must retain the above
 16 |  *        copyright notice, this list of conditions and the following
 17 |  *        disclaimer.
 18 |  *
 19 |  *      - Redistributions in binary form must reproduce the above
 20 |  *        copyright notice, this list of conditions and the following
 21 |  *        disclaimer in the documentation and/or other materials
 22 |  *        provided with the distribution.
 23 |  *
 24 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 25 |  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 26 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 27 |  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 28 |  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 29 |  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 30 |  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 31 |  * SOFTWARE.
 32 |  */
 33 | #include <linux/mm.h>
 34 | #include <linux/dma-mapping.h>
 35 | #include <linux/module.h>
 36 | #include <linux/init.h>
 37 | #include <linux/slab.h>
 38 | #include <linux/errno.h>
 39 | #include <linux/export.h>
 40 | #include <linux/hugetlb.h>
 41 | #include <linux/atomic.h>
 42 | #include <linux/pci.h>
 43 | #include <linux/kernel.h>
 44 | 
 45 | #include "nv-p2p.h"
 46 | #include <rdma/peer_mem.h>
 47 | /* Driver identity; DRV_NAME and DRV_VERSION are also copied into the peer-memory clients at init. */
 48 | #define DRV_NAME	"nv_mem"
 49 | #define DRV_VERSION	"1.3-0"
 50 | #define DRV_RELDATE	__DATE__
 51 | 
 52 | MODULE_AUTHOR("Yishai Hadas");
 53 | MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
 54 | MODULE_LICENSE("Dual BSD/GPL");
 55 | MODULE_VERSION(DRV_VERSION);
 56 | 
 57 | #define peer_err(FMT, ARGS...) printk(KERN_ERR   DRV_NAME " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
 58 | /* Debug tracing: peer_dbg() prints (also at KERN_ERR level) only while enable_dbg is non-zero. */
 59 | static int enable_dbg = 0;
 60 | #define peer_dbg(FMT, ARGS...)                                          \
 61 |         do {                                                            \
 62 |                 if (enable_dbg /*&& printk_ratelimit()*/)		\
 63 |                         printk(KERN_ERR DRV_NAME " DBG %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS); \
 64 |         } while(0)
 65 | 
 66 | module_param(enable_dbg, int, 0000);
 67 | MODULE_PARM_DESC(enable_dbg, "enable debug tracing");
 68 | /* Fallback definitions of the NVIDIA P2P version helpers, used only when nv-p2p.h does not define them. */
 69 | #ifndef NVIDIA_P2P_MAJOR_VERSION_MASK
 70 | #define NVIDIA_P2P_MAJOR_VERSION_MASK   0xffff0000
 71 | #endif
 72 | 
 73 | #ifndef NVIDIA_P2P_MINOR_VERSION_MASK
 74 | #define NVIDIA_P2P_MINOR_VERSION_MASK   0x0000ffff
 75 | #endif
 76 | /* Major version: upper 16 bits of the version word. */
 77 | #ifndef NVIDIA_P2P_MAJOR_VERSION
 78 | #define NVIDIA_P2P_MAJOR_VERSION(v)	\
 79 | 	(((v) & NVIDIA_P2P_MAJOR_VERSION_MASK) >> 16)
 80 | #endif
 81 | /* Minor version: lower 16 bits of the version word. */
 82 | #ifndef NVIDIA_P2P_MINOR_VERSION
 83 | #define NVIDIA_P2P_MINOR_VERSION(v)	\
 84 | 	(((v) & NVIDIA_P2P_MINOR_VERSION_MASK))
 85 | #endif
 86 | 
 87 | /*
 88 |  *	Note: before major version 2, struct dma_mapping had no version field,
 89 |  *	so it is not possible to check version compatibility. In this case
 90 |  *	let us just avoid dma mappings altogether (NV_DMA_MAPPING is set to 0).
 91 |  */
 92 | #if defined(NVIDIA_P2P_DMA_MAPPING_VERSION) &&	\
 93 | 	(NVIDIA_P2P_MAJOR_VERSION(NVIDIA_P2P_DMA_MAPPING_VERSION) >= 2)
 94 | #pragma message("Enable nvidia_p2p_dma_map_pages support")
 95 | #define NV_DMA_MAPPING 1
 96 | #else
 97 | #define NV_DMA_MAPPING 0
 98 | #endif
 99 | /* Map READ_ONCE()/WRITE_ONCE() onto ACCESS_ONCE() when the included headers do not define them. */
100 | #ifndef READ_ONCE
101 | #define READ_ONCE(x) ACCESS_ONCE(x)
102 | #endif
103 | 
104 | #ifndef WRITE_ONCE
105 | #define WRITE_ONCE(x, val) ({ ACCESS_ONCE(x) = (val); })
106 | #endif
107 | /* Page granularity used for all address rounding in this file: 1 << 16 bytes (64KB). */
108 | #define GPU_PAGE_SHIFT   16
109 | #define GPU_PAGE_SIZE    ((u64)1 << GPU_PAGE_SHIFT)
110 | #define GPU_PAGE_OFFSET  (GPU_PAGE_SIZE-1)
111 | #define GPU_PAGE_MASK    (~GPU_PAGE_OFFSET)
112 | 
113 | /* Callback pointer passed by address to ib_register_peer_memory_client(), plus the two registration handles. */
114 | invalidate_peer_memory mem_invalidate_callback;
115 | static void *reg_handle = NULL;
116 | static void *reg_handle_nc = NULL;
117 | /* Per-buffer state: allocated in nv_mem_acquire(), handed back as the client context, freed in nv_mem_release(). */
118 | struct nv_mem_context {
119 | 	struct nvidia_p2p_page_table *page_table;	/* filled in by nvidia_p2p_get_pages() */
120 | #if NV_DMA_MAPPING
121 | 	struct nvidia_p2p_dma_mapping *dma_mapping;	/* set by nv_dma_map() */
122 | #endif
123 | #ifndef PEER_MEM_U64_CORE_CONTEXT
124 | 	void *core_context;	/* stored by get_pages, passed to mem_invalidate_callback */
125 | #else
126 | 	u64 core_context;	/* stored by get_pages, passed to mem_invalidate_callback */
127 | #endif
128 | 	u64 page_virt_start;	/* addr rounded down to GPU_PAGE_SIZE */
129 | 	u64 page_virt_end;	/* addr + size rounded up to GPU_PAGE_SIZE */
130 | 	size_t mapped_size;	/* page_virt_end - page_virt_start */
131 | 	unsigned long npages;	/* number of sg entries, set by nv_dma_map() */
132 | 	unsigned long page_size;	/* set to GPU_PAGE_SIZE by the get_pages handlers */
133 | 	struct task_struct *callback_task;	/* task running the free callback, NULL otherwise */
134 | 	int sg_allocated;	/* non-zero once nv_dma_map() allocated sg_head */
135 | 	struct sg_table sg_head;	/* copy of the table built by nv_dma_map() */
136 | };
137 | /* Returns 1 when NVIDIA_P2P_CAP_PERSISTENT_PAGES is defined and nvidia_p2p_cap_persistent_pages is non-zero, else 0. */
138 | static inline int nv_support_persistent_pages(void)
139 | {
140 | #ifdef NVIDIA_P2P_CAP_PERSISTENT_PAGES
141 | 	return !!(nvidia_p2p_cap_persistent_pages);
142 | #else
143 | 	return 0;
144 | #endif
145 | }
146 | /* Free callback passed to nvidia_p2p_get_pages() by nv_mem_get_pages(); data is the nv_mem_context. */
147 | static void nv_get_p2p_free_callback(void *data)
148 | {
149 | 	int ret = 0;
150 | 	struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
151 | 	struct nvidia_p2p_page_table *page_table = NULL;
152 | #if NV_DMA_MAPPING
153 | 	struct nvidia_p2p_dma_mapping *dma_mapping = NULL;
154 | #endif
155 | 	/* Hold a module reference for the duration of the callback; dropped at the out label. */
156 | 	__module_get(THIS_MODULE);
157 | 	if (!nv_mem_context) {
158 | 		peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
159 | 		goto out;
160 | 	}
161 | 
162 | 	if (!nv_mem_context->page_table) {
163 | 		peer_err("nv_get_p2p_free_callback -- invalid page_table\n");
164 | 		goto out;
165 | 	}
166 | 
167 | 	/* Save page_table locally to prevent it being freed as part of nv_mem_release
168 | 	    in case it's called internally by that callback.
169 | 	*/
170 | 	page_table = nv_mem_context->page_table;
171 | 
172 | #if NV_DMA_MAPPING
173 | 	if (!nv_mem_context->dma_mapping) {
174 | 		peer_err("nv_get_p2p_free_callback -- invalid dma_mapping\n");
175 | 		goto out;
176 | 	}
177 | 	dma_mapping = nv_mem_context->dma_mapping;
178 | #endif
179 | 
180 | 	/* For now don't set nv_mem_context->page_table to NULL,
181 | 	  * confirmed by NVIDIA that inflight put_pages with valid pointer will fail gracefully.
182 | 	*/
183 |         peer_dbg("calling mem_invalidate_callback\n");
184 | 	nv_mem_context->callback_task = current;	/* makes nv_dma_unmap()/nv_mem_put_pages() no-ops for this task */
185 | 	(*mem_invalidate_callback) (reg_handle, nv_mem_context->core_context);
186 | 	nv_mem_context->callback_task = NULL;
187 | 	/* Release through the locally saved pointers, not through nv_mem_context. */
188 | #if NV_DMA_MAPPING
189 | 	ret = nvidia_p2p_free_dma_mapping(dma_mapping); 
190 | 	if (ret)
191 |                 peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_dma_mapping()\n", ret);
192 | #endif
193 | 	ret = nvidia_p2p_free_page_table(page_table);
194 | 	if (ret)
195 | 		peer_err("nv_get_p2p_free_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);
196 | 
197 | out:
198 | 	module_put(THIS_MODULE);
199 | 	return;
200 | 
201 | }
202 | 
203 | /* Free callback used for the probe in nv_mem_acquire(): IB core is not called here - no ticket exists yet */
204 | static void nv_mem_dummy_callback(void *data)
205 | {
206 | 	struct nv_mem_context *nv_mem_context = (struct nv_mem_context *)data;
207 | 	int ret = 0;
208 | 	/* Hold a module reference while the page table is being freed. */
209 | 	__module_get(THIS_MODULE);
210 | 
211 | 	ret = nvidia_p2p_free_page_table(nv_mem_context->page_table);
212 | 	if (ret)
213 | 		peer_err("nv_mem_dummy_callback -- error %d while calling nvidia_p2p_free_page_table()\n", ret);
214 | 
215 | 	module_put(THIS_MODULE);
216 | 	return;
217 | }
218 | 
219 | /* acquire return code: 1 - mine (*client_context receives a new context), 0 - not mine (also on any error) */
220 | static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_private_data,
221 | 					char *peer_mem_name, void **client_context)
222 | {
223 | 
224 | 	int ret = 0;
225 | 	struct nv_mem_context *nv_mem_context;
226 | 
227 | 	nv_mem_context = kzalloc(sizeof *nv_mem_context, GFP_KERNEL);
228 | 	if (!nv_mem_context)
229 | 		/* Error case handled as not mine */
230 | 		return 0;
231 | 	/* Widen [addr, addr + size) to GPU_PAGE_SIZE boundaries. */
232 | 	nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
233 | 	nv_mem_context->page_virt_end   = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
234 | 	nv_mem_context->mapped_size  = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
235 | 	/* Probe: request the page table for the range, then release it again right below. */
236 | 	ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
237 | 			&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
238 | 
239 | 	if (ret < 0)
240 | 		goto err;
241 | 
242 | 	ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
243 | 				   nv_mem_context->page_table);
244 | 	if (ret < 0) {
245 | 		/* Not expected, however in case callback was called on that buffer just before
246 | 		    put pages we'll expect to fail gracefully (confirmed by NVIDIA) and return an error.
247 | 		*/
248 | 		peer_err("nv_mem_acquire -- error %d while calling nvidia_p2p_put_pages()\n", ret);
249 | 		goto err;
250 | 	}
251 | 
252 | 	/* 1 means mine */
253 | 	*client_context = nv_mem_context;
254 | 	__module_get(THIS_MODULE);	/* dropped by module_put() in nv_mem_release() */
255 | 	return 1;
256 | 
257 | err:
258 | 	kfree(nv_mem_context);
259 | 
260 | 	/* Error case handled as not mine */
261 | 	return 0;
262 | }
263 | /* Fill sg_head with one DMA-address-only entry per GPU page; returns 0 and the entry count in *nmap, else -errno. */
264 | static int nv_dma_map(struct sg_table *sg_head, void *context,
265 | 			      struct device *dma_device, int dmasync,
266 | 			      int *nmap)
267 | {
268 | 	int i, ret;
269 | 	struct scatterlist *sg;
270 | 	struct nv_mem_context *nv_mem_context =
271 | 		(struct nv_mem_context *) context;
272 | 	struct nvidia_p2p_page_table *page_table = nv_mem_context->page_table;
273 | 
274 | 	if (page_table->page_size != NVIDIA_P2P_PAGE_SIZE_64KB) {
275 | 		peer_err("nv_dma_map -- assumption of 64KB pages failed size_id=%u\n",
276 | 					nv_mem_context->page_table->page_size);
277 | 		return -EINVAL;
278 | 	}
279 | 
280 | #if NV_DMA_MAPPING
281 | 	{
282 | 		struct nvidia_p2p_dma_mapping *dma_mapping;
283 | 		struct pci_dev *pdev;
284 | 		/* to_pci_dev() is a container_of() and never yields NULL, so validate dma_device itself. */
285 | 		if (!dma_device) {
286 | 			peer_err("nv_dma_map -- invalid pci_dev\n");
287 | 			return -EINVAL;
288 | 		}
289 | 		pdev = to_pci_dev(dma_device);
290 | 		ret = nvidia_p2p_dma_map_pages(pdev, page_table, &dma_mapping);
291 | 		if (ret) {
292 | 			peer_err("nv_dma_map -- error %d while calling nvidia_p2p_dma_map_pages()\n", ret);
293 | 			return ret;
294 | 		}
295 | 
296 | 		if (!NVIDIA_P2P_DMA_MAPPING_VERSION_COMPATIBLE(dma_mapping)) {
297 | 			peer_err("error, incompatible dma mapping version 0x%08x\n",
298 | 				 dma_mapping->version);
299 | 			nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
300 | 			return -EINVAL;
301 | 		}
302 | 
303 | 		nv_mem_context->npages = dma_mapping->entries;
304 | 
305 | 		ret = sg_alloc_table(sg_head, dma_mapping->entries, GFP_KERNEL);
306 | 		if (ret) {
307 | 			nvidia_p2p_dma_unmap_pages(pdev, page_table, dma_mapping);
308 | 			return ret;
309 | 		}
310 | 		/* Publish the mapping only after every step that can fail has succeeded. */
311 | 		nv_mem_context->dma_mapping = dma_mapping;
312 | 
313 | 		for_each_sg(sg_head->sgl, sg, nv_mem_context->npages, i) {
314 | 			sg_set_page(sg, NULL, nv_mem_context->page_size, 0);
315 | 			sg->dma_address = dma_mapping->dma_addresses[i];
316 | 			sg->dma_length = nv_mem_context->page_size;
317 | 		}
318 | 	}
319 | #else
320 | 	nv_mem_context->npages = PAGE_ALIGN(nv_mem_context->mapped_size) >>
321 | 						GPU_PAGE_SHIFT;
322 | 
323 | 	if (page_table->entries != nv_mem_context->npages) {
324 | 		peer_err("nv_dma_map -- unexpected number of page table entries got=%u, expected=%lu\n",
325 | 					page_table->entries,
326 | 					nv_mem_context->npages);
327 | 		return -EINVAL;
328 | 	}
329 | 
330 | 	ret = sg_alloc_table(sg_head, nv_mem_context->npages, GFP_KERNEL);
331 | 	if (ret)
332 | 		return ret;
333 | 	/* Without the DMA-mapping API the physical addresses from the page table are used directly. */
334 | 	for_each_sg(sg_head->sgl, sg, nv_mem_context->npages, i) {
335 | 		sg_set_page(sg, NULL, nv_mem_context->page_size, 0);
336 | 		sg->dma_address = page_table->pages[i]->physical_address;
337 | 		sg->dma_length = nv_mem_context->page_size;
338 | 	}
339 | #endif
340 | 	nv_mem_context->sg_allocated = 1;
341 | 	nv_mem_context->sg_head = *sg_head;	/* kept so unmap/put_pages can verify it and release can free it */
342 | 	peer_dbg("allocated sg_head.sgl=%p\n", nv_mem_context->sg_head.sgl);
343 | 	*nmap = nv_mem_context->npages;
344 | 
345 | 	return 0;
346 | }
347 | /* Undo nv_dma_map()'s DMA mapping; returns 0, or -EINVAL for a NULL context or an sg_head that does not match. */
348 | static int nv_dma_unmap(struct sg_table *sg_head, void *context,
349 | 			   struct device  *dma_device)
350 | {
351 | 	struct nv_mem_context *nv_mem_context =
352 | 		(struct nv_mem_context *)context;
353 | 
354 | 	if (!nv_mem_context) {
355 | 		peer_err("nv_dma_unmap -- invalid nv_mem_context\n");
356 | 		return -EINVAL;
357 | 	}
358 | 	/* sg_head must be byte-identical to the table saved by nv_dma_map(). */
359 | 	if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
360 | 		return -EINVAL;
361 | 	/* Invoked from inside nv_get_p2p_free_callback(): that callback frees the mapping itself. */
362 | 	if (nv_mem_context->callback_task == current) {
363 | 		peer_dbg("no-op in callback context\n");
364 | 		goto out;
365 | 	}
366 | 
367 | 	peer_dbg("nv_mem_context=%p\n", nv_mem_context);
368 | 
369 | #if NV_DMA_MAPPING
370 | 	{
371 | 		struct pci_dev *pdev = to_pci_dev(dma_device);
372 | 		if (nv_mem_context->dma_mapping)
373 | 			nvidia_p2p_dma_unmap_pages(pdev, nv_mem_context->page_table,
374 | 						   nv_mem_context->dma_mapping);
375 | 	}
376 | #endif
377 | 
378 | out:
379 | 	return 0;
380 | }
381 | 
382 | /* Release the pages obtained by a get_pages handler via nvidia_p2p_put_pages(); its error is ignored unless _DEBUG_ONLY_. */
383 | static void nv_mem_put_pages(struct sg_table *sg_head, void *context)
384 | {
385 | 	int ret = 0;
386 | 	struct nv_mem_context *nv_mem_context =
387 | 		(struct nv_mem_context *) context;
388 | 
389 | 	if (!nv_mem_context) {
390 | 		peer_err("nv_mem_put_pages -- invalid nv_mem_context\n");
391 | 		return;
392 | 	}
393 | 	/* sg_head must be byte-identical to the table saved by nv_dma_map(). */
394 | 	if (WARN_ON(0 != memcmp(sg_head, &nv_mem_context->sg_head, sizeof(*sg_head))))
395 | 		return;
396 | 	/* Invoked from inside nv_get_p2p_free_callback(): that callback frees the page table itself. */
397 | 	if (nv_mem_context->callback_task == current) {
398 |             	peer_dbg("no-op in callback context\n");
399 | 		return;
400 |         }
401 | 
402 |         peer_dbg("nv_mem_context=%p\n", nv_mem_context);
403 | 
404 | 	ret = nvidia_p2p_put_pages(0, 0, nv_mem_context->page_virt_start,
405 | 				   nv_mem_context->page_table);
406 | 
407 | #ifdef _DEBUG_ONLY_
408 | 	/* Here we expect an error in real life cases that should be ignored - not printed.
409 | 	  * (e.g. concurrent callback with that call)
410 | 	*/
411 | 	if (ret < 0) {
412 | 		printk(KERN_ERR "error %d while calling nvidia_p2p_put_pages, page_table=%p \n",
413 | 			ret,  nv_mem_context->page_table);
414 | 	}
415 | #endif
416 | 	return;
417 | }
418 | /* Final teardown of a context: free the saved sg table if any, free the context, drop nv_mem_acquire()'s module ref. */
419 | static void nv_mem_release(void *context)
420 | {
421 | 	struct nv_mem_context *nv_mem_context =
422 | 		(struct nv_mem_context *) context;
423 | 	if (nv_mem_context->sg_allocated) {
424 | 		peer_dbg("freeing sg_head.sgl=%p\n", nv_mem_context->sg_head.sgl);
425 | 		sg_free_table(&nv_mem_context->sg_head);
426 | 		nv_mem_context->sg_allocated = 0;
427 | 	}
428 | 	kfree(nv_mem_context);
429 | 	module_put(THIS_MODULE);
430 | 	return;
431 | }
432 | /* get_pages handler of the main client: requests the page table with nv_get_p2p_free_callback; 0 or -errno. */
433 | static int nv_mem_get_pages(unsigned long addr,
434 | 			  size_t size, int write, int force,
435 | 			  struct sg_table *sg_head,
436 | 			  void *client_context,
437 | #ifndef PEER_MEM_U64_CORE_CONTEXT
438 | 			  void *core_context)
439 | #else
440 | 			  u64 core_context)
441 | #endif
442 | {
443 | 	int ret;
444 | 	struct nv_mem_context *nv_mem_context;
445 | 
446 | 	nv_mem_context = (struct nv_mem_context *)client_context;
447 | 	if (!nv_mem_context)
448 | 		return -EINVAL;
449 | 	/* core_context is what the free callback later hands to mem_invalidate_callback. */
450 | 	nv_mem_context->core_context = core_context;
451 | 	nv_mem_context->page_size = GPU_PAGE_SIZE;
452 | 	/* The range used is the one computed in nv_mem_acquire(); addr and size are not read here. */
453 | 	ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
454 | 			&nv_mem_context->page_table, nv_get_p2p_free_callback, nv_mem_context);
455 | 	if (ret < 0) {
456 | 		peer_err("nv_mem_get_pages -- error %d while calling nvidia_p2p_get_pages()\n", ret);
457 | 		return ret;
458 | 	}
459 | 
460 | 	/* No extra access to nv_mem_context->page_table here as we are
461 | 	    called not under a lock and may race with inflight invalidate callback on that buffer.
462 | 	    Extra handling was delayed to be done under nv_dma_map.
463 | 	 */
464 | 	return 0;
465 | }
466 | 
467 | /* Returns the context's page_size, which the get_pages handlers set to GPU_PAGE_SIZE. */
468 | static unsigned long nv_mem_get_page_size(void *context)
469 | {
470 | 	struct nv_mem_context *nv_mem_context =
471 | 				(struct nv_mem_context *)context;
472 | 
473 | 	return nv_mem_context->page_size;
474 | 
475 | }
476 | /* Main client; registered in nv_mem_client_init() together with &mem_invalidate_callback. */
477 | static struct peer_memory_client_ex nv_mem_client_ex = { .client = {
478 | 	.acquire        = nv_mem_acquire,
479 | 	.get_pages  = nv_mem_get_pages,
480 | 	.dma_map    = nv_dma_map,
481 | 	.dma_unmap  = nv_dma_unmap,
482 | 	.put_pages  = nv_mem_put_pages,
483 | 	.get_page_size  = nv_mem_get_page_size,
484 | 	.release        = nv_mem_release,
485 | }};
486 | 
487 | /* get_pages handler of the "_nc" client: requests the page table with a NULL free callback; 0 or -errno. */
488 | static int nv_mem_get_pages_nc(unsigned long addr,
489 | 			  size_t size, int write, int force,
490 | 			  struct sg_table *sg_head,
491 | 			  void *client_context,
492 | #ifndef PEER_MEM_U64_CORE_CONTEXT
493 | 			  void *core_context)
494 | #else
495 | 			  u64 core_context)
496 | #endif
497 | {
498 | 	int ret;
499 | 	struct nv_mem_context *nv_mem_context;
500 | 
501 | 	peer_dbg("nv_mem_get_pages_nc -- addr:%lx size:%zu\n", addr, size);
502 | 
503 | 	nv_mem_context = (struct nv_mem_context *)client_context;
504 | 	if (!nv_mem_context)
505 | 		return -EINVAL;
506 | 	/* This client is registered only when persistent pages are supported (see nv_mem_client_init). */
507 | 	BUG_ON(!nv_support_persistent_pages());
508 | 
509 | 	nv_mem_context->core_context = core_context;
510 | 	nv_mem_context->page_size = GPU_PAGE_SIZE;
511 | 	/* The range used is the one computed in nv_mem_acquire(). */
512 | 	ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
513 | 			&nv_mem_context->page_table, NULL, NULL);
514 | 	if (ret < 0) {
515 | 		peer_err("nv_mem_get_pages_nc -- error %d while calling nvidia_p2p_get_pages() with NULL callback\n", ret);
516 | 		return ret;
517 | 	}
518 | 
519 | 	/* No extra access to nv_mem_context->page_table here as we are
520 | 	    called not under a lock and may race with inflight invalidate callback on that buffer.
521 | 	    Extra handling was delayed to be done under nv_dma_map.
522 | 	 */
523 | 	return 0;
524 | }
525 | /* "_nc" client: same handlers as nv_mem_client_ex except get_pages; registered with a NULL invalidate callback. */
526 | static struct peer_memory_client nv_mem_client_nc = {
527 | 	.acquire        = nv_mem_acquire,
528 | 	.get_pages      = nv_mem_get_pages_nc,
529 | 	.dma_map        = nv_dma_map,
530 | 	.dma_unmap      = nv_dma_unmap,
531 | 	.put_pages      = nv_mem_put_pages,
532 | 	.get_page_size  = nv_mem_get_page_size,
533 | 	.release        = nv_mem_release,
534 | };
535 | 
536 | /* Module init: register the main client and, if persistent pages are supported, the "_nc" client; 0 or -EINVAL. */
537 | static int __init nv_mem_client_init(void)
538 | {
539 | 	int status = 0;
540 | 
541 | 	// off by one, to leave space for the trailing '1' which is flagging
542 | 	// the new client type
543 | 	BUG_ON(strlen(DRV_NAME) > IB_PEER_MEMORY_NAME_MAX-1);
544 | 	strcpy(nv_mem_client_ex.client.name, DRV_NAME);
545 | 
546 | 	// [VER_MAX-1]=1 <-- last byte is used as flag
547 | 	// [VER_MAX-2]=0 <-- version string terminator
548 | 	BUG_ON(strlen(DRV_VERSION) > IB_PEER_MEMORY_VER_MAX-2);
549 | 	strcpy(nv_mem_client_ex.client.version, DRV_VERSION);
550 | 
551 | 	// Register as new-style client
552 | 	// Needs updated peer_mem patch, but is harmless otherwise
553 | 	nv_mem_client_ex.client.version[IB_PEER_MEMORY_VER_MAX-1] = 1;
554 | 	nv_mem_client_ex.ex_size = sizeof(struct peer_memory_client_ex);
555 | 
556 | 	// PEER_MEM_INVALIDATE_UNMAPS allow clients to opt out of
557 | 	// unmap/put_pages during invalidation, i.e. the client tells the
558 | 	// infiniband layer that it does not need to call
559 | 	// unmap/put_pages in the invalidation callback
560 | 	nv_mem_client_ex.flags = PEER_MEM_INVALIDATE_UNMAPS;
561 | 
562 | 	reg_handle = ib_register_peer_memory_client(&nv_mem_client_ex.client,
563 | 						    &mem_invalidate_callback);
564 | 	if (!reg_handle) {
565 | 		peer_err("nv_mem_client_init -- error while registering client\n");
566 | 		status = -EINVAL;
567 | 		goto out;
568 | 	}
569 | 
570 | 	// Register the NC client only if nvidia.ko supports persistent pages
571 | 	if (nv_support_persistent_pages()) {
572 | 		strcpy(nv_mem_client_nc.name, DRV_NAME "_nc");
573 | 		strcpy(nv_mem_client_nc.version, DRV_VERSION);
574 | 		reg_handle_nc = ib_register_peer_memory_client(&nv_mem_client_nc, NULL);
575 | 		if (!reg_handle_nc) {
576 | 			peer_err("nv_mem_client_init -- error while registering nc client\n");
577 | 			status = -EINVAL;
578 | 			goto out;
579 | 		}
580 | 	}
581 | 
582 | out:
583 | 	if (status) {	/* on failure, undo whichever registration already succeeded */
584 | 		if (reg_handle) {
585 | 			ib_unregister_peer_memory_client(reg_handle);
586 | 			reg_handle = NULL;
587 | 		}
588 | 
589 | 		if (reg_handle_nc) {
590 | 			ib_unregister_peer_memory_client(reg_handle_nc);
591 | 			reg_handle_nc = NULL;
592 | 		}
593 | 	}
594 | 
595 | 	return status;
596 | }
597 | 
598 | static void __exit nv_mem_client_cleanup(void)
599 | {
600 | 	if (reg_handle)
601 | 		ib_unregister_peer_memory_client(reg_handle);
602 | 
603 | 	if (reg_handle_nc)
604 | 		ib_unregister_peer_memory_client(reg_handle_nc);
605 | }
606 | 
607 | module_init(nv_mem_client_init);
608 | module_exit(nv_mem_client_cleanup);
609 | 


--------------------------------------------------------------------------------
/nv_peer_mem.conf:
--------------------------------------------------------------------------------
1 | # Load nv_peer_mem module upon boot
2 | ONBOOT=yes
3 | 


--------------------------------------------------------------------------------
/nv_peer_mem.upstart:
--------------------------------------------------------------------------------
 1 | # nv_peer_mem
 2 | #
 3 | 
 4 | description	"load nv_peer_mem module"
 5 | 
 6 | start on (startup
 7 |           and started openibd)
 8 | stop on runlevel [!2345]
 9 | 
10 | pre-start script
11 | 	exec /etc/init.d/nv_peer_mem start
12 | end script
13 | 
14 | post-stop exec /etc/init.d/nv_peer_mem stop
15 | 
16 | 


--------------------------------------------------------------------------------
/nvidia_peer_memory.spec:
--------------------------------------------------------------------------------
  1 | %define debug_package %{nil}
  2 | %{!?_release: %define _release 0}
  3 | %{!?KVERSION: %define KVERSION %(uname -r)}
  4 | 
  5 | %define MODPROBE %(if ( /sbin/modprobe -c | grep -q '^allow_unsupported_modules  *0'); then echo -n "/sbin/modprobe --allow-unsupported-modules"; else echo -n "/sbin/modprobe"; fi )
  6 | 
  7 | Summary: nvidia_peer_memory
  8 | Name: nvidia_peer_memory
  9 | Version: 1.3
 10 | Release: %{_release}
 11 | License: GPL
 12 | Group: System Environment/Libraries
 13 | Source: %{name}-%{version}.tar.gz
 14 | BuildRequires: gcc kernel-headers
 15 | BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 16 | URL: http://www.mellanox.com
 17 | Prefix: %{prefix}
 18 | Packager: <yishaih@mellanox.com>
 19 | 
 20 | %description
 21 | 
 22 | nvidia peer memory kernel module.
 23 | 
 24 | %prep
 25 | %setup -n %{name}-%{version}
 26 | 
 27 | %build
 28 | export KVER=%{KVERSION}
 29 | make KVER=$KVER all
 30 | 
 31 | %install
 32 | 
 33 | #install kernel module
 34 | export KVER=%{KVERSION}
 35 | make DESTDIR=$RPM_BUILD_ROOT KVER=$KVER install
 36 | 
 37 | # Copy configuration file
 38 | install -d $RPM_BUILD_ROOT/etc/infiniband
 39 | install -m 0644 $RPM_BUILD_DIR/%{name}-%{version}/nv_peer_mem.conf $RPM_BUILD_ROOT/etc/infiniband
 40 | 
 41 | # Install nv_peer_mem service script
 42 | install -d $RPM_BUILD_ROOT/etc/init.d
 43 | install -m 0755 $RPM_BUILD_DIR/%{name}-%{version}/nv_peer_mem $RPM_BUILD_ROOT/etc/init.d
 44 | 
 45 | %post
 46 | depmod -a
 47 | %{MODPROBE} -rq nv_peer_mem||:
 48 | %{MODPROBE} nv_peer_mem||:
 49 | 
 50 | if [[ -f /etc/redhat-release || -f /etc/rocks-release ]]; then
 51 | perl -i -ne 'if (m@^#!/bin/bash@) {
 52 |         print q@#!/bin/bash
 53 | #
 54 | # Bring up/down nv_peer_mem
 55 | #
 56 | # chkconfig: 2345 05 95
 57 | # description: Activates/Deactivates nv_peer_mem module to \
 58 | #              start at boot time.
 59 | #
 60 | ### BEGIN INIT INFO
 61 | # Provides:       nv_peer_mem
 62 | # Required-Start: openibd
 63 | # Required-Stop:
 64 | ### END INIT INFO
 65 | @;
 66 |                  } else {
 67 |                      print;
 68 |                  }' /etc/init.d/nv_peer_mem
 69 | 
 70 |         if ! ( /sbin/chkconfig --del nv_peer_mem > /dev/null 2>&1 ); then
 71 |                 true
 72 |         fi
 73 |         if ! ( /sbin/chkconfig --add nv_peer_mem > /dev/null 2>&1 ); then
 74 |                 true
 75 |         fi
 76 | fi
 77 | 
 78 | if [ -f /etc/SuSE-release ]; then
 79 |         perl -i -ne "if (m@^#!/bin/bash@) {
 80 |         print q@#!/bin/bash
 81 | ### BEGIN INIT INFO
 82 | # Provides:       nv_peer_mem
 83 | # Required-Start: openibd
 84 | # Required-Stop:
 85 | # Default-Start:  2 3 5
 86 | # Default-Stop: 0 1 2 6
 87 | # Description:    Activates/Deactivates nv_peer_mem module to \
 88 | #                 start at boot time.
 89 | ### END INIT INFO
 90 | @;
 91 |                  } else {
 92 |                      print;
 93 |                  }" /etc/init.d/nv_peer_mem
 94 | 
 95 |         if ! ( /sbin/insserv nv_peer_mem > /dev/null 2>&1 ); then
 96 |                 true
 97 |         fi
 98 | fi
 99 | 
%preun
# Only tear things down on a real uninstall. The first scriptlet argument is
# the number of instances of this package that remain afterwards: 0 on erase,
# 1 on upgrade. During an upgrade this (old) scriptlet runs after the new
# package's post-install step, so unloading the module or deregistering the
# service here would undo what that step just set up.
if [ "$1" = 0 ]; then
	%{MODPROBE} -rq nv_peer_mem || :

	if [[ -f /etc/redhat-release || -f /etc/rocks-release ]]; then
		/sbin/chkconfig --del nv_peer_mem > /dev/null 2>&1 || :
	fi
	if [ -f /etc/SuSE-release ]; then
		/sbin/insserv -r nv_peer_mem > /dev/null 2>&1 || :
	fi
fi
113 | 
%clean
# We may be in the directory that we're about to remove, so cd out of
# there before we remove it
cd /tmp
# Remove the unpacked build tree after the rpm build finished. Paths are
# quoted so that whitespace in them cannot split the rm/chmod arguments.
chmod -R o+w "$RPM_BUILD_DIR/%{name}-%{version}"
rm -rf "$RPM_BUILD_DIR/%{name}-%{version}"

# Remove the build root, but never an unset one or the filesystem root.
# Written as an if-block so the section still exits 0 when nothing is removed.
if [ -n "$RPM_BUILD_ROOT" ] && [ "$RPM_BUILD_ROOT" != "/" ]; then
	rm -rf "$RPM_BUILD_ROOT"
fi
123 | 
124 | 
125 | %files
126 | %defattr(-, root, root)
127 | /lib/modules/%{KVERSION}/
128 | /etc/init.d/nv_peer_mem
129 | /etc/infiniband/nv_peer_mem.conf
130 | 


--------------------------------------------------------------------------------