├── .gitignore ├── .travis.yml ├── COPYING ├── Makefile.am ├── README.md ├── autogen.sh ├── ci └── build.sh ├── configure.ac ├── documentation └── libnvme.doxygen ├── exports ├── include ├── Makemodule.am └── libnvme │ ├── nvme.h │ └── nvme_spec.h ├── lib ├── common │ ├── Makemodule.am │ ├── nvme_arch.h │ ├── nvme_atomic.h │ ├── nvme_common.c │ ├── nvme_common.h │ ├── nvme_cpu.c │ ├── nvme_cpu.h │ ├── nvme_log.c │ ├── nvme_log.h │ ├── nvme_mem.c │ ├── nvme_mem.h │ ├── nvme_pci.c │ ├── nvme_pci.h │ ├── nvme_rwlock.h │ └── nvme_spinlock.h └── nvme │ ├── Makemodule.am │ ├── nvme.c │ ├── nvme_admin.c │ ├── nvme_ctrlr.c │ ├── nvme_intel.h │ ├── nvme_internal.h │ ├── nvme_ns.c │ ├── nvme_qpair.c │ ├── nvme_quirks.c │ └── nvme_request.c ├── libnvme.pc.in ├── m4 ├── acx_pthread.m4 ├── dontremove ├── libtool.m4 ├── ltoptions.m4 ├── ltsugar.m4 ├── ltversion.m4 └── lt~obsolete.m4 └── tools ├── configure_hugepages.sh ├── info ├── Makemodule.am └── nvme_info.c └── perf ├── Makemodule.am ├── nvme_perf.c └── nvme_perf.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.lo 4 | *.ko 5 | *.obj 6 | *.elf 7 | *.mod.c 8 | .*.o.cmd 9 | .*.ko.cmd 10 | *.lib 11 | *.a 12 | *.la 13 | 14 | # Shared objects (inc. 
Windows DLLs) 15 | *.dll 16 | *.so 17 | *.so.* 18 | *.dylib 19 | 20 | # Executables 21 | *.exe 22 | *.out 23 | *.app 24 | *.i*86 25 | *.x86_64 26 | *.hex 27 | nvme_perf 28 | nvme_info 29 | 30 | # Kernel compile files 31 | *.symvers 32 | modules.order 33 | .tmp_versions 34 | 35 | # Dependency files 36 | .depfile 37 | .depend 38 | 39 | # Windows-specific files 40 | Thumbs.db 41 | desktop.ini 42 | 43 | # Mac-specific things (thanks to Michael Aaron Safyan) 44 | .DS_Store 45 | 46 | # Editors temporary files 47 | *~ 48 | 49 | # Autotools stuff 50 | Makefile.in 51 | aclocal.m4 52 | autom4te.cache/ 53 | build-aux/ 54 | configure 55 | include/config.h.in 56 | Makefile 57 | config.log 58 | config.status 59 | include/config.h 60 | include/stamp-h1 61 | .deps 62 | libtool 63 | libnvme.pc 64 | .libs 65 | .dirstamp 66 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | # The gcc compiler in the default distro (precise) is too old for libnvme. 3 | # Hence use trusty. 4 | dist: trusty 5 | addons: 6 | apt: 7 | packages: 8 | - build-essential 9 | - gcc 10 | - libpciaccess-dev 11 | - make 12 | - libnuma-dev 13 | script: ci/build.sh 14 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) Intel Corporation. All rights reserved. 3 | Copyright (c) 2017, Western Digital Corporation or its affiliates. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, 10 | this list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 | TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # 2 | # BSD LICENSE 3 | # 4 | # Copyright (c) Intel Corporation. All rights reserved. 5 | # Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions 9 | # are met: 10 | # 11 | # * Redistributions of source code must retain the above copyright 12 | # notice, this list of conditions and the following disclaimer. 13 | # * Redistributions in binary form must reproduce the above copyright 14 | # notice, this list of conditions and the following disclaimer in 15 | # the documentation and/or other materials provided with the 16 | # distribution. 
17 | # * Neither the name of Intel Corporation nor the names of its 18 | # contributors may be used to endorse or promote products derived 19 | # from this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | # 33 | 34 | ACLOCAL_AMFLAGS = -I m4 35 | 36 | SUBDIRS = . 
$(subdirs) 37 | EXTRA_DIST = autogen.sh exports 38 | 39 | AM_CPPFLAGS = \ 40 | -O2 \ 41 | -Wall -Wextra -Wno-unused-parameter \ 42 | -I$(top_srcdir)/include 43 | # -msse4.1 -mavx 44 | 45 | pkgconfdir = $(libdir)/pkgconfig 46 | pkgconf_DATA = libnvme.pc 47 | pkginclude_HEADERS = 48 | 49 | bin_PROGRAMS = 50 | noinst_PROGRAMS = 51 | 52 | noinst_HEADERS = 53 | noinst_LTLIBRARIES = libnvme_common.la 54 | 55 | include include/Makemodule.am 56 | include lib/common/Makemodule.am 57 | 58 | lib_LTLIBRARIES = libnvme.la 59 | libnvme_la_DEPENDENCIES = exports libnvme_common.la 60 | libnvme_ldadd = $(LDADD) libnvme.la 61 | 62 | include lib/nvme/Makemodule.am 63 | include tools/perf/Makemodule.am 64 | include tools/info/Makemodule.am 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | LIBNVME 3 | ======= 4 | 5 | libnvme provides a user space driver for NVMe PCI devices. 6 | 7 | libnvme is based on code from the Storage Performance Development Kit 8 | (SPDK) available at https://github.com/spdk/spdk. In its current 9 | version, the libnvme API is very similar to that of SPDK. However, libnvme 10 | removes all dependencies on the Data Plane Development Kit (DPDK). To 11 | this end, some functions necessary to the library operation were 12 | reimplemented. This mainly consists of memory management functions 13 | using huge-pages mappings in order to gain access to physically 14 | contiguous large memory areas. 15 | 16 | In addition to the device driver library itself, a set of tools is 17 | also provided for quick testing and as examples of how to use the 18 | library functions. 19 | 20 | License 21 | ======= 22 | 23 | libnvme is distributed under the terms of the BSD 2-clause 24 | license ("Simplified BSD License" or "FreeBSD License"). A copy of 25 | this license with the library copyright can be found in the 26 | COPYING file. 
27 | 28 | Since libnvme is based on SPDK code, libnvme also retains the 29 | copyright of SPDK as indicated in the COPYING file. 30 | 31 | libnvme and all its example applications are distributed "as is," 32 | without technical support, and WITHOUT ANY WARRANTY, without even the 33 | implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 34 | PURPOSE. Along with libnvme, you should have received a copy of the 35 | BSD 2-clause license. If not, please see 36 | <https://opensource.org/licenses/BSD-2-Clause>. 37 | 38 | Documentation 39 | ============= 40 | 41 | libnvme API documentation can be generated using doxygen. To generate 42 | documentation in html format, the libnvme.doxygen file in the 43 | documentation directory can be used. 44 | 45 | > cd documentation 46 | > doxygen libnvme.doxygen 47 | 48 | Contact and Bug Reports 49 | ======================= 50 | 51 | Please contact Damien Le Moal (damien.lemoal@wdc.com) or 52 | Christophe Louargant (christophe.louargant@wdc.com) to report problems. 53 | 54 | Requirements 55 | ============ 56 | 57 | To build libnvme, the following dependencies must be installed. 58 | 59 | * libpciaccess and associated development headers (libpciaccess and 60 | libpciaccess-devel) 61 | * libnuma and associated development headers (numactl-libs and 62 | numactl-devel) 63 | * autotools GNU development tools (autoconf, automake and libtool) 64 | 65 | Compilation 66 | =========== 67 | 68 | To compile and install libnvme, execute the following commands. 69 | 70 | > sh ./autogen.sh 71 | > ./configure 72 | > make 73 | > sudo make install 74 | 75 | The library files are installed by default in /usr/lib. The library 76 | header files are installed in /usr/include/libnvme. The example tools 77 | provided are installed in /usr/bin. 78 | 79 | Execution Prerequisites 80 | ======================= 81 | 82 | libnvme requires some hugepages to be available for allocation through 83 | the hugetlbfs file system. This implies first that some hugepages must 84 | be reserved. 
The following command, executed as root, achieves that. 85 | 86 | echo 32 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages 87 | 88 | Next, a mount point for hugetlbfs must exist. For a system with 89 | transparent hugepage support enabled, this is generally the case by 90 | default. This can be checked using the following command. 91 | 92 | > cat /proc/mounts | grep hugetlbfs 93 | hugetlbfs /dev/hugepages hugetlbfs rw,seclabel,relatime 0 0 94 | 95 | If no hugetlbfs mount exists, one can be created using the following 96 | commands. 97 | 98 | > mkdir -p /mnt/hugepages 99 | > mount -t hugetlbfs nodev /mnt/hugepages 100 | 101 | libnvme, upon initialization, will automatically detect and use the 102 | first available hugetlbfs mount. 103 | 104 | Target PCI NVMe devices must be unbound from the native kernel 105 | drivers. This can be done manually per device using the following 106 | command. 107 | 108 | > echo "0000:01:00.0" > /sys/bus/pci/drivers/nvme/unbind 109 | 110 | Where "0000:01:00.0" is the PCI ID of the device to unbind from the 111 | kernel nvme driver. 112 | 113 | Tools 114 | ===== 115 | 116 | Two simple applications are provided with libnvme for testing and as 117 | examples of the library API use. The first application is nvme_info 118 | and allows getting information on an NVMe device. 
119 | 120 | > nvme_info pci://0000:03:00.0 121 | Opening NVMe controller pci://0000:03:00.0 122 | Model name: INTEL SSDPEDMW400G4 123 | Serial number: CVCQ542500ZJ400AGN 124 | HW maximum queue entries: 4096 125 | Maximum queue depth: 1024 126 | Maximum request size: 128 KiB 127 | 1 namespaces: 128 | Namespace 1/1: 512 bytes sectors, 781422768 sectors (372 GiB) 129 | 31 I/O queue pairs: 130 | qpair 1/31: ID 1, max qd 1024, prio 0 131 | qpair 2/31: ID 2, max qd 1024, prio 0 132 | qpair 3/31: ID 3, max qd 1024, prio 0 133 | qpair 4/31: ID 4, max qd 1024, prio 0 134 | qpair 5/31: ID 5, max qd 1024, prio 0 135 | qpair 6/31: ID 6, max qd 1024, prio 0 136 | qpair 7/31: ID 7, max qd 1024, prio 0 137 | qpair 8/31: ID 8, max qd 1024, prio 0 138 | qpair 9/31: ID 9, max qd 1024, prio 0 139 | qpair 10/31: ID 10, max qd 1024, prio 0 140 | qpair 11/31: ID 11, max qd 1024, prio 0 141 | qpair 12/31: ID 12, max qd 1024, prio 0 142 | qpair 13/31: ID 13, max qd 1024, prio 0 143 | qpair 14/31: ID 14, max qd 1024, prio 0 144 | qpair 15/31: ID 15, max qd 1024, prio 0 145 | qpair 16/31: ID 16, max qd 1024, prio 0 146 | qpair 17/31: ID 17, max qd 1024, prio 0 147 | qpair 18/31: ID 18, max qd 1024, prio 0 148 | qpair 19/31: ID 19, max qd 1024, prio 0 149 | qpair 20/31: ID 20, max qd 1024, prio 0 150 | qpair 21/31: ID 21, max qd 1024, prio 0 151 | qpair 22/31: ID 22, max qd 1024, prio 0 152 | qpair 23/31: ID 23, max qd 1024, prio 0 153 | qpair 24/31: ID 24, max qd 1024, prio 0 154 | qpair 25/31: ID 25, max qd 1024, prio 0 155 | qpair 26/31: ID 26, max qd 1024, prio 0 156 | qpair 27/31: ID 27, max qd 1024, prio 0 157 | qpair 28/31: ID 28, max qd 1024, prio 0 158 | qpair 29/31: ID 29, max qd 1024, prio 0 159 | qpair 30/31: ID 30, max qd 1024, prio 0 160 | qpair 31/31: ID 31, max qd 1024, prio 0 161 | 162 | The second application is a simple benchmark tool allowing to quickly 163 | measure a device performance. This application is called 164 | nvme_perf. 
An example run is shown below (512B random reads at queue 165 | depth 1). 166 | 167 | > nvme_perf -ns 1 -qd 1 -rnd pci://0000:03:00.0 512 168 | Opening NVMe controller pci://0000:03:00.0 169 | Attached NVMe controller INTEL SSDPEDMW400G4 (CVCQ542500ZJ400AGN) (1 namespace) 170 | Qpair 1, depth: 1024 171 | Device 0000:03:00.0, namespace 1: 172 | 372.611 GiB capacity (781422768 sectors of 512 B) 173 | Starting test on CPU 0 for 10 seconds: 174 | 100 % read I/O, 0 % write I/Os 175 | 512 B I/O size, random access, qd 1 176 | -> 2653607 I/Os in 10.000 secs 177 | 135.865 MB/sec, 265360 IOPS 178 | 3.768 usecs average I/O latency 179 | Detaching NVMe controller 0000:03:00.0 180 | 181 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | exec autoreconf -f -i 4 | -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ./autogen.sh && 4 | mkdir -p build && 5 | cd build && 6 | ../configure && 7 | make 8 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # Process this file with autoconf to produce a configure script. 
2 | 3 | #AC_PREREQ([2.69]) 4 | AC_INIT([libnvme], [0.0.1], [christophe.louargant@wdc.com, damien.lemoal@wdc.com]) 5 | AC_CONFIG_AUX_DIR([build-aux]) 6 | AC_CONFIG_MACRO_DIR([m4]) 7 | AC_CONFIG_HEADER([include/config.h]) 8 | 9 | AC_PREFIX_DEFAULT(/usr) 10 | 11 | AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects]) 12 | AM_SILENT_RULES([yes]) 13 | 14 | AC_PROG_CC 15 | AM_PROG_CC_C_O 16 | 17 | m4_ifdef([AM_PROG_AR], [AM_PROG_AR]) 18 | m4_pattern_allow([AM_PROG_AR]) 19 | LT_INIT 20 | 21 | ACX_PTHREAD 22 | LIBS="$PTHREAD_LIBS $LIBS" 23 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 24 | CC="$PTHREAD_CC" 25 | 26 | # Checks for header files. 27 | AC_CHECK_HEADERS([stddef.h stdint.h stdlib.h \ 28 | string.h inttypes.h \ 29 | unistd.h assert.h]) 30 | AC_CHECK_LIB(pciaccess, pci_device_probe, [], AC_MSG_ERROR([Couldn't find libpciaccess. Try installing pciaccess library.])) 31 | AC_CHECK_HEADER(pciaccess.h, [], [AC_MSG_ERROR([Couldn't find pciaccess.h. Try installing pciaccess library development package.])]) 32 | AC_CHECK_LIB(numa, mbind, [], AC_MSG_ERROR([Couldn't find libnuma. Try installing numa library package.])) 33 | AC_CHECK_HEADER(numaif.h, [], [AC_MSG_ERROR([Couldn't find numaif.h. Try installing numa library development package.])]) 34 | 35 | # CPU flags detection 36 | if grep "^flags.* sse" /proc/cpuinfo > /dev/null; then 37 | have_sse=yes 38 | fi 39 | if grep "^flags.* sse2" /proc/cpuinfo > /dev/null; then 40 | have_sse2=yes 41 | fi 42 | if grep "^flags.* avx" /proc/cpuinfo > /dev/null; then 43 | have_avx=yes 44 | fi 45 | 46 | if test "$have_sse" = "yes"; then have_sse2=yes; fi 47 | if test "$have_sse2" = "yes"; then 48 | AC_DEFINE(__SSE2__,1,[Define to enable SSE/SSE2 optimizations.]) 49 | fi 50 | AM_CONDITIONAL(__SSE2__, test "$have_sse2" = "yes") 51 | 52 | if test "$have_avx" = "yes"; then 53 | AC_DEFINE(__AVX__,1,[Define to enable AVX optimizations.]) 54 | fi 55 | AM_CONDITIONAL(__AVX__, test "$have_avx" = "yes") 56 | 57 | # Checks for library functions. 
58 | # AC_CHECK_FUNCS([memset]) 59 | AC_CONFIG_FILES([ 60 | libnvme.pc 61 | Makefile 62 | ]) 63 | 64 | AC_OUTPUT 65 | -------------------------------------------------------------------------------- /exports: -------------------------------------------------------------------------------- 1 | NVME_GLOBAL { 2 | global: 3 | 4 | nvme_lib_init; 5 | 6 | nvme_set_log_facility; 7 | nvme_get_log_facility; 8 | nvme_set_log_level; 9 | nvme_get_log_level; 10 | 11 | nvme_ctrlr_open; 12 | nvme_ctrlr_close; 13 | nvme_ctrlr_stat; 14 | nvme_ctrlr_data; 15 | nvme_ctrlr_set_feature; 16 | nvme_ctrlr_get_feature; 17 | nvme_ctrlr_update_firmware; 18 | 19 | nvme_ctrlr_attach_ns; 20 | nvme_ctrlr_detach_ns; 21 | nvme_ctrlr_create_ns; 22 | nvme_ctrlr_delete_ns; 23 | 24 | nvme_ioqp_get; 25 | nvme_ioqp_release; 26 | nvme_ioqp_submit_cmd; 27 | nvme_ioqp_poll; 28 | nvme_qpair_stat; 29 | 30 | nvme_ns_open; 31 | nvme_ns_close; 32 | nvme_ns_stat; 33 | nvme_ns_data; 34 | 35 | nvme_ns_write; 36 | nvme_ns_writev; 37 | nvme_ns_write_with_md; 38 | nvme_ns_write_zeroes; 39 | nvme_ns_read; 40 | nvme_ns_readv; 41 | nvme_ns_read_with_md; 42 | nvme_ns_deallocate; 43 | nvme_ns_flush; 44 | 45 | nvme_ns_format; 46 | nvme_ns_reservation_register; 47 | nvme_ns_reservation_release; 48 | nvme_ns_reservation_acquire; 49 | nvme_ns_reservation_report; 50 | 51 | nvme_malloc_node; 52 | nvme_free; 53 | nvme_memstat; 54 | 55 | local: 56 | *; 57 | }; 58 | -------------------------------------------------------------------------------- /include/Makemodule.am: -------------------------------------------------------------------------------- 1 | 2 | pkginclude_HEADERS += \ 3 | include/libnvme/nvme.h \ 4 | include/libnvme/nvme_spec.h 5 | 6 | noinst_HEADERS += 7 | -------------------------------------------------------------------------------- /lib/common/Makemodule.am: -------------------------------------------------------------------------------- 1 | 2 | UTILS_CFILES = \ 3 | lib/common/nvme_common.c \ 4 | 
lib/common/nvme_log.c \ 5 | lib/common/nvme_cpu.c \ 6 | lib/common/nvme_pci.c \ 7 | lib/common/nvme_mem.c 8 | 9 | UTILS_HFILES = \ 10 | include/libnvme/nvme.h \ 11 | include/libnvme/nvme_spec.h \ 12 | lib/common/nvme_arch.h \ 13 | lib/common/nvme_atomic.h \ 14 | lib/common/nvme_rwlock.h \ 15 | lib/common/nvme_spinlock.h \ 16 | lib/common/nvme_common.h \ 17 | lib/common/nvme_log.h \ 18 | lib/common/nvme_cpu.h \ 19 | lib/common/nvme_pci.h \ 20 | lib/common/nvme_mem.h 21 | 22 | AM_CPPFLAGS += -Ilib/common 23 | 24 | libnvme_common_la_SOURCES = $(UTILS_HFILES) $(UTILS_CFILES) 25 | libnvme_common_la_CFLAGS = $(AM_CPPFLAGS) 26 | 27 | -------------------------------------------------------------------------------- /lib/common/nvme_arch.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 
20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef __NVME_ARCH_H__ 35 | #define __NVME_ARCH_H__ 36 | 37 | #if defined(__x86_64__) 38 | 39 | #define NVME_ARCH "x86_64" 40 | #define NVME_ARCH_X86_64 1 41 | #define NVME_ARCH_64 1 42 | #undef NVME_ARCH_X86 43 | #define NVME_CACHE_LINE_SIZE 64 44 | #define NVME_MMIO_64BIT 1 45 | 46 | #elif defined(__i386__) 47 | 48 | #define NVME_ARCH "x86" 49 | #undef NVME_ARCH_X86_64 50 | #undef NVME_ARCH_64 51 | #define NVME_ARCH_X86 1 52 | #define NVME_CACHE_LINE_SIZE 64 53 | #undef NVME_MMIO_64BIT 54 | 55 | #else 56 | 57 | #error "Unsupported architecture type" 58 | 59 | #endif 60 | 61 | #ifndef asm 62 | #define asm __asm__ 63 | #endif 64 | 65 | /* 66 | * Compiler barrier. 67 | * Guarantees that operation reordering does not occur at compile time 68 | * for operations directly before and after the barrier. 69 | */ 70 | #define nvme_compiler_barrier() do { \ 71 | __asm__ volatile ("" : : : "memory"); \ 72 | } while(0) 73 | 74 | /* 75 | * General memory barrier. 76 | * Guarantees that the LOAD and STORE operations generated before the 77 | * barrier occur before the LOAD and STORE operations generated after. 
78 | * This function is architecture dependent. 79 | */ 80 | #define nvme_mb() __asm__ volatile("mfence" ::: "memory") 81 | 82 | /* 83 | * Write memory barrier. 84 | * Guarantees that the STORE operations generated before the barrier 85 | * occur before the STORE operations generated after. 86 | * This macro is architecture dependent (emits "sfence"). 87 | */ 88 | #define nvme_wmb() __asm__ volatile("sfence" ::: "memory") 89 | 90 | /* 91 | * Read memory barrier. 92 | * Guarantees that the LOAD operations generated before the barrier 93 | * occur before the LOAD operations generated after. 94 | * This macro is architecture dependent (emits "lfence"). 95 | */ 96 | #define nvme_rmb() __asm__ volatile("lfence" ::: "memory") 97 | 98 | /* 99 | * General memory barrier between CPUs. 100 | * Guarantees that the LOAD and STORE operations that precede the 101 | * nvme_smp_mb() call are globally visible across the CPUs 102 | * before the LOAD and STORE operations that follow it. Same as nvme_mb(). 103 | */ 104 | #define nvme_smp_mb() nvme_mb() 105 | 106 | /* 107 | * Write memory barrier between CPUs. 108 | * Guarantees that the STORE operations that precede the 109 | * nvme_smp_wmb() call are globally visible across the CPUs 110 | * before the STORE operations that follow it. Expands to a compiler barrier only. 111 | */ 112 | #define nvme_smp_wmb() nvme_compiler_barrier() 113 | 114 | /* 115 | * Read memory barrier between CPUs. 116 | * Guarantees that the LOAD operations that precede the 117 | * nvme_smp_rmb() call are globally visible across the CPUs 118 | * before the LOAD operations that follow it. Expands to a compiler barrier only. 119 | */ 120 | #define nvme_smp_rmb() nvme_compiler_barrier() 121 | 122 | /* 123 | * Get the number of cycles since boot from the default timer. 
124 | */ 125 | static inline __u64 nvme_rdtsc(void) 126 | { 127 | union { 128 | __u64 tsc_64; 129 | struct { 130 | __u32 lo_32; 131 | __u32 hi_32; 132 | }; 133 | } tsc; 134 | /* rdtsc returns the low 32 bits in EAX and the high 32 bits in EDX. */ 135 | asm volatile("rdtsc" : 136 | "=a" (tsc.lo_32), 137 | "=d" (tsc.hi_32)); 138 | return tsc.tsc_64; 139 | } 140 | /* Read a 32-bit value through a volatile pointer. */ 141 | static inline __u32 nvme_mmio_read_4(const volatile __u32 *addr) 142 | { 143 | return *addr; 144 | } 145 | /* Write a 32-bit value through a volatile pointer. */ 146 | static inline void nvme_mmio_write_4(volatile __u32 *addr, __u32 val) 147 | { 148 | *addr = val; 149 | } 150 | /* Read a 64-bit value: one access if NVME_MMIO_64BIT is defined, else two 32-bit reads. */ 151 | static inline __u64 nvme_mmio_read_8(volatile __u64 *addr) 152 | { 153 | #ifdef NVME_MMIO_64BIT 154 | return *addr; 155 | #else 156 | volatile __u32 *addr32 = (volatile __u32 *)addr; 157 | __u64 val; 158 | 159 | /* 160 | * Read lower 4 bytes before upper 4 bytes. 161 | * This particular order is required by I/OAT. 162 | * If the other order is required, use a pair of 163 | * nvme_mmio_read_4() calls. 164 | */ 165 | val = addr32[0]; 166 | val |= (__u64)addr32[1] << 32; 167 | 168 | return val; 169 | #endif 170 | } 171 | /* Write a 64-bit value: one access if NVME_MMIO_64BIT is defined, else low then high 32 bits. */ 172 | static inline void nvme_mmio_write_8(volatile __u64 *addr, __u64 val) 173 | { 174 | 175 | #ifdef NVME_MMIO_64BIT 176 | *addr = val; 177 | #else 178 | volatile __u32 *addr32 = (volatile __u32 *)addr; 179 | 180 | addr32[0] = (__u32)val; 181 | addr32[1] = (__u32)(val >> 32); 182 | #endif 183 | } 184 | 185 | #endif /* __NVME_ARCH_H__ */ 186 | -------------------------------------------------------------------------------- /lib/common/nvme_atomic.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef __NVME_ATOMIC_H__ 35 | #define __NVME_ATOMIC_H__ 36 | 37 | #include "nvme_common.h" 38 | 39 | #include 40 | 41 | /* 42 | * 32 bits atomic counter structure. 43 | */ 44 | typedef struct { 45 | volatile int32_t cnt; 46 | } nvme_atomic_t; 47 | 48 | /* 49 | * Static initializer for an atomic counter. 
50 | */ 51 | #define NVME_ATOMIC_INIT(val) { (val) } 52 | 53 | /* 54 | * Initialize an atomic counter to 0. 55 | */ 56 | static inline void nvme_atomic_init(nvme_atomic_t *v) 57 | { 58 | v->cnt = 0; 59 | } 60 | 61 | /* 62 | * Atomically read the current value of a counter. 63 | */ 64 | static inline int32_t nvme_atomic_read(const nvme_atomic_t *v) 65 | { 66 | return v->cnt; 67 | } 68 | 69 | /* 70 | * Atomically set a counter to the value new_value. 71 | */ 72 | static inline void nvme_atomic_set(nvme_atomic_t *v, 73 | int32_t new_value) 74 | { 75 | v->cnt = new_value; 76 | } 77 | 78 | /* 79 | * Atomically add the value inc to an atomic counter. 80 | */ 81 | static inline void nvme_atomic_add(nvme_atomic_t *v, int32_t inc) 82 | { 83 | __sync_fetch_and_add(&v->cnt, inc); 84 | } 85 | 86 | /* 87 | * Atomically subtract the value dec from an atomic counter. 88 | */ 89 | static inline void nvme_atomic_sub(nvme_atomic_t *v, int32_t dec) 90 | { 91 | __sync_fetch_and_sub(&v->cnt, dec); 92 | } 93 | 94 | /* 95 | * Atomically increment a counter by one. 96 | */ 97 | static inline void nvme_atomic_inc(nvme_atomic_t *v) 98 | { 99 | nvme_atomic_add(v, 1); 100 | } 101 | 102 | /* 103 | * Atomically decrement a counter by one. 104 | */ 105 | static inline void nvme_atomic_dec(nvme_atomic_t *v) 106 | { 107 | nvme_atomic_sub(v,1); 108 | } 109 | 110 | /* 111 | * Atomically add a value to a counter and return the value after the addition. 112 | */ 113 | static inline int32_t nvme_atomic_add_return(nvme_atomic_t *v, int32_t inc) 114 | { 115 | return __sync_add_and_fetch(&v->cnt, inc); 116 | } 117 | 118 | /* 119 | * Atomically subtract a value from an atomic counter 120 | * and return the value after the subtraction. 121 | */ 122 | static inline int32_t nvme_atomic_sub_return(nvme_atomic_t *v, int32_t dec) 123 | { 124 | return __sync_sub_and_fetch(&v->cnt, dec); 125 | } 126 | 127 | /* 128 | * Atomically increment the atomic by one and returns true if 129 | * the result is 0, or false in all other cases. 
130 | */ 131 | static inline int nvme_atomic_inc_and_test(nvme_atomic_t *v) 132 | { 133 | return __sync_add_and_fetch(&v->cnt, 1) == 0; 134 | } 135 | 136 | /* 137 | * Atomically decrement the atomic by one and return true if 138 | * the result is 0, or false in all other cases. 139 | */ 140 | static inline int nvme_atomic_dec_and_test(nvme_atomic_t *v) 141 | { 142 | return __sync_sub_and_fetch(&v->cnt, 1) == 0; 143 | } 144 | 145 | /* 146 | * Atomically test and set a 32-bit atomic counter. 147 | * If the counter value is already set (non-zero), return 0 (failed). Otherwise, set 148 | * the counter value to 1 and return 1 (success). 149 | */ 150 | static inline int nvme_atomic_test_and_set(nvme_atomic_t *v) 151 | { 152 | return __sync_bool_compare_and_swap((volatile uint32_t *)&v->cnt, 0, 1); 153 | } 154 | 155 | /* 156 | * Atomically set a counter to 0. 157 | */ 158 | static inline void nvme_atomic_clear(nvme_atomic_t *v) 159 | { 160 | v->cnt = 0; 161 | } 162 | 163 | /* 164 | * 64 bits atomic counter structure. 165 | */ 166 | typedef struct { 167 | volatile int64_t cnt; 168 | } nvme_atomic64_t; 169 | 170 | /* 171 | * Static initializer for a 64 bits atomic counter. 172 | */ 173 | #define NVME_ATOMIC64_INIT(val) { (val) } 174 | 175 | /* 176 | * Initialize a 64 bits atomic counter to 0. 177 | */ 178 | static inline void nvme_atomic64_init(nvme_atomic64_t *v) 179 | { 180 | #ifdef __LP64__ 181 | v->cnt = 0; 182 | #else 183 | int success = 0; 184 | uint64_t tmp; 185 | /* Retry the compare-and-swap until the current value is replaced by 0. */ 186 | while (success == 0) { 187 | tmp = v->cnt; 188 | success = __sync_bool_compare_and_swap((volatile uint64_t *)&v->cnt, tmp, 0); 189 | } 190 | #endif 191 | } 192 | 193 | /* 194 | * Atomically read a 64-bit counter. 
195 | */ 196 | static inline int64_t nvme_atomic64_read(nvme_atomic64_t *v) 197 | { 198 | #ifdef __LP64__ 199 | return v->cnt; 200 | #else 201 | int success = 0; 202 | uint64_t tmp; 203 | 204 | while (success == 0) { 205 | tmp = v->cnt; 206 | /* Replace the value by itself: the swap succeeds only if tmp is still the current value. */ 207 | success = __sync_bool_compare_and_swap((volatile uint64_t *)&v->cnt, 208 | tmp, tmp); 209 | } 210 | return tmp; 211 | #endif 212 | } 213 | 214 | /* 215 | * Atomically set a 64-bit counter to the value new_value. 216 | */ 217 | static inline void nvme_atomic64_set(nvme_atomic64_t *v, int64_t new_value) 218 | { 219 | #ifdef __LP64__ 220 | v->cnt = new_value; 221 | #else 222 | int success = 0; 223 | uint64_t tmp; 224 | /* Retry the compare-and-swap until the current value is replaced by new_value. */ 225 | while (success == 0) { 226 | tmp = v->cnt; 227 | success = __sync_bool_compare_and_swap((volatile uint64_t *)&v->cnt, 228 | tmp, new_value); 229 | } 230 | #endif 231 | } 232 | 233 | /* 234 | * Atomically add a 64-bit value to a counter. 235 | */ 236 | static inline void nvme_atomic64_add(nvme_atomic64_t *v, int64_t inc) 237 | { 238 | __sync_fetch_and_add(&v->cnt, inc); 239 | } 240 | 241 | /* 242 | * Atomically subtract a 64-bit value from a counter. 243 | */ 244 | static inline void nvme_atomic64_sub(nvme_atomic64_t *v, int64_t dec) 245 | { 246 | __sync_fetch_and_sub(&v->cnt, dec); 247 | } 248 | 249 | /* 250 | * Atomically increment a 64-bit counter by one (no value is returned). 251 | */ 252 | static inline void nvme_atomic64_inc(nvme_atomic64_t *v) 253 | { 254 | nvme_atomic64_add(v, 1); 255 | } 256 | 257 | /* 258 | * Atomically decrement a 64-bit counter by one (no value is returned). 259 | */ 260 | static inline void nvme_atomic64_dec(nvme_atomic64_t *v) 261 | { 262 | nvme_atomic64_sub(v, 1); 263 | } 264 | 265 | /* 266 | * Add a 64-bit value to an atomic counter and return the result. 267 | * Atomically adds the 64-bit value (inc) to the atomic counter (v) and 268 | * returns the value of v after the addition. 
269 | */ 270 | static inline int64_t nvme_atomic64_add_return(nvme_atomic64_t *v, int64_t inc) 271 | { 272 | return __sync_add_and_fetch(&v->cnt, inc); 273 | } 274 | 275 | /* 276 | * Subtract a 64-bit value from an atomic counter and return the result. 277 | * Atomically subtracts the 64-bit value (dec) from the atomic counter (v) 278 | * and returns the value of v after the subtraction. 279 | */ 280 | static inline int64_t nvme_atomic64_sub_return(nvme_atomic64_t *v, int64_t dec) 281 | { 282 | return __sync_sub_and_fetch(&v->cnt, dec); 283 | } 284 | 285 | /* 286 | * Atomically increment a 64-bit counter by one and test. 287 | * Atomically increments the atomic counter (v) by one and returns 288 | * true if the result is 0, or false in all other cases. 289 | */ 290 | static inline int nvme_atomic64_inc_and_test(nvme_atomic64_t *v) 291 | { 292 | return nvme_atomic64_add_return(v, 1) == 0; 293 | } 294 | 295 | /* 296 | * Atomically decrement a 64-bit counter by one and test. 297 | * Atomically decrements the atomic counter (v) by one and returns true if 298 | * the result is 0, or false in all other cases. 299 | */ 300 | static inline int nvme_atomic64_dec_and_test(nvme_atomic64_t *v) 301 | { 302 | return nvme_atomic64_sub_return(v, 1) == 0; 303 | } 304 | 305 | /* 306 | * Atomically test and set a 64-bit atomic counter. 307 | * If the counter value is already set, return 0 (failed). Otherwise, set 308 | * the counter value to 1 and return 1 (success). 309 | */ 310 | static inline int nvme_atomic64_test_and_set(nvme_atomic64_t *v) 311 | { 312 | return __sync_bool_compare_and_swap((volatile uint64_t *)&v->cnt, 0, 1); 313 | } 314 | 315 | /* 316 | * Atomically set a 64-bit counter to 0. 
317 | */ 318 | static inline void nvme_atomic64_clear(nvme_atomic64_t *v) 319 | { 320 | nvme_atomic64_set(v, 0); 321 | } 322 | 323 | #endif /* __NVME_ATOMIC_H__ */ 324 | -------------------------------------------------------------------------------- /lib/common/nvme_common.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. 5 | * Copyright(c) 2012-2014 6WIND S.A. All rights reserved. 6 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * * Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * * Redistributions in binary form must reproduce the above copyright 15 | * notice, this list of conditions and the following disclaimer in 16 | * the documentation and/or other materials provided with the 17 | * distribution. 18 | * * Neither the name of Intel Corporation nor the names of its 19 | * contributors may be used to endorse or promote products derived 20 | * from this software without specific prior written permission. 21 | * 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 | */ 34 | 35 | #include "nvme_pci.h" 36 | #include "nvme_common.h" 37 | #include "nvme_mem.h" 38 | #include "nvme_cpu.h" 39 | 40 | #include 41 | #include 42 | #include 43 | 44 | #if defined(NVME_ARCH_X86) 45 | #include 46 | #endif 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | /* 53 | * Trim whitespace from a string in place. 54 | */ 55 | void nvme_str_trim(char *s) 56 | { 57 | char *p, *q; 58 | 59 | /* Remove header */ 60 | p = s; 61 | while (*p != '\0' && isspace(*p)) 62 | p++; 63 | 64 | /* Remove tailer */ 65 | q = p + strlen(p); 66 | while (q - 1 >= p && isspace(*(q - 1))) { 67 | q--; 68 | *q = '\0'; 69 | } 70 | 71 | /* if remove header, move */ 72 | if (p != s) { 73 | q = s; 74 | while (*p != '\0') 75 | *q++ = *p++; 76 | *q = '\0'; 77 | } 78 | } 79 | 80 | /* 81 | * Split string into tokens 82 | */ 83 | int nvme_str_split(char *string, int stringlen, 84 | char **tokens, int maxtokens, char delim) 85 | { 86 | int i, tok = 0; 87 | int tokstart = 1; 88 | 89 | if (string == NULL || tokens == NULL) { 90 | errno = EINVAL; 91 | return -1; 92 | } 93 | 94 | for (i = 0; i < stringlen; i++) { 95 | if (string[i] == '\0' || tok >= maxtokens) 96 | break; 97 | if (tokstart) { 98 | tokstart = 0; 99 | tokens[tok++] = &string[i]; 100 | } 101 | if (string[i] == delim) { 102 | string[i] = '\0'; 103 | tokstart = 1; 104 | } 105 | } 106 | 107 | return tok; 108 | } 109 | 110 | /* 111 | * Parse a sysfs (or 
other) file containing one integer value 112 | */ 113 | int nvme_parse_sysfs_value(const char *filename, 114 | unsigned long *val) 115 | { 116 | FILE *f; 117 | char buf[BUFSIZ]; 118 | char *end = NULL; 119 | 120 | if ((f = fopen(filename, "r")) == NULL) { 121 | nvme_err("%s(): cannot open sysfs value %s\n", 122 | __func__, filename); 123 | return -1; 124 | } 125 | 126 | if (fgets(buf, sizeof(buf), f) == NULL) { 127 | nvme_err("%s(): cannot read sysfs value %s\n", 128 | __func__, filename); 129 | fclose(f); 130 | return -1; 131 | } 132 | *val = strtoul(buf, &end, 0); 133 | if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { 134 | nvme_err("%s(): cannot parse sysfs value %s\n", 135 | __func__, filename); 136 | fclose(f); 137 | return -1; 138 | } 139 | fclose(f); 140 | return 0; 141 | } 142 | 143 | /* 144 | * Get a block device block size in Bytes. 145 | */ 146 | ssize_t nvme_dev_get_blocklen(int fd) 147 | { 148 | uint32_t blocklen = 0; 149 | 150 | if (ioctl(fd, BLKSSZGET, &blocklen) < 0) { 151 | nvme_err("iioctl BLKSSZGET failed %d (%s)\n", 152 | errno, 153 | strerror(errno)); 154 | return -1; 155 | } 156 | 157 | return blocklen; 158 | } 159 | 160 | /* 161 | * Get a file size in Bytes. 162 | */ 163 | uint64_t nvme_file_get_size(int fd) 164 | { 165 | struct stat st; 166 | uint64_t size; 167 | 168 | if (fstat(fd, &st) != 0) 169 | return 0; 170 | 171 | if (S_ISLNK(st.st_mode)) 172 | return 0; 173 | 174 | if (S_ISBLK(st.st_mode) || S_ISCHR(st.st_mode)) { 175 | if (ioctl(fd, BLKGETSIZE64, &size) == 0) 176 | return size; 177 | else 178 | return 0; 179 | } 180 | 181 | if (S_ISREG(st.st_mode)) 182 | return st.st_size; 183 | 184 | /* Not REG, CHR or BLK */ 185 | return 0; 186 | } 187 | 188 | /* 189 | * Dump the stack of the calling core. 
190 | */ 191 | static void nvme_dump_stack(void) 192 | { 193 | #define BACKTRACE_SIZE 256 194 | void *func[BACKTRACE_SIZE]; 195 | char **symb = NULL; 196 | int size; 197 | 198 | size = backtrace(func, BACKTRACE_SIZE); 199 | symb = backtrace_symbols(func, size); 200 | 201 | if (symb == NULL) 202 | return; 203 | 204 | while (size > 0) { 205 | nvme_crit("%d: [%s]\n", size, symb[size - 1]); 206 | size --; 207 | } 208 | 209 | free(symb); 210 | } 211 | 212 | /* 213 | * call abort(), it will generate a coredump if enabled. 214 | */ 215 | void __nvme_panic(const char *funcname, const char *format, ...) 216 | { 217 | va_list ap; 218 | 219 | nvme_crit("PANIC in %s():\n", funcname); 220 | va_start(ap, format); 221 | nvme_vlog(NVME_LOG_CRIT, format, ap); 222 | va_end(ap); 223 | nvme_dump_stack(); 224 | abort(); 225 | } 226 | 227 | /** 228 | * Library initialization: must be run first by any application 229 | * before calling any libnvme API. 230 | */ 231 | int nvme_lib_init(enum nvme_log_level level, 232 | enum nvme_log_facility facility, const char *path) 233 | { 234 | int ret; 235 | 236 | /* Set log level and facility first */ 237 | nvme_set_log_level(level); 238 | nvme_set_log_facility(facility, path); 239 | 240 | /* Gather CPU information */ 241 | ret = nvme_cpu_init(); 242 | if (ret != 0) { 243 | nvme_crit("Failed to gather CPU information\n"); 244 | goto out; 245 | } 246 | 247 | /* PCI subsystem initialization (libpciaccess) */ 248 | ret = nvme_pci_init(); 249 | if (ret != 0) { 250 | nvme_crit("PCI subsystem initialization failed\n"); 251 | goto out; 252 | } 253 | 254 | /* Initialize memory management */ 255 | ret = nvme_mem_init(); 256 | if (ret != 0) 257 | nvme_crit("Memory management initialization failed\n"); 258 | 259 | out: 260 | 261 | return ret; 262 | } 263 | 264 | /* 265 | * Will be executed automatically last on termination of the user application. 
266 | */ 267 | __attribute__((destructor)) void nvme_lib_exit(void) 268 | { 269 | 270 | nvme_ctrlr_cleanup(); 271 | 272 | nvme_mem_cleanup(); 273 | 274 | } 275 | -------------------------------------------------------------------------------- /lib/common/nvme_common.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef __NVME_COMMON_H__ 35 | #define __NVME_COMMON_H__ 36 | 37 | #define _GNU_SOURCE 38 | #define _FILE_OFFSET_BITS 64 39 | 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | #include 55 | #include 56 | #include 57 | 58 | #include "libnvme/nvme.h" 59 | #include "nvme_arch.h" 60 | #include "nvme_log.h" 61 | 62 | /* 63 | * Check if a branch is likely to be taken. 64 | */ 65 | #ifndef likely 66 | #define likely(x) __builtin_expect((x),1) 67 | #endif /* likely */ 68 | 69 | /* 70 | * Check if a branch is unlikely to be taken. 71 | */ 72 | #ifndef unlikely 73 | #define unlikely(x) __builtin_expect((x),0) 74 | #endif /* unlikely */ 75 | 76 | #ifndef typeof 77 | #define typeof __typeof__ 78 | #endif 79 | 80 | /* 81 | * Trim whitespace from a string in place. 82 | */ 83 | extern void nvme_str_trim(char *s); 84 | 85 | /* 86 | * Split string into tokens 87 | */ 88 | extern int nvme_str_split(char *string, int stringlen, 89 | char **tokens, int maxtokens, char delim); 90 | 91 | /* 92 | * Converts a numeric string to the equivalent uint64_t value. 93 | * As well as straight number conversion, also recognises the suffixes 94 | * k, m and g for kilobytes, megabytes and gigabytes respectively. 95 | * 96 | * If a negative number is passed in, zero is returned. 
/*
 * Converts a numeric string to the equivalent size_t value.
 * As well as straight number conversion, also recognises the suffixes
 * k, m and g (either case, optionally preceded by a single space) for
 * kilobytes, megabytes and gigabytes respectively.
 *
 * If a negative number is passed in, zero is returned.
 * Zero is also returned in the case of an error with the strtoull
 * call in the function, and when the scaled value does not fit in
 * a size_t.
 */
static inline size_t nvme_str2size(const char *str)
{
	unsigned long long size;
	unsigned int shift = 0;
	char *endptr;

	/* <ctype.h> functions need an unsigned char value, never a negative char */
	while (isspace((unsigned char)*str))
		str++;
	if (*str == '-')
		return 0;

	errno = 0;
	size = strtoull(str, &endptr, 0);
	if (errno)
		return 0;

	/* Allow 1 space gap between number and unit */
	if (*endptr == ' ')
		endptr++;

	switch (*endptr) {
	case 'G':
	case 'g':
		shift = 30;
		break;
	case 'M':
	case 'm':
		shift = 20;
		break;
	case 'K':
	case 'k':
		shift = 10;
		break;
	default:
		break;
	}

	/* Refuse values that would wrap when scaled or narrowed to size_t */
	if (size > ((size_t)-1 >> shift))
		return 0;

	return (size_t)size << shift;
}

/*
 * Function to read a single numeric value from a file on the filesystem.
 * Used to read information from files on /sys
 */
extern int nvme_parse_sysfs_value(const char *filename, unsigned long *val);

/*
 * Get a file size in Bytes.
 */
extern uint64_t nvme_file_get_size(int fd);

/*
 * Get a block device block size in Bytes.
 */
extern ssize_t nvme_dev_get_blocklen(int fd);

/*
 * Get current time in nano seconds.
 * The value is read from CLOCK_REALTIME.
 */
static inline unsigned long long nvme_time_nsec(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);

	return (unsigned long long) ts.tv_sec * 1000000000ULL
		+ (unsigned long long) ts.tv_nsec;
}

/*
 * Get current time in micro seconds.
 */
static inline unsigned long long nvme_time_usec(void)
{
	return nvme_time_nsec() / 1000;
}

/*
 * Get current time in milli seconds.
 */
175 | */ 176 | static inline unsigned long long nvme_time_msec(void) 177 | { 178 | return nvme_time_nsec() / 1000000; 179 | } 180 | 181 | /* 182 | * PAUSE instruction for tight loops (avoid busy waiting) 183 | */ 184 | #ifdef __SSE2__ 185 | #include 186 | static inline void nvme_pause(void) 187 | { 188 | _mm_pause(); 189 | } 190 | #else 191 | static inline void nvme_pause(void) {} 192 | #endif 193 | 194 | /* 195 | * Micro-seconds sleep. 196 | */ 197 | static inline void nvme_usleep(int usecs) 198 | { 199 | struct timeval tv; 200 | 201 | tv.tv_sec = usecs / 1000000; 202 | tv.tv_usec = usecs % 1000000; 203 | select(0, NULL, NULL, NULL, &tv); 204 | } 205 | 206 | /* 207 | * Milli-seconds sleep. 208 | */ 209 | static inline void nvme_msleep(int msecs) 210 | { 211 | struct timeval tv; 212 | 213 | tv.tv_sec = msecs / 1000; 214 | tv.tv_usec = (msecs - tv.tv_sec * 1000) * 1000; 215 | select(0, NULL, NULL, NULL, &tv); 216 | } 217 | 218 | /* 219 | * Provide notification of a critical non-recoverable error and stop. 220 | * This function should not be called directly. Use nvme_panic() instead. 221 | */ 222 | extern void __nvme_panic(const char *funcname , const char *format, ...) 223 | #ifdef __GNUC__ 224 | #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) 225 | __attribute__((cold)) 226 | #endif 227 | #endif 228 | __attribute__((noreturn)) 229 | __attribute__((format(printf, 2, 3))); 230 | 231 | /* 232 | * Provide notification of a critical non-recoverable error 233 | * and terminate execution abnormally. 234 | */ 235 | #define nvme_panic(format, args...) \ 236 | __nvme_panic(__FUNCTION__, format, ## args) 237 | 238 | /* 239 | * Macro to evaluate a scalar expression and 240 | * abort the program if the assertion is false. 
/*
 * Macro to evaluate a scalar expression and
 * abort the program if the assertion is false.
 *
 * nvme_assert(exp) panics with the line number and the text of exp.
 * nvme_assert(exp, msg) panics with msg instead.
 */
#define _nvme_assert_default(exp) \
	do { \
		if (unlikely(!(exp))) \
			nvme_panic("line %d, assert %s failed\n", \
				   __LINE__, # exp); \
	} while (0)

#define _nvme_assert_msg(exp, msg) \
	do { \
		if (unlikely(!(exp))) \
			nvme_panic("%s\n", msg); \
	} while (0)

/* Selects the third argument: the _msg form for 2 args, _default for 1 */
#define _NVME_GET_ASSERT_OVERLOAD(_1, _2, NAME, args...) NAME

#define nvme_assert(args...) \
	_NVME_GET_ASSERT_OVERLOAD(args, \
				  _nvme_assert_msg, \
				  _nvme_assert_default) \
		(args)

/*
 * Macro to return the minimum of two numbers.
 * Each argument is evaluated exactly once.
 */
#define nvme_min(a, b) ({ \
	__typeof__ (a) _a = (a); \
	__typeof__ (b) _b = (b); \
	_a < _b ? _a : _b; \
})

/*
 * Macro to return the maximum of two numbers.
 * Each argument is evaluated exactly once.
 */
#define nvme_max(a, b) ({ \
	__typeof__ (a) _a = (a); \
	__typeof__ (b) _b = (b); \
	_a > _b ? _a : _b; \
})

/*
 * Returns true if v is a power of 2. Zero is not a power of 2.
 */
static inline int nvme_is_pow2(__u64 v)
{
	return v && !(v & (v - 1));
}

/*
 * Round v up to a power of 2.
 * A value that is already a power of 2 is returned unchanged. The result
 * is 0 when v is 0 or when the next power of 2 does not fit in 64 bits.
 */
static inline __u64 nvme_align_pow2(__u64 v)
{
	/* Smear the highest set bit of (v - 1) into every lower position */
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32;

	return v + 1;
}

/*
 * Calculate log2 of a power of 2 size.
 * Returns 0 if size is not a power of 2 (and also for size == 1).
 */
static inline size_t nvme_log2(size_t size)
{
	size_t bits;

	if (!nvme_is_pow2(size))
		return 0;

	for (bits = 0; size > 1; size >>= 1)
		bits++;

	return bits;
}

/*
 * Handle alignments.
 * The mask arithmetic expects align to be a power of 2.
 */
#define nvme_align_down(val, align) \
	((val) & (~((__typeof__(val))((align) - 1))))
#define nvme_align_up(val, align) \
	nvme_align_down((val) + (align) - 1, (align))

/*
 * Test a bit value.
 */
331 | */ 332 | static inline int test_bit(__u8 *bitmap, unsigned int bit) 333 | { 334 | return bitmap[bit >> 3] & (1U << (bit & 0x7)); 335 | } 336 | 337 | /* 338 | * Set a bit. 339 | */ 340 | static inline void set_bit(__u8 *bitmap, unsigned int bit) 341 | { 342 | bitmap[bit >> 3] |= 1U << (bit & 0x7); 343 | } 344 | 345 | /* 346 | * Clear a bit. 347 | */ 348 | static inline void clear_bit(__u8 *bitmap, unsigned int bit) 349 | { 350 | bitmap[bit >> 3] &= ~(1U << (bit & 0x7)); 351 | } 352 | 353 | /* 354 | * Find the first zero bit in a bitmap of size nr_bits. 355 | * If no zero bit is found, return -1. 356 | */ 357 | static inline int find_first_zero_bit(__u8 *bitmap, unsigned int nr_bits) 358 | { 359 | __u64 *b = (__u64 *)bitmap; 360 | unsigned int i, j, bit, count = (nr_bits + 63) >> 6; 361 | 362 | for(i = 0; i < count; i++) { 363 | if (b[i] != ~0UL) 364 | break; 365 | } 366 | 367 | bit = i << 6; 368 | for (j = bit; j < nr_bits; j++) { 369 | if (!test_bit(bitmap, j)) 370 | return j; 371 | } 372 | 373 | return -1; 374 | } 375 | 376 | /* 377 | * Close all open controllers on exit. 378 | * Defined in lib/nvme/nvme.c 379 | */ 380 | extern void nvme_ctrlr_cleanup(void); 381 | 382 | #endif /* __NVME_COMMON_H__ */ 383 | -------------------------------------------------------------------------------- /lib/common/nvme_cpu.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 
13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include "nvme_common.h" 35 | #include "nvme_cpu.h" 36 | 37 | #include 38 | 39 | struct nvme_cpu_info cpui; 40 | 41 | /* 42 | * Check if a cpu is present by the presence 43 | * of the cpu information for it. 44 | */ 45 | static bool nvme_cpu_present(unsigned int cpu_id) 46 | { 47 | char path[128]; 48 | 49 | snprintf(path, sizeof(path), 50 | "/sys/devices/system/cpu/cpu%u/topology/core_id", 51 | cpu_id); 52 | 53 | return access(path, F_OK) == 0; 54 | } 55 | 56 | /* 57 | * Count the number of sockets. 
58 | */ 59 | static unsigned int nvme_socket_count(void) 60 | { 61 | unsigned int socket; 62 | char path[128]; 63 | unsigned int n = 0; 64 | 65 | for (socket = 0; socket < NVME_SOCKET_MAX; socket++) { 66 | snprintf(path, sizeof(path), 67 | "/sys/devices/system/node/node%u", 68 | socket); 69 | if (!access(path, F_OK) == 0) 70 | break; 71 | n++; 72 | } 73 | 74 | return n; 75 | } 76 | 77 | /* 78 | * Get the socket ID (NUMA node) of a CPU. 79 | */ 80 | static unsigned int nvme_cpu_socket_id(unsigned int cpu_id) 81 | { 82 | char path[128]; 83 | unsigned long id; 84 | 85 | snprintf(path, sizeof(path), 86 | "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", 87 | cpu_id); 88 | if (nvme_parse_sysfs_value(path, &id) != 0) { 89 | nvme_err("Parse %s failed\n", path); 90 | return 0; 91 | } 92 | 93 | return id; 94 | } 95 | 96 | /* 97 | * Get the core ID of a CPU. 98 | */ 99 | static unsigned int nvme_cpu_core_id(unsigned int cpu_id) 100 | { 101 | char path[128]; 102 | unsigned long id; 103 | 104 | snprintf(path, sizeof(path), 105 | "/sys/devices/system/cpu/cpu%u/topology/core_id", 106 | cpu_id); 107 | if (nvme_parse_sysfs_value(path, &id) != 0) { 108 | nvme_err("Parse %s failed\n", path); 109 | return 0; 110 | } 111 | 112 | return id; 113 | } 114 | 115 | /* 116 | * Get the thread ID of a CPU. 117 | */ 118 | static unsigned int nvme_cpu_thread_id(unsigned int cpu_id) 119 | { 120 | struct nvme_cpu *cpu = &cpui.cpu[cpu_id]; 121 | unsigned int i, thid = 0; 122 | 123 | for (i = 0; i < cpui.nr_cpus; i++) { 124 | if (cpui.cpu[i].socket == cpu->socket && 125 | cpui.cpu[i].core == cpu->core) 126 | thid++; 127 | } 128 | 129 | return thid; 130 | } 131 | 132 | /* 133 | * Parse /sys/devices/system/cpu to initialize CPU information. 
134 | */ 135 | int nvme_cpu_init(void) 136 | { 137 | struct nvme_cpu *cpu; 138 | unsigned int i; 139 | 140 | memset(&cpui, 0, sizeof(struct nvme_cpu_info)); 141 | cpui.nr_sockets = nvme_socket_count(); 142 | 143 | for (i = 0; i < NVME_CPU_MAX; i++) { 144 | 145 | cpu = &cpui.cpu[i]; 146 | cpu->id = -1; 147 | 148 | /* init cpuset for per lcore config */ 149 | cpu->present = nvme_cpu_present(i); 150 | if (!cpu->present) { 151 | continue; 152 | } 153 | 154 | cpu->id = i; 155 | cpu->socket = nvme_cpu_socket_id(i); 156 | cpu->core = nvme_cpu_core_id(i); 157 | cpu->thread = nvme_cpu_thread_id(i); 158 | 159 | cpui.nr_cpus++; 160 | if (cpu->thread == 0) 161 | cpui.nr_cores++; 162 | 163 | nvme_debug("CPU %02u: socket %02u, core %02u, thread %u\n", 164 | cpu->id, cpu->socket, cpu->core, cpu->thread); 165 | 166 | } 167 | 168 | nvme_info("Detected %u CPUs: %u sockets, %u cores, %u threads\n", 169 | cpui.nr_cpus, 170 | cpui.nr_sockets, 171 | cpui.nr_cores, 172 | cpui.nr_cpus); 173 | 174 | return 0; 175 | } 176 | 177 | /* 178 | * Get caller current CPU. 179 | */ 180 | struct nvme_cpu *nvme_get_cpu(void) 181 | { 182 | int cpu; 183 | 184 | /* 185 | * Get current CPU. If trhe caller thread is not pinned down 186 | * to a particular CPU using sched_setaffinity, this result 187 | * may be only temporary. 188 | */ 189 | cpu = sched_getcpu(); 190 | if (cpu < 0) { 191 | nvme_err("sched_getcpu failed %d (%s)\n", 192 | errno, strerror(errno)); 193 | return NULL; 194 | } 195 | 196 | if (cpu >= (int)cpui.nr_cpus) { 197 | nvme_err("Invalid CPU number %d (Max %u)\n", 198 | cpu, cpui.nr_cpus - 1); 199 | return NULL; 200 | } 201 | 202 | return &cpui.cpu[cpu]; 203 | } 204 | 205 | 206 | -------------------------------------------------------------------------------- /lib/common/nvme_cpu.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 
5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef __NVME_CPU_H__ 35 | #define __NVME_CPU_H__ 36 | 37 | #include "nvme_common.h" 38 | 39 | #include 40 | 41 | /* 42 | * Maximum number of CPU supported. 43 | */ 44 | #define NVME_CPU_MAX 64 45 | 46 | /* 47 | * Undefined CPU ID. 
48 | */ 49 | #define NVME_CPU_ID_ANY UINT_MAX 50 | 51 | /* 52 | * Maximum number of sockets supported. 53 | */ 54 | #define NVME_SOCKET_MAX 32 55 | 56 | /* 57 | * Undefined SOCKET ID. 58 | */ 59 | #define NVME_SOCKET_ID_ANY UINT_MAX 60 | 61 | /* 62 | * System CPU descriptor. 63 | */ 64 | struct nvme_cpu { 65 | 66 | /* 67 | * CPU ID. 68 | */ 69 | unsigned int id; 70 | 71 | /* 72 | * Socket number. 73 | */ 74 | unsigned int socket; 75 | 76 | /* 77 | * Core number. 78 | */ 79 | unsigned int core; 80 | 81 | /* 82 | * Thread number. 83 | */ 84 | unsigned int thread; 85 | 86 | /* 87 | * CPU preset. 88 | */ 89 | bool present; 90 | 91 | }; 92 | 93 | /* 94 | * System CPU information. 95 | */ 96 | struct nvme_cpu_info { 97 | 98 | /* 99 | * Total number of CPUs. 100 | */ 101 | unsigned int nr_cpus; 102 | 103 | /* 104 | * CPU information. 105 | */ 106 | struct nvme_cpu cpu[NVME_CPU_MAX]; 107 | 108 | /* 109 | * Number of sockets. 110 | */ 111 | unsigned int nr_sockets; 112 | 113 | /* 114 | * Number of CPU cores. 115 | */ 116 | unsigned int nr_cores; 117 | 118 | 119 | }; 120 | 121 | extern struct nvme_cpu_info cpui; 122 | 123 | /* 124 | * Initialize system CPU information. 125 | */ 126 | extern int nvme_cpu_init(void); 127 | 128 | /* 129 | * Return the CPU of the caller. 130 | */ 131 | extern struct nvme_cpu *nvme_get_cpu(void); 132 | 133 | /* 134 | * Return the CPU ID of the caller. 135 | */ 136 | static inline unsigned int nvme_cpu_id(void) 137 | { 138 | struct nvme_cpu *cpu = nvme_get_cpu(); 139 | 140 | return cpu ? cpu->id : NVME_CPU_ID_ANY; 141 | } 142 | 143 | /* 144 | * Return the Socket ID of the caller. 145 | */ 146 | static inline unsigned int nvme_socket_id(void) 147 | { 148 | struct nvme_cpu *cpu = nvme_get_cpu(); 149 | 150 | return cpu ? 
cpu->socket : NVME_SOCKET_ID_ANY; 151 | } 152 | 153 | #endif /* __NVME_CPU_H__ */ 154 | -------------------------------------------------------------------------------- /lib/common/nvme_log.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include "nvme_common.h" 35 | 36 | #include 37 | #include 38 | 39 | /* 40 | * Log control structure: initialize to default early log, 41 | * which is stdout output and INFO level for all log types. 42 | */ 43 | static struct nvme_log log = { 44 | .level = NVME_LOG_NOTICE, 45 | .facility = NVME_LOG_STDOUT, 46 | .file = NULL, 47 | }; 48 | 49 | /* 50 | * Close the current log facility. 51 | */ 52 | static void nvme_close_log(void) 53 | { 54 | 55 | switch (log.facility) { 56 | case NVME_LOG_FILE: 57 | if (log.file) { 58 | fflush(log.file); 59 | fclose(log.file); 60 | log.file = NULL; 61 | } 62 | break; 63 | case NVME_LOG_SYSLOG: 64 | closelog(); 65 | break; 66 | case NVME_LOG_STDOUT: 67 | default: 68 | break; 69 | } 70 | 71 | log.facility = NVME_LOG_STDOUT; 72 | } 73 | 74 | /* 75 | * Send a log message to syslog. 76 | */ 77 | static void nvme_vlog_syslog(enum nvme_log_level level, const char *format, 78 | va_list ap) 79 | { 80 | char buf[BUFSIZ + 1]; 81 | int ret; 82 | 83 | /* Generate message in buf */ 84 | ret = vsnprintf(buf, BUFSIZ, format, ap); 85 | if (ret > 0) { 86 | buf[ret] = '\0'; 87 | syslog(log.level - 1, "%s", buf); 88 | } 89 | } 90 | 91 | /* 92 | * Open a new log facility. 
93 | */ 94 | int nvme_set_log_facility(enum nvme_log_facility facility, const char *path) 95 | { 96 | int ret = 0; 97 | 98 | /* Close current log */ 99 | nvme_close_log(); 100 | 101 | switch (facility) { 102 | case NVME_LOG_STDOUT: 103 | /* Nothing to do */ 104 | break; 105 | case NVME_LOG_FILE: 106 | log.file = fopen(path, "w+"); 107 | if (!log.file) { 108 | ret = -errno; 109 | goto out; 110 | } 111 | break; 112 | case NVME_LOG_SYSLOG: 113 | openlog(path, LOG_NDELAY | LOG_PID, LOG_USER); 114 | ret = 0; 115 | break; 116 | default: 117 | ret = -EINVAL; 118 | goto out; 119 | } 120 | 121 | log.facility = facility; 122 | 123 | out: 124 | 125 | if (ret != 0) { 126 | /* Fallback to default on error */ 127 | log.facility = NVME_LOG_STDOUT; 128 | log.file = NULL; 129 | } 130 | 131 | return ret; 132 | } 133 | 134 | /* 135 | * Get log facility. 136 | */ 137 | enum nvme_log_facility nvme_get_log_facility(void) 138 | { 139 | return log.facility; 140 | } 141 | 142 | /* 143 | * Set log level. 144 | */ 145 | void nvme_set_log_level(enum nvme_log_level level) 146 | { 147 | if ((level >= NVME_LOG_EMERG) && (level <= NVME_LOG_DEBUG)) 148 | log.level = level; 149 | } 150 | 151 | /* 152 | * Get log level. 153 | */ 154 | enum nvme_log_level nvme_get_log_level(void) 155 | { 156 | return log.level; 157 | } 158 | 159 | /* 160 | * Generates a log message. 161 | */ 162 | void nvme_vlog(enum nvme_log_level level, const char *format, va_list ap) 163 | { 164 | FILE *f = log.file; 165 | 166 | if (level > log.level) 167 | return; 168 | 169 | switch(log.facility) { 170 | case NVME_LOG_STDOUT: 171 | f = stdout; 172 | /* fallthru */ 173 | case NVME_LOG_FILE: 174 | if (f) { 175 | (void)vfprintf(f, format, ap); 176 | fflush(f); 177 | } 178 | break; 179 | case NVME_LOG_SYSLOG: 180 | nvme_vlog_syslog(level, format, ap); 181 | break; 182 | } 183 | } 184 | 185 | /* 186 | * Generates a log message. 187 | */ 188 | void nvme_log(enum nvme_log_level level, const char *format, ...) 
189 | { 190 | va_list ap; 191 | 192 | va_start(ap, format); 193 | nvme_vlog(level, format, ap); 194 | va_end(ap); 195 | } 196 | -------------------------------------------------------------------------------- /lib/common/nvme_log.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #ifndef __NVME_LOG_H__ 35 | #define __NVME_LOG_H__ 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | /* 42 | * Log control structure. 43 | */ 44 | struct nvme_log { 45 | 46 | /* 47 | * Log level. 48 | */ 49 | enum nvme_log_level level; 50 | 51 | /* 52 | * Log facility (output target) 53 | */ 54 | enum nvme_log_facility facility; 55 | 56 | /* 57 | * The output file for the NVME_LOG_FILE facility. 58 | */ 59 | FILE *file; 60 | 61 | }; 62 | 63 | /* 64 | * Generates a log message. 65 | * The message will be sent to the current log facility. 66 | * The level argument determines if the log should be displayed or 67 | * not, depending on the current library log level. 68 | */ 69 | extern void nvme_log(enum nvme_log_level level, 70 | const char *format, ...) 71 | #ifdef __GNUC__ 72 | #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) 73 | __attribute__((cold)) 74 | #endif 75 | #endif 76 | __attribute__((format(printf, 2, 3))); 77 | 78 | /* 79 | * Generates a log message. 80 | * The message will be sent to the current log facility. 81 | * The level argument determines if the log should be displayed or 82 | * not, depending on the current library log level. 83 | */ 84 | extern void nvme_vlog(enum nvme_log_level level, 85 | const char *format, 86 | va_list ap) 87 | __attribute__((format(printf,2,0))); 88 | 89 | /* System is unusable */ 90 | #define nvme_emerg(format, args...) 
\ 91 | nvme_log(NVME_LOG_EMERG, \ 92 | "libnvme (FATAL): " format, \ 93 | ## args) 94 | 95 | /* Action must be taken immediately */ 96 | #define nvme_alert(format, args...) \ 97 | nvme_log(NVME_LOG_ALERT, \ 98 | "libnvme (ALERT): " format, \ 99 | ## args) 100 | 101 | /* Critical conditions */ 102 | #define nvme_crit(format, args...) \ 103 | nvme_log(NVME_LOG_CRIT, \ 104 | "libnvme (CRITICAL): " format, \ 105 | ## args) 106 | 107 | /* Error conditions */ 108 | #define nvme_err(format, args...) \ 109 | nvme_log(NVME_LOG_ERR, \ 110 | "libnvme (ERROR): " format, \ 111 | ## args) 112 | 113 | /* Warning conditions */ 114 | #define nvme_warning(format, args...) \ 115 | nvme_log(NVME_LOG_WARNING, \ 116 | "libnvme (WARNING): " format, \ 117 | ## args) 118 | 119 | /* Normal but significant condition */ 120 | #define nvme_notice(format, args...) \ 121 | nvme_log(NVME_LOG_NOTICE, \ 122 | "libnvme: " format, \ 123 | ## args) 124 | 125 | /* Informational */ 126 | #define nvme_info(format, args...) \ 127 | nvme_log(NVME_LOG_INFO, \ 128 | "libnvme: " format, \ 129 | ## args) 130 | 131 | /* Debug-level messages */ 132 | #define nvme_debug(format, args...) \ 133 | nvme_log(NVME_LOG_DEBUG, \ 134 | "libnvme: " format, \ 135 | ## args) 136 | 137 | #endif /* __NVME_LOG_H__ */ 138 | -------------------------------------------------------------------------------- /lib/common/nvme_mem.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 3 | * 4 | * SPDX-License-Identifier: BSD-2-Clause 5 | * Please see COPYING file for license text. 6 | */ 7 | 8 | #include "nvme_mem.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | /* 17 | * Memory management information. 18 | */ 19 | static struct nvme_mem mm; 20 | 21 | /* 22 | * Find where hugetlbfs is mounted. 
23 | */ 24 | static int nvme_mem_get_hp_dir(void) 25 | { 26 | char dev[64], dir[PATH_MAX]; 27 | char type[64], opts[128]; 28 | char *mntdir = NULL; 29 | int n, tmp1, tmp2; 30 | char buf[512]; 31 | FILE *f; 32 | 33 | f = fopen("/proc/mounts", "r"); 34 | if (!f) { 35 | nvme_err("Open /proc/mounts failed\n"); 36 | return -ENOENT; 37 | } 38 | 39 | while (fgets(buf, sizeof(buf), f)) { 40 | n = sscanf(buf, "%s %s %s %s %d %d", 41 | dev, dir, type, opts, &tmp1, &tmp2); 42 | if (n != 6) 43 | continue; 44 | 45 | if (strcmp(type, "hugetlbfs") == 0) { 46 | mntdir = dir; 47 | break; 48 | } 49 | } 50 | 51 | fclose(f); 52 | 53 | if (!mntdir) { 54 | nvme_err("hugetlbfs mount not found\n"); 55 | return -ENOENT; 56 | } 57 | 58 | nvme_debug("hugetlbfs mounted at %s\n", mntdir); 59 | 60 | /* Create a unique subdirectory in the mount point for this process */ 61 | asprintf(&mm.hp_dir, "%s/libnvme.%d.XXXXXX", mntdir, getpid()); 62 | if (!mm.hp_dir) 63 | return -ENOMEM; 64 | if (!mkdtemp(mm.hp_dir)) { 65 | nvme_err("Create hugepage directory %s failed %d (%s)\n", 66 | mm.hp_dir, errno, strerror(errno)); 67 | free(mm.hp_dir); 68 | mm.hp_dir = NULL; 69 | return -errno; 70 | } 71 | 72 | nvme_debug("Using hugepage directory %s\n", 73 | mm.hp_dir); 74 | 75 | return 0; 76 | } 77 | 78 | /* 79 | * Determine the size of hugepages. 80 | */ 81 | static size_t nvme_mem_get_hp_size(void) 82 | { 83 | char buf[256]; 84 | size_t size = 0; 85 | FILE *f; 86 | 87 | f = fopen("/proc/meminfo", "r"); 88 | if (f == NULL) { 89 | nvme_err("Open /proc/meminfo failed\n"); 90 | return 0; 91 | } 92 | 93 | while(fgets(buf, sizeof(buf), f)) { 94 | if (strncmp(buf, "Hugepagesize:", 13) == 0) { 95 | size = nvme_str2size(&buf[13]); 96 | break; 97 | } 98 | } 99 | 100 | fclose(f); 101 | 102 | nvme_debug("Hugepage size is %zu B\n", size); 103 | 104 | return size; 105 | } 106 | 107 | /* 108 | * Allocate a hugepage descriptor and create its backing file 109 | * in hugetlbfs. 
110 | */ 111 | static struct nvme_hugepage *nvme_mem_alloc_hp(unsigned int node_id) 112 | { 113 | unsigned long nodemask, maxnodes; 114 | unsigned int hphash; 115 | struct nvme_hugepage *hp; 116 | void *vaddr = MAP_FAILED; 117 | int ret; 118 | 119 | /* Allocate and initialize a hugepage descriptor */ 120 | hp = calloc(1, sizeof(struct nvme_hugepage)); 121 | if (!hp) 122 | return NULL; 123 | 124 | hp->size = mm.hp_size; 125 | hp->size_bits = mm.hp_size_bits; 126 | hp->node_id = node_id; 127 | 128 | /* Create the hugepage file */ 129 | sprintf(hp->fname, "libnvme.%d-%zu", 130 | getpid(), 131 | nvme_atomic64_add_return(&mm.hp_tmp, 1)); 132 | 133 | hp->fd = openat(mm.hp_dd, hp->fname, 134 | O_RDWR | O_LARGEFILE | O_EXCL | O_CREAT, 135 | S_IRUSR | S_IWUSR); 136 | if (hp->fd < 0) { 137 | nvme_err("Open hugepage file %s failed %d (%s)\n", 138 | hp->fname, errno, strerror(errno)); 139 | goto err; 140 | } 141 | 142 | /* Mmap the file */ 143 | vaddr = mmap(NULL, hp->size, PROT_READ | PROT_WRITE, 144 | MAP_PRIVATE, hp->fd, 0); 145 | if (vaddr == MAP_FAILED) { 146 | nvme_err("mmap hugepage file %s failed %d (%s)\n", 147 | hp->fname, errno, strerror(errno)); 148 | goto err; 149 | } 150 | 151 | /* 152 | * At this point, there is no page allocated yet. Set the NUMA 153 | * memory policy and fault in the page to get a hugepage from 154 | * the desired NUMA node. 
155 | */ 156 | if (node_id == NVME_NODE_ID_ANY) { 157 | nvme_debug("Allocating hugepage on any node\n"); 158 | nodemask = 0; 159 | maxnodes = 0; 160 | } else { 161 | nvme_debug("Allocating hugepage on node %u\n", node_id); 162 | nodemask = node_id; 163 | maxnodes = 1; 164 | } 165 | 166 | ret = mbind(vaddr, hp->size, MPOL_PREFERRED, &nodemask, maxnodes, 0); 167 | if (ret != 0) { 168 | nvme_err("mbind hugepage %p to node %u failed %d (%s)\n", 169 | vaddr, node_id, 170 | errno, strerror(errno)); 171 | goto err; 172 | } 173 | 174 | /* Fault in the page */ 175 | memset(vaddr, 0, hp->size); 176 | 177 | /* Lock the page */ 178 | if (mlock(vaddr, hp->size) != 0) { 179 | nvme_err("Lock hugepage %p failed %d (%s)\n", 180 | vaddr, errno, strerror(errno)); 181 | goto err; 182 | } 183 | 184 | hp->vaddr = (unsigned long) vaddr; 185 | hp->paddr = nvme_mem_vtophys(vaddr); 186 | if (hp->paddr == NVME_VTOPHYS_ERROR) { 187 | nvme_err("Get hugepage %p physical address failed\n", 188 | vaddr); 189 | goto err; 190 | } 191 | 192 | /* Add the hugepage to the hash table */ 193 | hphash = (hp->vaddr >> mm.hp_size_bits) & NVME_HP_HASH_MASK; 194 | nvme_spin_lock(&mm.hp_lock); 195 | 196 | LIST_INSERT_HEAD(&mm.hp_list[hphash], hp, link); 197 | nvme_atomic_inc(&mm.nr_hp); 198 | 199 | nvme_debug("Allocated hugepage %s (%u, hash %u, 0x%lx / 0x%lx)\n", 200 | hp->fname, nvme_atomic_read(&mm.nr_hp), 201 | hphash, hp->vaddr, hp->paddr); 202 | 203 | nvme_spin_unlock(&mm.hp_lock); 204 | 205 | return hp; 206 | 207 | err: 208 | if (hp->fd >= 0) { 209 | if (vaddr != MAP_FAILED) 210 | munmap(vaddr, hp->size); 211 | close(hp->fd); 212 | unlinkat(mm.hp_dd, hp->fname, 0); 213 | } 214 | 215 | free(hp); 216 | 217 | return NULL; 218 | } 219 | 220 | /* 221 | * Free an allocated hugepage. 222 | * Close and unlink the hugepage backing file and free its descriptor. 
223 | */ 224 | static void nvme_mem_free_hp(struct nvme_hugepage *hp) 225 | { 226 | 227 | if (!hp) 228 | return; 229 | 230 | /* Remove the hugepage from the hash table */ 231 | nvme_spin_lock(&mm.hp_lock); 232 | 233 | nvme_debug("Free hugepage %s (%u, 0x%lx / 0x%lx)\n", 234 | hp->fname, nvme_atomic_read(&mm.nr_hp), 235 | hp->vaddr, hp->paddr); 236 | 237 | LIST_REMOVE(hp, link); 238 | nvme_atomic_dec(&mm.nr_hp); 239 | 240 | nvme_spin_unlock(&mm.hp_lock); 241 | 242 | /* Unmap, close and unlink the hugepage file */ 243 | if (munlock((void *)hp->vaddr, hp->size) < 0) 244 | nvme_crit("Unlock hugepage %s failed %d (%s)\n", 245 | hp->fname, errno, strerror(errno)); 246 | 247 | if (munmap((void *)hp->vaddr, hp->size) < 0) 248 | nvme_crit("Unmap hugepage file %s failed %d (%s)\n", 249 | hp->fname, errno, strerror(errno)); 250 | 251 | if (close(hp->fd) < 0) 252 | nvme_crit("Close hugepage file %s failed %d (%s)\n", 253 | hp->fname, errno, strerror(errno)); 254 | 255 | if (unlinkat(mm.hp_dd, hp->fname, 0) < 0) 256 | nvme_crit("Unlink hugepage file %s failed %d (%s)\n", 257 | hp->fname, errno, strerror(errno)); 258 | 259 | free(hp); 260 | } 261 | 262 | /* 263 | * Search the hugepage containing the specified address. 264 | */ 265 | struct nvme_hugepage *nvme_mem_search_hp(unsigned long vaddr) 266 | { 267 | unsigned long hpn = vaddr >> mm.hp_size_bits; 268 | unsigned long hphash = hpn & NVME_HP_HASH_MASK; 269 | struct nvme_hugepage *hp, *res = NULL; 270 | 271 | /* Search the hash table */ 272 | nvme_spin_lock(&mm.hp_lock); 273 | 274 | LIST_FOREACH(hp, &mm.hp_list[hphash], link) { 275 | if ((hp->vaddr >> mm.hp_size_bits) == hpn) { 276 | res = hp; 277 | break; 278 | } 279 | } 280 | 281 | nvme_spin_unlock(&mm.hp_lock); 282 | 283 | return res; 284 | } 285 | 286 | /* 287 | * Initialize hugepages management. 
288 | */ 289 | static int nvme_mem_hp_init(void) 290 | { 291 | int i, ret; 292 | 293 | /* Initialize hugepages management data */ 294 | nvme_atomic64_init(&mm.hp_tmp); 295 | nvme_atomic_init(&mm.nr_hp); 296 | for (i = 0; i < NVME_HP_HASH_SIZE; i++) { 297 | LIST_INIT(&mm.hp_list[i]); 298 | nvme_spinlock_init(&mm.hp_lock); 299 | } 300 | 301 | /* Find out where hugetlbfs is mounted */ 302 | ret = nvme_mem_get_hp_dir(); 303 | if (ret < 0) { 304 | nvme_crit("hugetlbfs mount point not found\n"); 305 | return ret; 306 | } 307 | 308 | /* Determine the size of hugepages */ 309 | mm.hp_size = nvme_mem_get_hp_size(); 310 | if (!mm.hp_size) { 311 | nvme_crit("Failed to determine the size of hugepages\n"); 312 | return -ENOMEM; 313 | } 314 | mm.hp_size_bits = nvme_log2(mm.hp_size); 315 | 316 | /* Open hugetlbfs directory */ 317 | ret = open(mm.hp_dir, O_RDONLY | O_DIRECTORY); 318 | if (ret < 0) { 319 | nvme_crit("Open hugepage directory %s failed %d (%s)\n", 320 | mm.hp_dir, errno, strerror(errno)); 321 | return -errno; 322 | } 323 | 324 | mm.hp_dd = ret; 325 | 326 | return 0; 327 | } 328 | 329 | /* 330 | * Cleanup hugepages management. 331 | */ 332 | static void nvme_mem_hp_cleanup(void) 333 | { 334 | struct nvme_hugepage *hp; 335 | int i; 336 | 337 | /* Free hugepages still in use */ 338 | for (i = 0; i < NVME_HP_HASH_SIZE; i++) { 339 | while ((hp = LIST_FIRST(&mm.hp_list[i]))) 340 | nvme_mem_free_hp(hp); 341 | } 342 | 343 | if (mm.hp_dd != -1) 344 | close(mm.hp_dd); 345 | rmdir(mm.hp_dir); 346 | free(mm.hp_dir); 347 | } 348 | 349 | /* 350 | * Add a heap to the specified mempool. 
351 | */ 352 | static struct nvme_heap *nvme_mem_pool_grow(struct nvme_mempool *mp) 353 | { 354 | struct nvme_hugepage *hp; 355 | struct nvme_heap *heap = NULL; 356 | 357 | /* Allocate a hugepage */ 358 | hp = nvme_mem_alloc_hp(mp->node_id); 359 | if (!hp) 360 | return NULL; 361 | 362 | /* Allocate a heap descriptor */ 363 | heap = calloc(1, sizeof(struct nvme_heap)); 364 | if (!heap) 365 | goto err; 366 | 367 | hp->mp = mp; 368 | hp->heap = heap; 369 | 370 | heap->hp = hp; 371 | heap->nr_objs = hp->size >> mp->size_bits; 372 | heap->nr_free_objs = heap->nr_objs; 373 | 374 | /* Allocate the heap bitmap */ 375 | heap->bitmap = calloc(nvme_align_up(heap->nr_objs, 64) >> 6, 376 | sizeof(__u64)); 377 | if (!heap->bitmap) 378 | goto err; 379 | 380 | /* Add the heap to the memory pool use list */ 381 | LIST_INSERT_HEAD(&mp->use_list, heap, link); 382 | mp->nr_use++; 383 | mp->nr_objs += heap->nr_objs; 384 | mp->nr_free_objs += heap->nr_free_objs; 385 | 386 | nvme_debug("Mempool %zu B: Created heap %p, %zu objects (%zu heaps)\n", 387 | mp->size, heap, heap->nr_objs, 388 | mp->nr_use + mp->nr_full); 389 | 390 | return heap; 391 | 392 | err: 393 | free(heap); 394 | nvme_mem_free_hp(hp); 395 | 396 | return NULL; 397 | } 398 | 399 | /* 400 | * Allocate an object from a mempool. 401 | */ 402 | static void *nvme_mem_pool_alloc(struct nvme_mempool *mp, unsigned long *paddr) 403 | { 404 | struct nvme_heap *heap; 405 | void *obj = NULL; 406 | size_t ofst; 407 | int bit; 408 | 409 | pthread_mutex_lock(&mp->lock); 410 | 411 | /* 412 | * Get a heap to allocate from: If there are heaps in use, 413 | * keep using them until full. Otherwise, grow the mempool. 
414 | */ 415 | if (mp->nr_use) 416 | heap = LIST_FIRST(&mp->use_list); 417 | else 418 | heap = nvme_mem_pool_grow(mp); 419 | if (!heap) { 420 | nvme_err("No heap for allocation in mempool %zu B\n", 421 | mp->size); 422 | goto out; 423 | } 424 | 425 | /* Search a free object in the heap */ 426 | bit = find_first_zero_bit(heap->bitmap, heap->nr_objs); 427 | if (bit < 0) { 428 | nvme_crit("No free object found in heap size %zu (%zu / %zu)\n", 429 | mp->size, heap->nr_free_objs, heap->nr_objs); 430 | goto out; 431 | } 432 | 433 | /* Got it: mark object as allocated */ 434 | set_bit(heap->bitmap, bit); 435 | ofst = (size_t)bit << mp->size_bits; 436 | obj = (void *) (heap->hp->vaddr + ofst); 437 | if (paddr) 438 | *paddr = heap->hp->paddr + ofst; 439 | 440 | mp->nr_free_objs--; 441 | heap->nr_free_objs--; 442 | if (nvme_heap_full(heap)) { 443 | LIST_REMOVE(heap, link); 444 | mp->nr_use--; 445 | LIST_INSERT_HEAD(&mp->full_list, heap, link); 446 | mp->nr_full++; 447 | } 448 | 449 | nvme_debug("Mempool %zu B: allocated object %p (%p / %d), %zu / %zu objects in use\n", 450 | mp->size, obj, heap, bit, 451 | mp->nr_objs - mp->nr_free_objs, mp->nr_objs); 452 | 453 | out: 454 | pthread_mutex_unlock(&mp->lock); 455 | 456 | return obj; 457 | } 458 | 459 | /* 460 | * Shrink a mempool. 461 | */ 462 | static void nvme_mem_pool_shrink(struct nvme_mempool *mp, 463 | bool force) 464 | { 465 | struct nvme_heap *heap, *next; 466 | int n = 0; 467 | 468 | /* 469 | * Scan the use list and free unused heaps, but skip 470 | * the first empty one. 
471 | */ 472 | heap = LIST_FIRST(&mp->use_list); 473 | while (heap) { 474 | 475 | if (!force) { 476 | if (nvme_heap_empty(heap)) 477 | n++; 478 | if (!nvme_heap_empty(heap) || n == 1) { 479 | heap = LIST_NEXT(heap, link); 480 | continue; 481 | } 482 | } 483 | 484 | nvme_debug("Mempool %zu B: Freed heap %p, %zu objects (%zu heaps)\n", 485 | mp->size, heap, heap->nr_objs, 486 | mp->nr_use + mp->nr_full); 487 | 488 | if (!nvme_heap_empty(heap)) 489 | nvme_warning("Mempool %zu B: Free non-empty heap %p, %zu / %zu objects in use\n", 490 | mp->size, heap, 491 | mp->nr_objs - mp->nr_free_objs, mp->nr_objs); 492 | 493 | next = LIST_NEXT(heap, link); 494 | 495 | /* Remove the heap from the meory pool empty list */ 496 | LIST_REMOVE(heap, link); 497 | mp->nr_use--; 498 | mp->nr_objs -= heap->nr_objs; 499 | mp->nr_free_objs -= heap->nr_free_objs; 500 | 501 | /* Free resources */ 502 | free(heap->bitmap); 503 | nvme_mem_free_hp(heap->hp); 504 | free(heap); 505 | 506 | heap = next; 507 | 508 | } 509 | } 510 | 511 | /* 512 | * Free a mempool object. 
513 | */ 514 | static void nvme_mem_pool_free(struct nvme_mempool *mp, struct nvme_heap *heap, 515 | void *vaddr) 516 | { 517 | struct nvme_hugepage *hp = heap->hp; 518 | unsigned long obj = (unsigned long)vaddr; 519 | int bit; 520 | 521 | pthread_mutex_lock(&mp->lock); 522 | 523 | if (obj < hp->vaddr || obj >= hp->vaddr + hp->size) { 524 | nvme_crit("Object %p does not belong to heap 0x%lx + %zu\n", 525 | vaddr, hp->vaddr, hp->size); 526 | goto out; 527 | } 528 | 529 | bit = (obj - hp->vaddr) >> mp->size_bits; 530 | if (heap->nr_free_objs == heap->nr_objs || 531 | !test_bit(heap->bitmap, bit)) { 532 | nvme_crit("Double free on object %p in heap size %zu (%zu / %zu)\n", 533 | vaddr, mp->size, heap->nr_free_objs, heap->nr_objs); 534 | goto out; 535 | } 536 | 537 | clear_bit(heap->bitmap, bit); 538 | 539 | if (nvme_heap_full(heap)) { 540 | LIST_REMOVE(heap, link); 541 | mp->nr_full--; 542 | LIST_INSERT_HEAD(&mp->use_list, heap, link); 543 | mp->nr_use++; 544 | } 545 | 546 | heap->nr_free_objs++; 547 | mp->nr_free_objs++; 548 | 549 | if (nvme_heap_empty(heap)) 550 | nvme_mem_pool_shrink(mp, false); 551 | 552 | nvme_debug("Mempool %zu B: freed object %p (%p / %d), %zu / %zu objects in use\n", 553 | mp->size, (void *)obj, heap, bit, 554 | mp->nr_objs - mp->nr_free_objs, mp->nr_objs); 555 | 556 | out: 557 | pthread_mutex_unlock(&mp->lock); 558 | } 559 | 560 | /* 561 | * Allocate memory on the specified NUMA node. 
562 | */ 563 | void *nvme_mem_alloc_node(size_t size, size_t align, unsigned int node_id, 564 | unsigned long *paddr) 565 | { 566 | unsigned int size_bits; 567 | struct nvme_mempool *mp; 568 | 569 | if (size == 0 || (align && !nvme_is_pow2(align))) { 570 | nvme_err("Invalid allocation request %zu / %zu\n", 571 | size, align); 572 | return NULL; 573 | } 574 | 575 | if (node_id == NVME_NODE_ID_ANY || 576 | node_id >= nvme_node_max()) 577 | node_id = nvme_node_id(); 578 | 579 | nvme_debug("Allocation from CPU %u, NUMA node %u\n", 580 | nvme_cpu_id(), 581 | nvme_node_id()); 582 | 583 | /* Get a suitable memory pool for the allocation */ 584 | size_bits = nvme_log2(nvme_align_pow2(nvme_max(size, align))); 585 | if (size_bits <= NVME_MP_SIZE_BITS_MIN) { 586 | mp = &mm.mp[0]; 587 | } else if (size_bits <= NVME_MP_SIZE_BITS_MAX) { 588 | mp = &mm.mp[size_bits - NVME_MP_SIZE_BITS_MIN]; 589 | } else { 590 | nvme_debug("No memory pool for %zu B (align %zu B)\n", 591 | size, align); 592 | return NULL; 593 | } 594 | 595 | nvme_debug("Allocate %zu B, align %zu B => mempool %zu B (order %zu)\n", 596 | size, align, 597 | mp->size, mp->size_bits); 598 | 599 | return nvme_mem_pool_alloc(mp, paddr); 600 | } 601 | 602 | /* 603 | * Allocate memory on the specified NUMA node. 604 | */ 605 | void *nvme_malloc_node(size_t size, size_t align, 606 | unsigned int node_id) 607 | { 608 | return nvme_mem_alloc_node(size, align, node_id, NULL); 609 | } 610 | 611 | /* 612 | * Free the memory space back to heap. 613 | */ 614 | void nvme_free(void *addr) 615 | { 616 | struct nvme_hugepage *hp; 617 | 618 | if (!addr) 619 | return; 620 | 621 | hp = nvme_mem_search_hp((unsigned long)addr); 622 | if (!hp) { 623 | nvme_crit("Invalid address %p for free\n", addr); 624 | return; 625 | } 626 | 627 | nvme_mem_pool_free(hp->mp, hp->heap, addr); 628 | } 629 | 630 | /* 631 | * Return the physical address of the specified virtual address. 
632 | */ 633 | unsigned long nvme_mem_vtophys(void *addr) 634 | { 635 | unsigned long vaddr = (unsigned long) addr; 636 | struct nvme_hugepage *hp; 637 | unsigned long ofst; 638 | ssize_t ret; 639 | __u64 ppfn, vpn; 640 | 641 | /* Avoid the system call if this is a hugepage address */ 642 | hp = nvme_mem_search_hp(vaddr); 643 | if (hp) 644 | return hp->paddr + vaddr - hp->vaddr; 645 | 646 | /* Read the page frame entry (8B per entry) */ 647 | vpn = (unsigned long)vaddr >> mm.pg_size_bits; 648 | ofst = (unsigned long)vaddr & mm.pg_size_mask; 649 | 650 | ret = pread(mm.pg_mapfd, &ppfn, 8, vpn << NVME_PFN_SIZE_SHIFT); 651 | if (ret != NVME_PFN_SIZE) { 652 | if (ret < 0) 653 | nvme_err("Read /proc/self/pagemap failed %d (%s)\n", 654 | errno, strerror(errno)); 655 | else 656 | nvme_err("Partial pfn %llu read from /proc/self/pagemap\n", 657 | vpn); 658 | return NVME_VTOPHYS_ERROR; 659 | } 660 | 661 | return ((ppfn & NVME_PFN_MASK) << mm.pg_size_bits) + ofst; 662 | } 663 | 664 | /* 665 | * Get memory usage statistics for the specified socket. 666 | */ 667 | int nvme_memstat(struct nvme_mem_stats *stats, unsigned int node_id) 668 | { 669 | struct nvme_mempool *mp; 670 | unsigned int i; 671 | 672 | if (!stats) 673 | return -EFAULT; 674 | 675 | if (node_id != NVME_NODE_ID_ANY && 676 | node_id > NVME_NODE_MAX) 677 | return -EINVAL; 678 | 679 | /* Get stats */ 680 | stats->nr_hugepages = nvme_atomic_read(&mm.nr_hp); 681 | stats->total_bytes = 0; 682 | stats->free_bytes = 0; 683 | 684 | for(i = 0; i < NVME_MP_NUM; i++) { 685 | mp = &mm.mp[i]; 686 | pthread_mutex_lock(&mp->lock); 687 | stats->total_bytes += mp->nr_objs << mp->size_bits; 688 | stats->free_bytes += mp->nr_free_objs << mp->size_bits; 689 | pthread_mutex_unlock(&mp->lock); 690 | } 691 | 692 | return 0; 693 | } 694 | 695 | /* 696 | * Initialize memory management. 
697 | */ 698 | int nvme_mem_init(void) 699 | { 700 | struct nvme_mempool *mp; 701 | int size_bits = NVME_MP_SIZE_BITS_MIN; 702 | int i, ret; 703 | 704 | memset(&mm, 0, sizeof(struct nvme_mem)); 705 | mm.pg_size = sysconf(_SC_PAGESIZE); 706 | mm.pg_size_bits = nvme_log2(mm.pg_size); 707 | mm.pg_size_mask = mm.pg_size - 1; 708 | 709 | nvme_debug("System page size: %zu B (order %zu)\n", 710 | mm.pg_size, 711 | mm.pg_size_bits); 712 | 713 | mm.pg_mapfd = open("/proc/self/pagemap", O_RDONLY); 714 | if (mm.pg_mapfd < 0) { 715 | nvme_err("Open /proc/self/pagemap failed %d (%s)\n", 716 | errno, strerror(errno)); 717 | return -errno; 718 | } 719 | 720 | /* Initialize hugepages management */ 721 | ret = nvme_mem_hp_init(); 722 | if (ret != 0) 723 | return ret; 724 | 725 | /* Initialize memory pools */ 726 | for(i = 0; i < NVME_MP_NUM; i++) { 727 | 728 | mp = &mm.mp[i]; 729 | 730 | mp->size_bits = size_bits; 731 | mp->size = 1UL << size_bits; 732 | 733 | pthread_mutex_init(&mp->lock, NULL); 734 | LIST_INIT(&mp->use_list); 735 | LIST_INIT(&mp->full_list); 736 | 737 | size_bits++; 738 | 739 | } 740 | 741 | return 0; 742 | } 743 | 744 | /* 745 | * Cleanup memory resources on exit. 
746 | */ 747 | void nvme_mem_cleanup(void) 748 | { 749 | struct nvme_mempool *mp; 750 | struct nvme_heap *heap; 751 | int i; 752 | 753 | /* Cleanup memory pools */ 754 | for(i = 0; i < NVME_MP_NUM; i++) { 755 | 756 | mp = &mm.mp[i]; 757 | 758 | while ((heap = LIST_FIRST(&mp->full_list))) { 759 | LIST_REMOVE(heap, link); 760 | LIST_INSERT_HEAD(&mp->use_list, heap, link); 761 | printf("full heap %d\n", i); 762 | } 763 | while ((heap = LIST_FIRST(&mp->use_list))) 764 | nvme_mem_pool_shrink(mp, true); 765 | 766 | } 767 | 768 | /* Cleanup hugepages */ 769 | nvme_mem_hp_cleanup(); 770 | 771 | return; 772 | } 773 | -------------------------------------------------------------------------------- /lib/common/nvme_mem.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 3 | * 4 | * SPDX-License-Identifier: BSD-2-Clause 5 | * Please see COPYING file for license text. 6 | */ 7 | 8 | #ifndef __NVME_MEMORY_H_ 9 | #define __NVME_MEMORY_H_ 10 | 11 | #include "nvme_common.h" 12 | #include "nvme_spinlock.h" 13 | #include "nvme_atomic.h" 14 | #include "nvme_cpu.h" 15 | 16 | #include 17 | #include 18 | 19 | /* 20 | * Physical address. 21 | */ 22 | typedef __u64 phys_addr_t; 23 | 24 | #define NVME_VTOPHYS_ERROR (~0ULL) 25 | 26 | /* 27 | * Page frame numbers are stored on 8 bytes in /proc/self/pagemap. 28 | */ 29 | #define NVME_PFN_SIZE_SHIFT 3 30 | #define NVME_PFN_SIZE (1 << NVME_PFN_SIZE_SHIFT) 31 | 32 | /* 33 | * Page frame numbers are bits 0-54 34 | * (see pagemap.txt in linux Documentation). 35 | */ 36 | #define NVME_PFN_MASK 0x7fffffffffffffULL 37 | 38 | /* 39 | * Maximum number of NUMA nodes. 40 | */ 41 | #define NVME_NODE_MAX NVME_SOCKET_MAX 42 | 43 | /* 44 | * Huge-page descriptor. 45 | */ 46 | struct nvme_hugepage { 47 | 48 | /* 49 | * For listing internally. 50 | */ 51 | LIST_ENTRY(nvme_hugepage) link; 52 | 53 | /* 54 | * This hugepage size in Bytes. 
55 | */ 56 | size_t size; 57 | size_t size_bits; 58 | 59 | /* 60 | * Virtual and physical address of the page. 61 | */ 62 | unsigned long vaddr; 63 | unsigned long paddr; 64 | 65 | /* 66 | * The NUMA node this page belongs to. 67 | */ 68 | unsigned int node_id; 69 | 70 | /* 71 | * The page file path and descriptor. 72 | */ 73 | int fd; 74 | char fname[128]; 75 | 76 | /* 77 | * The memory pool owing this hugepage. 78 | */ 79 | struct nvme_mempool *mp; 80 | 81 | /* 82 | * The heap using this hugepage. 83 | */ 84 | struct nvme_heap *heap; 85 | 86 | }; 87 | 88 | /* 89 | * Per hugepage heap descriptor. 90 | */ 91 | struct nvme_heap { 92 | 93 | /* 94 | * For listing in a memory pool. 95 | */ 96 | LIST_ENTRY(nvme_heap) link; 97 | 98 | /* 99 | * The backing hugepage used. 100 | */ 101 | struct nvme_hugepage *hp; 102 | 103 | /* 104 | * Total number of objects. 105 | */ 106 | size_t nr_objs; 107 | 108 | /* 109 | * Number of free objects. 110 | */ 111 | size_t nr_free_objs; 112 | 113 | /* 114 | * Slot allocation state bitmap (0 = free, 1 = allocated). 115 | */ 116 | __u8 *bitmap; 117 | 118 | }; 119 | 120 | /* 121 | * Test if a heap is empty or full. 122 | */ 123 | #define nvme_heap_empty(heap) ((heap)->nr_free_objs == (heap)->nr_objs) 124 | #define nvme_heap_full(heap) ((heap)->nr_free_objs == 0) 125 | 126 | /* 127 | * Memory pool descriptor. 128 | * A memory pool is a set of heaps, each heap being 129 | * built on top of a single hugpage. All heaps of the 130 | * memory pool have the same slot size. 131 | */ 132 | struct nvme_mempool { 133 | 134 | /* 135 | * Mempool lock. 136 | */ 137 | pthread_mutex_t lock; 138 | 139 | /* 140 | * Objects size. 141 | */ 142 | size_t size_bits; 143 | size_t size; 144 | 145 | /* 146 | * Total number of objects. 147 | */ 148 | size_t nr_objs; 149 | 150 | /* 151 | * Total number of free objects. 152 | */ 153 | size_t nr_free_objs; 154 | 155 | /* 156 | * The NUMA node this memory pool belongs to. 
157 | */ 158 | unsigned int node_id; 159 | 160 | /* 161 | * List of heaps in use but not full. 162 | */ 163 | size_t nr_use; 164 | LIST_HEAD(, nvme_heap) use_list; 165 | 166 | /* 167 | * List of full heaps. 168 | */ 169 | size_t nr_full; 170 | LIST_HEAD(, nvme_heap) full_list; 171 | 172 | }; 173 | 174 | /* 175 | * Static mempool sizes. 176 | */ 177 | enum nvme_mempool_size { 178 | 179 | /* 128 Bytes */ 180 | NVME_MP_SIZE_BITS_MIN = 7, 181 | 182 | /* 2 MBytes */ 183 | NVME_MP_SIZE_BITS_MAX = 21, 184 | 185 | /* All powers of 2 in between */ 186 | NVME_MP_NUM = 15 187 | 188 | }; 189 | 190 | /* 191 | * Hugepage hash table size. 192 | */ 193 | #define NVME_HP_HASH_SIZE 32 194 | #define NVME_HP_HASH_MASK (NVME_HP_HASH_SIZE - 1) 195 | 196 | /* 197 | * Memory managament data. 198 | */ 199 | struct nvme_mem { 200 | 201 | /* 202 | * System memory page size. 203 | */ 204 | size_t pg_size; 205 | size_t pg_size_bits; 206 | size_t pg_size_mask; 207 | 208 | /* 209 | * /proc/self/pagemap file descriptor. 210 | */ 211 | int pg_mapfd; 212 | 213 | /* 214 | * Directory where to store hugepage files 215 | * (within hugetlbfs mount). 216 | */ 217 | char *hp_dir; 218 | int hp_dd; 219 | 220 | /* 221 | * Huge page size. 222 | */ 223 | size_t hp_size; 224 | size_t hp_size_bits; 225 | 226 | /* 227 | * For generating huge-page file names. 228 | */ 229 | nvme_atomic64_t hp_tmp; 230 | 231 | /* 232 | * Hugepage management spinlock. 233 | */ 234 | nvme_spinlock_t hp_lock; 235 | 236 | /* 237 | * Number of hugepages currently allocated. 238 | */ 239 | nvme_atomic_t nr_hp; 240 | 241 | /* 242 | * Hugepage hash table (array of lists). 243 | */ 244 | LIST_HEAD(, nvme_hugepage) hp_list[NVME_HP_HASH_SIZE]; 245 | 246 | /* 247 | * Static memory pools. 248 | */ 249 | struct nvme_mempool mp[NVME_MP_NUM]; 250 | 251 | }; 252 | 253 | /* 254 | * Initialize memory management. 255 | */ 256 | extern int nvme_mem_init(void); 257 | 258 | /* 259 | * Cleanup memory resources on exit. 
260 | */ 261 | extern void nvme_mem_cleanup(void); 262 | 263 | /* 264 | * Allocate memory on the specified NUMA node. 265 | */ 266 | extern void *nvme_mem_alloc_node(size_t size, size_t align, 267 | unsigned int node_id, unsigned long *paddr); 268 | 269 | /* 270 | * Return the physical address of the specified virtual address. 271 | */ 272 | extern unsigned long nvme_mem_vtophys(void *vaddr); 273 | 274 | /* 275 | * Maximum number of NUMA nodes. 276 | */ 277 | #define nvme_node_max() (cpui.nr_sockets) 278 | 279 | /* 280 | * Current NUMA node. 281 | */ 282 | #define nvme_node_id() nvme_socket_id() 283 | 284 | #endif /* __NVME_MEMORY_H_ */ 285 | -------------------------------------------------------------------------------- /lib/common/nvme_pci.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission.
/*
 * Initialize the PCI subsystem.
 *
 * Returns 0 on success. On failure, the non-zero code returned by
 * pci_system_init() is reported on stderr and passed to the caller.
 */
int nvme_pci_init(void)
{
	int err = pci_system_init();

	if (err != 0)
		fprintf(stderr, "pci_system_init failed %d\n", err);

	return err;
}
55 | */ 56 | static int nvme_pci_device_has_kernel_driver(struct pci_device *dev) 57 | { 58 | char linkname[NVME_PCI_PATH_MAX]; 59 | char driver[NVME_PCI_PATH_MAX]; 60 | ssize_t driver_len; 61 | 62 | snprintf(linkname, sizeof(linkname), 63 | "/sys/bus/pci/devices/%04x:%02x:%02x.%1u/driver", 64 | dev->domain, dev->bus, dev->dev, dev->func); 65 | 66 | memset(driver, 0, sizeof(driver)); 67 | driver_len = readlink(linkname, driver, sizeof(driver)); 68 | if ((driver_len <= 0) || (driver_len >= NVME_PCI_PATH_MAX)) 69 | return 0; 70 | 71 | nvme_err("NVME controller %04x:%02x:%02x.%1u binded to kernel driver %s\n", 72 | dev->domain, dev->bus, dev->dev, dev->func, 73 | basename(driver)); 74 | 75 | return 1; 76 | } 77 | 78 | /* 79 | * Search a PCI device and grab it if found. 80 | */ 81 | struct pci_device *nvme_pci_device_probe(const struct pci_slot_match *slot) 82 | { 83 | struct pci_device_iterator *pci_dev_iter; 84 | struct pci_device *pci_dev = NULL; 85 | int ret = -ENODEV; 86 | 87 | pci_dev_iter = pci_slot_match_iterator_create(slot); 88 | pci_dev = pci_device_next(pci_dev_iter); 89 | if (pci_dev) 90 | ret = pci_device_probe(pci_dev); 91 | pci_iterator_destroy(pci_dev_iter); 92 | 93 | if (ret != 0) 94 | return NULL; 95 | 96 | if (pci_dev->device_class != NVME_PCI_CLASS) { 97 | nvme_err("Device PCI class is not NVME\n"); 98 | pci_dev = NULL; 99 | } 100 | 101 | if (nvme_pci_device_has_kernel_driver(pci_dev)) 102 | return NULL; 103 | 104 | return pci_dev; 105 | } 106 | 107 | /* 108 | * Get a device serial number. 
109 | */ 110 | int nvme_pci_device_get_serial_number(struct pci_device *dev, 111 | char *sn, size_t len) 112 | { 113 | uint32_t pos, header = 0; 114 | uint32_t i, buf[2]; 115 | int ret; 116 | 117 | if (len < 17) 118 | return -1; 119 | 120 | ret = nvme_pcicfg_read32(dev, &header, NVME_PCI_CFG_SIZE); 121 | if (ret || !header) 122 | return -1; 123 | 124 | pos = NVME_PCI_CFG_SIZE; 125 | while (1) { 126 | 127 | if ((header & 0x0000ffff) == NVME_PCI_EXT_CAP_ID_SN && 128 | pos != 0) { 129 | for (i = 0; i < 2; i++) { 130 | /* skip the header */ 131 | pos += 4; 132 | ret = nvme_pcicfg_read32(dev, &buf[i], 133 | pos); 134 | if (ret) 135 | return -1; 136 | } 137 | sprintf(sn, "%08x%08x", buf[1], buf[0]); 138 | return 0; 139 | } 140 | 141 | pos = (header >> 20) & 0xffc; 142 | 143 | /* 0 if no other items exist */ 144 | if (pos < NVME_PCI_CFG_SIZE) 145 | return -1; 146 | 147 | ret = nvme_pcicfg_read32(dev, &header, pos); 148 | if (ret) 149 | return -1; 150 | 151 | } 152 | 153 | return -1; 154 | } 155 | 156 | /* 157 | * Reset a PCI device. 158 | */ 159 | int nvme_pci_device_reset(struct pci_device *dev) 160 | { 161 | char filename[NVME_PCI_PATH_MAX]; 162 | char *buf = "1"; 163 | FILE *fd; 164 | int ret; 165 | 166 | snprintf(filename, sizeof(filename), 167 | "/sys/bus/pci/devices/%04x:%02x:%02x.%1u/reset", 168 | dev->domain, dev->bus, dev->dev, dev->func); 169 | 170 | nvme_debug("Resetting PCI device (%s)\n", 171 | filename); 172 | 173 | fd = fopen(filename, "w"); 174 | if (!fd) 175 | return -1; 176 | 177 | if (fwrite(buf, strlen(buf), 1, fd) != strlen(buf)) 178 | ret = -1; 179 | else 180 | ret = 0; 181 | 182 | fclose(fd); 183 | 184 | return ret; 185 | } 186 | 187 | -------------------------------------------------------------------------------- /lib/common/nvme_pci.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 
5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #ifndef __NVME_PCI_H__ 35 | #define __NVME_PCI_H__ 36 | 37 | #include "nvme_common.h" 38 | 39 | #include 40 | 41 | #define NVME_PCI_PATH_MAX 256 42 | #define NVME_PCI_CFG_SIZE 256 43 | #define NVME_PCI_EXT_CAP_ID_SN 0x03 44 | 45 | #define NVME_PCI_ANY_ID 0xffff 46 | #define NVME_PCI_VID_INTEL 0x8086 47 | #define NVME_PCI_VID_MEMBLAZE 0x1c5f 48 | 49 | /* 50 | * PCI class code for NVMe devices. 51 | * 52 | * Base class code 01h: mass storage 53 | * Subclass code 08h: non-volatile memory 54 | * Programming interface 02h: NVM Express 55 | */ 56 | #define NVME_PCI_CLASS 0x010802 57 | 58 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB0 0x3c20 59 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB1 0x3c21 60 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB2 0x3c22 61 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB3 0x3c23 62 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB4 0x3c24 63 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB5 0x3c25 64 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB6 0x3c26 65 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB7 0x3c27 66 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB8 0x3c2e 67 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_SNB9 0x3c2f 68 | 69 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 70 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 71 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 72 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB3 0x0e23 73 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB4 0x0e24 74 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB5 0x0e25 75 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB6 0x0e26 76 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB7 0x0e27 77 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB8 0x0e2e 78 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_IVB9 0x0e2f 79 | 80 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_HSW0 0x2f20 81 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_HSW1 0x2f21 82 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_HSW2 0x2f22 83 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_HSW3 0x2f23 84 | #define NVME_PCI_DEVICE_ID_INTEL_IOAT_HSW4 0x2f24 85 | 
/*
 * PCI identifiers of a device.
 * Filled from the matching struct pci_device fields by
 * nvme_pci_get_pci_id().
 */
struct pci_id {
	uint16_t vendor_id;	/* Vendor ID */
	uint16_t device_id;	/* Device ID */
	uint16_t subvendor_id;	/* Subsystem vendor ID */
	uint16_t subdevice_id;	/* Subsystem device ID */
};
140 | */ 141 | extern int nvme_pci_device_get_serial_number(struct pci_device *dev, 142 | char *sn, size_t len); 143 | 144 | /* 145 | * Compare two devices. 146 | * Return 0 if the devices are the same, 1 otherwise. 147 | */ 148 | static inline int nvme_pci_dev_cmp(struct pci_device *pci_dev1, 149 | struct pci_device *pci_dev2) 150 | { 151 | if (pci_dev1 == pci_dev2) 152 | return 0; 153 | 154 | if (pci_dev1->domain == pci_dev2->domain && 155 | pci_dev1->bus == pci_dev2->bus && 156 | pci_dev1->dev == pci_dev2->dev && 157 | pci_dev1->func == pci_dev2->func) 158 | return 0; 159 | 160 | return 1; 161 | } 162 | 163 | 164 | /* 165 | * Get a device PCI ID. 166 | */ 167 | static inline void nvme_pci_get_pci_id(struct pci_device *pci_dev, 168 | struct pci_id *pci_id) 169 | { 170 | pci_id->vendor_id = pci_dev->vendor_id; 171 | pci_id->device_id = pci_dev->device_id; 172 | pci_id->subvendor_id = pci_dev->subvendor_id; 173 | pci_id->subdevice_id = pci_dev->subdevice_id; 174 | } 175 | 176 | /* 177 | * Read a device config register. 178 | */ 179 | static inline int nvme_pcicfg_read8(struct pci_device *dev, 180 | uint8_t *value, uint32_t offset) 181 | { 182 | return pci_device_cfg_read_u8(dev, value, offset); 183 | } 184 | 185 | /* 186 | * Write a device config register. 187 | */ 188 | static inline int nvme_pcicfg_write8(struct pci_device *dev, 189 | uint8_t value, uint32_t offset) 190 | { 191 | return pci_device_cfg_write_u8(dev, value, offset); 192 | } 193 | 194 | /* 195 | * Read a device config register. 196 | */ 197 | static inline int nvme_pcicfg_read16(struct pci_device *dev, 198 | uint16_t *value, uint32_t offset) 199 | { 200 | return pci_device_cfg_read_u16(dev, value, offset); 201 | } 202 | 203 | /* 204 | * Write a device config register. 
205 | */ 206 | static inline int nvme_pcicfg_write16(struct pci_device *dev, 207 | uint16_t value, uint32_t offset) 208 | { 209 | return pci_device_cfg_write_u16(dev, value, offset); 210 | } 211 | 212 | /* 213 | * Read a device config register. 214 | */ 215 | static inline int nvme_pcicfg_read32(struct pci_device *dev, 216 | uint32_t *value, uint32_t offset) 217 | { 218 | return pci_device_cfg_read_u32(dev, value, offset); 219 | } 220 | 221 | /* 222 | * Write a device config register. 223 | */ 224 | static inline int nvme_pcicfg_write32(struct pci_device *dev, 225 | uint32_t value, uint32_t offset) 226 | { 227 | return pci_device_cfg_write_u32(dev, value, offset); 228 | } 229 | 230 | /* 231 | * Map a device PCI BAR. 232 | */ 233 | static inline int nvme_pcicfg_map_bar(void *devhandle, unsigned int bar, 234 | bool read_only, void **mapped_addr) 235 | { 236 | struct pci_device *dev = devhandle; 237 | uint32_t flags = (read_only ? 0 : PCI_DEV_MAP_FLAG_WRITABLE); 238 | 239 | return pci_device_map_range(dev, dev->regions[bar].base_addr, 240 | dev->regions[bar].size, flags, mapped_addr); 241 | } 242 | 243 | /* 244 | * Map a device PCI BAR (write combine). 245 | */ 246 | static inline int nvme_pcicfg_map_bar_write_combine(void *devhandle, 247 | unsigned int bar, 248 | void **mapped_addr) 249 | { 250 | struct pci_device *dev = devhandle; 251 | uint32_t flags = PCI_DEV_MAP_FLAG_WRITABLE | 252 | PCI_DEV_MAP_FLAG_WRITE_COMBINE; 253 | 254 | return pci_device_map_range(dev, dev->regions[bar].base_addr, 255 | dev->regions[bar].size, flags, mapped_addr); 256 | } 257 | 258 | /* 259 | * Unmap a device PCI BAR. 260 | */ 261 | static inline int nvme_pcicfg_unmap_bar(void *devhandle, unsigned int bar, 262 | void *addr) 263 | { 264 | struct pci_device *dev = devhandle; 265 | 266 | return pci_device_unmap_range(dev, addr, dev->regions[bar].size); 267 | } 268 | 269 | /* 270 | * Get a device PCI BAR address and length. 
271 | */ 272 | static inline void nvme_pcicfg_get_bar_addr_len(void *devhandle, 273 | unsigned int bar, 274 | uint64_t *addr, uint64_t *size) 275 | { 276 | struct pci_device *dev = devhandle; 277 | 278 | *addr = (uint64_t)dev->regions[bar].base_addr; 279 | *size = (uint64_t)dev->regions[bar].size; 280 | } 281 | 282 | #endif /* __NVME_PCI_H__ */ 283 | -------------------------------------------------------------------------------- /lib/common/nvme_rwlock.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Take a read lock. Loop until the lock is held.
 *
 * The reader count is incremented with a compare-and-set so that the
 * increment only takes effect if the counter did not change since it
 * was sampled.
 */
static inline void nvme_rwlock_read_lock(nvme_rwlock_t *rwl)
{
	int32_t x;
	int success = 0;

	while (success == 0) {
		/* Sample the counter: negative means a writer owns the lock */
		x = rwl->cnt;
		/* write lock is held */
		if (x < 0) {
			nvme_pause();
			continue;
		}
		/*
		 * Try to register one more reader (x -> x + 1).
		 * NOTE(review): nvme_atomic32_cmpset() presumably returns
		 * non-zero when the swap succeeded - the loop exits on
		 * any non-zero value and otherwise re-samples the counter.
		 */
		success = nvme_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
					       x, x + 1);
	}
}
/*
 * Take a write lock. Loop until the lock is held.
 *
 * The lock can only be taken for writing when the counter is 0, that
 * is, when there is neither a reader nor a writer. The counter is
 * then set to -1.
 */
static inline void nvme_rwlock_write_lock(nvme_rwlock_t *rwl)
{
	int32_t x;
	int success = 0;

	while (success == 0) {
		x = rwl->cnt;
		/* a lock is held */
		if (x != 0) {
			nvme_pause();
			continue;
		}
		/*
		 * Counter seen at 0: try to switch it to -1.
		 * NOTE(review): nvme_atomic32_cmpset() presumably returns
		 * non-zero on success - the loop retries on 0.
		 */
		success = nvme_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
					       0, -1);
	}
}
/*
 * Take the spinlock.
 *
 * Busy-wait until the lock is obtained.
 */
static inline void nvme_spin_lock(nvme_spinlock_t *sl)
{
	/*
	 * __sync_lock_test_and_set() atomically stores 1 and returns the
	 * previous value: 0 means the lock was free and is now ours.
	 */
	while (__sync_lock_test_and_set(&sl->locked, 1))
		/*
		 * Lock busy: wait with plain reads until it looks free
		 * before trying the atomic exchange again.
		 */
		while (sl->locked)
			nvme_pause();
}
85 | */ 86 | static inline int nvme_spinlock_trylock(nvme_spinlock_t *sl) 87 | { 88 | return __sync_lock_test_and_set(&sl->locked,1) == 0; 89 | } 90 | 91 | /* 92 | * Test if the lock is taken. 93 | * Returns 1 if the lock is currently taken; 0 otherwise. 94 | */ 95 | static inline int nvme_spinlock_is_locked(nvme_spinlock_t *sl) 96 | { 97 | return sl->locked; 98 | } 99 | 100 | #endif /* __NVME_SPINLOCK_H__ */ 101 | -------------------------------------------------------------------------------- /lib/nvme/Makemodule.am: -------------------------------------------------------------------------------- 1 | 2 | NVME_CFILES = \ 3 | lib/nvme/nvme.c \ 4 | lib/nvme/nvme_request.c \ 5 | lib/nvme/nvme_ctrlr.c \ 6 | lib/nvme/nvme_admin.c \ 7 | lib/nvme/nvme_ns.c \ 8 | lib/nvme/nvme_qpair.c \ 9 | lib/nvme/nvme_quirks.c 10 | 11 | NVME_HFILES = \ 12 | include/libnvme/nvme.h \ 13 | include/libnvme/nvme_spec.h \ 14 | lib/common/nvme_arch.h \ 15 | lib/common/nvme_atomic.h \ 16 | lib/common/nvme_rwlock.h \ 17 | lib/common/nvme_spinlock.h \ 18 | lib/common/nvme_common.h \ 19 | lib/common/nvme_log.h \ 20 | lib/common/nvme_cpu.h \ 21 | lib/common/nvme_pci.h \ 22 | lib/common/nvme_mem.h 23 | 24 | AM_CPPFLAGS += -Ilib/nvme -Ilib/common 25 | 26 | libnvme_la_SOURCES = $(NVME_CFILES) $(NVME_HFILES) 27 | libnvme_la_CFLAGS = $(AM_CPPFLAGS) 28 | libnvme_la_LIBADD = libnvme_common.la 29 | libnvme_la_LDFLAGS = -lnuma -Wl,--version-script,$(top_srcdir)/exports \ 30 | -release '$(PACKAGE_VERSION)' 31 | 32 | -------------------------------------------------------------------------------- /lib/nvme/nvme.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 
/*
 * List of open controllers and its lock.
 *
 * ctrlr_lock is held by nvme_ctrlr_get() and nvme_ctrlr_open() while
 * they walk or modify the list. nvme_ctrlr_cleanup() empties the list
 * without taking the lock.
 */
LIST_HEAD(, nvme_ctrlr) ctrlr_head = LIST_HEAD_INITIALIZER(ctrlr_head);
static pthread_mutex_t ctrlr_lock = PTHREAD_MUTEX_INITIALIZER;
44 | */ 45 | static struct nvme_ctrlr *nvme_ctrlr_get(struct nvme_ctrlr *ctrlr, 46 | bool remove) 47 | { 48 | struct nvme_ctrlr *c; 49 | 50 | pthread_mutex_lock(&ctrlr_lock); 51 | 52 | LIST_FOREACH(c, &ctrlr_head, link) { 53 | if (c == ctrlr) { 54 | if (remove) 55 | LIST_REMOVE(c, link); 56 | goto out; 57 | } 58 | } 59 | 60 | ctrlr = NULL; 61 | 62 | out: 63 | pthread_mutex_unlock(&ctrlr_lock); 64 | 65 | return ctrlr; 66 | } 67 | 68 | /* 69 | * Probe a pci device identified by its name. 70 | * Name should be in the form: [0000:]00:00.0 71 | * Return NULL if failed 72 | */ 73 | static struct pci_device *nvme_pci_ctrlr_probe(const char *slot_name) 74 | { 75 | char *domain = NULL, *bus = NULL, *dev = NULL, *func = NULL, *end = NULL; 76 | char *pciid = strdup(slot_name); 77 | struct pci_slot_match slot; 78 | struct pci_device *pci_dev = NULL; 79 | 80 | if (!pciid) 81 | return NULL; 82 | 83 | memset(&slot, 0, sizeof(struct pci_slot_match)); 84 | 85 | func = strrchr(pciid, '.'); 86 | if (func) { 87 | *func = '\0'; 88 | func++; 89 | } 90 | 91 | dev = strrchr(pciid, ':'); 92 | if (dev) { 93 | *dev = '\0'; 94 | dev++; 95 | } 96 | 97 | bus = strrchr(pciid, ':'); 98 | if (!bus) { 99 | domain = NULL; 100 | bus = pciid; 101 | } else { 102 | domain = pciid; 103 | *bus = '\0'; 104 | bus++; 105 | } 106 | 107 | if (!bus || !dev || !func) { 108 | nvme_err("Malformed PCI device slot name %s\n", 109 | slot_name); 110 | goto out; 111 | } 112 | 113 | if (domain) { 114 | slot.domain = (uint32_t)strtoul(domain, &end, 16); 115 | if ((end && *end) || (slot.domain > 0xffff)) { 116 | nvme_err("Invalid domain number: 0x%X\n", slot.domain); 117 | return NULL; 118 | } 119 | } else { 120 | slot.domain = PCI_MATCH_ANY; 121 | } 122 | 123 | slot.bus = (uint32_t)strtoul(bus, &end, 16); 124 | if ((end && *end) || (slot.bus > 0xff)) { 125 | nvme_err("Invalid bus number: 0x%X\n", slot.bus); 126 | return NULL; 127 | } 128 | 129 | slot.dev = strtoul(dev, &end, 16); 130 | if ((end && *end) || (slot.dev > 
0x1f)) { 131 | nvme_err("Invalid device number: 0x%X\n", slot.dev); 132 | return NULL; 133 | } 134 | 135 | slot.func = strtoul(func, &end, 16); 136 | if ((end && *end) || (slot.func > 7)) { 137 | nvme_err("Invalid function number: 0x%X\n", slot.func); 138 | return NULL; 139 | } 140 | 141 | nvme_debug("PCI URL: domain 0x%X, bus 0x%X, dev 0x%X, func 0x%X\n", 142 | slot.domain, slot.bus, slot.dev, slot.func); 143 | 144 | pci_dev = nvme_pci_device_probe(&slot); 145 | if (pci_dev) { 146 | slot.domain = pci_dev->domain; 147 | if (slot.domain == PCI_MATCH_ANY) 148 | slot.domain = 0; 149 | nvme_info("Found NVMe controller %04x:%02x:%02x.%1u\n", 150 | slot.domain, 151 | slot.bus, 152 | slot.dev, 153 | slot.func); 154 | } 155 | 156 | out: 157 | free(pciid); 158 | 159 | return pci_dev; 160 | } 161 | 162 | /* 163 | * Open an NVMe controller. 164 | */ 165 | struct nvme_ctrlr *nvme_ctrlr_open(const char *url, 166 | struct nvme_ctrlr_opts *opts) 167 | { 168 | struct pci_device *pdev; 169 | struct nvme_ctrlr *ctrlr; 170 | char *slot; 171 | 172 | /* Check url */ 173 | if (strncmp(url, "pci://", 6) != 0) { 174 | nvme_err("Invalid URL %s\n", url); 175 | return NULL; 176 | } 177 | 178 | /* Probe PCI device */ 179 | slot = (char *)url + 6; 180 | pdev = nvme_pci_ctrlr_probe(slot); 181 | if (!pdev) { 182 | nvme_err("Device %s not found\n", url); 183 | return NULL; 184 | } 185 | 186 | pthread_mutex_lock(&ctrlr_lock); 187 | 188 | /* Verify that this controller is not already open */ 189 | LIST_FOREACH(ctrlr, &ctrlr_head, link) { 190 | if (nvme_pci_dev_cmp(ctrlr->pci_dev, pdev) == 0) { 191 | nvme_err("Controller already open\n"); 192 | ctrlr = NULL; 193 | goto out; 194 | } 195 | } 196 | 197 | /* Attach the device */ 198 | ctrlr = nvme_ctrlr_attach(pdev, opts); 199 | if (!ctrlr) { 200 | nvme_err("Attach %s failed\n", url); 201 | goto out; 202 | } 203 | 204 | /* Add controller to the list */ 205 | LIST_INSERT_HEAD(&ctrlr_head, ctrlr, link); 206 | 207 | out: 208 | 
pthread_mutex_unlock(&ctrlr_lock); 209 | 210 | return ctrlr; 211 | 212 | } 213 | 214 | /* 215 | * Close an open controller. 216 | */ 217 | int nvme_ctrlr_close(struct nvme_ctrlr *ctrlr) 218 | { 219 | 220 | /* 221 | * Verify that this controller is open. 222 | * If it is, remove it from the list. 223 | */ 224 | ctrlr = nvme_ctrlr_get(ctrlr, true); 225 | if (!ctrlr) { 226 | nvme_err("Invalid controller\n"); 227 | return -EINVAL; 228 | } 229 | 230 | nvme_ctrlr_detach(ctrlr); 231 | 232 | return 0; 233 | } 234 | 235 | /* 236 | * Get controller information. 237 | */ 238 | int nvme_ctrlr_stat(struct nvme_ctrlr *ctrlr, struct nvme_ctrlr_stat *cstat) 239 | { 240 | struct pci_device *pdev = ctrlr->pci_dev; 241 | unsigned int i; 242 | 243 | /* Verify that this controller is open */ 244 | ctrlr = nvme_ctrlr_get(ctrlr, false); 245 | if (!ctrlr) { 246 | nvme_err("Invalid controller\n"); 247 | return -EINVAL; 248 | } 249 | 250 | pthread_mutex_lock(&ctrlr->lock); 251 | 252 | memset(cstat, 0, sizeof(struct nvme_ctrlr_stat)); 253 | 254 | /* Controller serial and model number */ 255 | strncpy(cstat->sn, (char *)ctrlr->cdata.sn, 256 | NVME_SERIAL_NUMBER_LENGTH - 1); 257 | strncpy(cstat->mn, (char *)ctrlr->cdata.mn, 258 | NVME_MODEL_NUMBER_LENGTH - 1); 259 | 260 | /* Remove heading and trailling spaces */ 261 | nvme_str_trim(cstat->sn); 262 | nvme_str_trim(cstat->mn); 263 | 264 | /* PCI device info */ 265 | cstat->vendor_id = pdev->vendor_id; 266 | cstat->device_id = pdev->device_id; 267 | cstat->subvendor_id = pdev->subvendor_id; 268 | cstat->subdevice_id = pdev->subdevice_id; 269 | cstat->device_class = pdev->device_class; 270 | cstat->revision = pdev->revision; 271 | cstat->domain = pdev->domain; 272 | cstat->bus = pdev->bus; 273 | cstat->dev = pdev->dev; 274 | cstat->func = pdev->func; 275 | 276 | /* Maximum transfer size */ 277 | cstat->max_xfer_size = ctrlr->max_xfer_size; 278 | 279 | memcpy(&cstat->features, &ctrlr->feature_supported, 280 | sizeof(ctrlr->feature_supported)); 281 
| memcpy(&cstat->log_pages, &ctrlr->log_page_supported, 282 | sizeof(ctrlr->log_page_supported)); 283 | 284 | cstat->nr_ns = ctrlr->nr_ns; 285 | for (i = 0; i < ctrlr->nr_ns; i++) { 286 | cstat->ns_ids[i] = i + 1; 287 | } 288 | 289 | /* Maximum io qpair possible */ 290 | cstat->max_io_qpairs = ctrlr->max_io_queues; 291 | 292 | /* Constructed io qpairs */ 293 | cstat->io_qpairs = ctrlr->io_queues; 294 | 295 | /* Enabled io qpairs */ 296 | cstat->enabled_io_qpairs = ctrlr->enabled_io_qpairs; 297 | 298 | /* Max queue depth */ 299 | cstat->max_qd = ctrlr->io_qpairs_max_entries; 300 | 301 | pthread_mutex_unlock(&ctrlr->lock); 302 | 303 | return 0; 304 | } 305 | 306 | /* 307 | * Get controller data 308 | */ 309 | int nvme_ctrlr_data(struct nvme_ctrlr *ctrlr, struct nvme_ctrlr_data *cdata, 310 | struct nvme_register_data *rdata) 311 | { 312 | union nvme_cap_register cap; 313 | 314 | /* Verify that this controller is open */ 315 | ctrlr = nvme_ctrlr_get(ctrlr, false); 316 | if (!ctrlr) { 317 | nvme_err("Invalid controller\n"); 318 | return -EINVAL; 319 | } 320 | 321 | pthread_mutex_lock(&ctrlr->lock); 322 | 323 | /* Controller data */ 324 | if (cdata) 325 | memcpy(cdata, &ctrlr->cdata, sizeof(struct nvme_ctrlr_data)); 326 | 327 | /* Read capabilities register */ 328 | if (rdata) { 329 | cap.raw = nvme_reg_mmio_read_8(ctrlr, cap.raw); 330 | rdata->mqes = cap.bits.mqes; 331 | } 332 | 333 | pthread_mutex_unlock(&ctrlr->lock); 334 | 335 | return 0; 336 | } 337 | 338 | /* 339 | * Get qpair information 340 | */ 341 | int nvme_qpair_stat(struct nvme_qpair *qpair, struct nvme_qpair_stat *qpstat) 342 | { 343 | struct nvme_ctrlr *ctrlr = qpair->ctrlr; 344 | 345 | /* Verify that this controller is open */ 346 | ctrlr = nvme_ctrlr_get(ctrlr, false); 347 | if (!ctrlr) { 348 | nvme_err("Invalid controller\n"); 349 | return -EINVAL; 350 | } 351 | 352 | pthread_mutex_lock(&ctrlr->lock); 353 | 354 | qpstat->id = qpair->id; 355 | qpstat->qd = qpair->entries; 356 | qpstat->enabled = 
qpair->enabled; 357 | qpstat->qprio = qpair->qprio; 358 | 359 | pthread_mutex_unlock(&ctrlr->lock); 360 | 361 | return 0; 362 | } 363 | 364 | /* 365 | * Close all open controllers on exit. 366 | */ 367 | void nvme_ctrlr_cleanup(void) 368 | { 369 | struct nvme_ctrlr *ctrlr; 370 | 371 | while ((ctrlr = LIST_FIRST(&ctrlr_head))) { 372 | LIST_REMOVE(ctrlr, link); 373 | nvme_ctrlr_detach(ctrlr); 374 | } 375 | } 376 | -------------------------------------------------------------------------------- /lib/nvme/nvme_admin.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in sourete and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of sourete code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include "nvme_internal.h" 35 | 36 | /* 37 | * Allocate a request, set its command and submit it 38 | * to the controller admin queue. 39 | */ 40 | static int nvme_admin_submit_cmd(struct nvme_ctrlr *ctrlr, 41 | struct nvme_cmd *cmd, 42 | void *buf, uint32_t len, 43 | nvme_cmd_cb cb_fn, void *cb_arg) 44 | { 45 | struct nvme_request *req; 46 | 47 | if (buf) 48 | req = nvme_request_allocate_contig(&ctrlr->adminq, buf, len, 49 | cb_fn, cb_arg); 50 | else 51 | req = nvme_request_allocate_null(&ctrlr->adminq, cb_fn, cb_arg); 52 | if (!req) 53 | return -ENOMEM; 54 | 55 | memcpy(&req->cmd, cmd, sizeof(req->cmd)); 56 | 57 | return nvme_qpair_submit_request(&ctrlr->adminq, req); 58 | } 59 | 60 | /* 61 | * Poll the controller admin queue waiting for a 62 | * command completion. 63 | */ 64 | static int nvme_admin_wait_cmd(struct nvme_ctrlr *ctrlr, 65 | struct nvme_completion_poll_status *status) 66 | { 67 | 68 | /* Wait for completion and check result */ 69 | while (status->done == false) 70 | nvme_qpair_poll(&ctrlr->adminq, 0); 71 | 72 | if (nvme_cpl_is_error(&status->cpl)) { 73 | nvme_notice("Admin command failed\n"); 74 | return -ENXIO; 75 | } 76 | 77 | return 0; 78 | } 79 | 80 | /* 81 | * Execute an admin command. 
82 | */ 83 | static int nvme_admin_exec_cmd(struct nvme_ctrlr *ctrlr, 84 | struct nvme_cmd *cmd, 85 | void *buf, uint32_t len) 86 | { 87 | struct nvme_completion_poll_status status; 88 | int ret; 89 | 90 | /* Submit the command */ 91 | status.done = false; 92 | ret = nvme_admin_submit_cmd(ctrlr, cmd, buf, len, 93 | nvme_request_completion_poll_cb, 94 | &status); 95 | if (ret != 0) 96 | return ret; 97 | 98 | /* Wait for the command completion and check result */ 99 | return nvme_admin_wait_cmd(ctrlr, &status); 100 | } 101 | 102 | /* 103 | * Get a controller information. 104 | */ 105 | int nvme_admin_identify_ctrlr(struct nvme_ctrlr *ctrlr, 106 | struct nvme_ctrlr_data *cdata) 107 | { 108 | struct nvme_cmd cmd; 109 | 110 | /* Setup the command */ 111 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 112 | cmd.opc = NVME_OPC_IDENTIFY; 113 | cmd.cdw10 = NVME_IDENTIFY_CTRLR; 114 | 115 | /* Execute the command */ 116 | return nvme_admin_exec_cmd(ctrlr, &cmd, 117 | cdata, sizeof(struct nvme_ctrlr_data)); 118 | } 119 | 120 | /* 121 | * Get a controller feature. 122 | */ 123 | int nvme_admin_get_feature(struct nvme_ctrlr *ctrlr, 124 | enum nvme_feat_sel sel, 125 | enum nvme_feat feature, 126 | uint32_t cdw11, 127 | uint32_t *attributes) 128 | { 129 | struct nvme_completion_poll_status status; 130 | struct nvme_cmd cmd; 131 | int ret; 132 | 133 | /* Setup the command */ 134 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 135 | cmd.opc = NVME_OPC_GET_FEATURES; 136 | cmd.cdw10 = (sel << 8) | feature; 137 | cmd.cdw11 = cdw11; 138 | 139 | /* Submit the command */ 140 | status.done = false; 141 | ret = nvme_admin_submit_cmd(ctrlr, &cmd, NULL, 0, 142 | nvme_request_completion_poll_cb, 143 | &status); 144 | if (ret == 0) { 145 | /* Wait for the command completion and check result */ 146 | ret = nvme_admin_wait_cmd(ctrlr, &status); 147 | if (ret == 0 && attributes) 148 | *attributes = status.cpl.cdw0; 149 | } 150 | 151 | return ret; 152 | } 153 | 154 | /* 155 | * Set a feature. 
156 | */ 157 | int nvme_admin_set_feature(struct nvme_ctrlr *ctrlr, 158 | bool save, 159 | enum nvme_feat feature, 160 | uint32_t cdw11, 161 | uint32_t cdw12, 162 | uint32_t *attributes) 163 | { 164 | struct nvme_completion_poll_status status; 165 | struct nvme_cmd cmd; 166 | int ret; 167 | 168 | /* Setup the command */ 169 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 170 | cmd.opc = NVME_OPC_SET_FEATURES; 171 | cmd.cdw10 = feature; 172 | if (save) 173 | cmd.cdw10 |= (1 << 31); 174 | cmd.cdw11 = cdw11; 175 | cmd.cdw12 = cdw12; 176 | 177 | /* Submit the command */ 178 | status.done = false; 179 | ret = nvme_admin_submit_cmd(ctrlr, &cmd, NULL, 0, 180 | nvme_request_completion_poll_cb, 181 | &status); 182 | if (ret == 0) { 183 | /* Wait for the command completion and check result */ 184 | ret = nvme_admin_wait_cmd(ctrlr, &status); 185 | if (ret == 0 && attributes) 186 | *attributes = status.cpl.cdw0; 187 | } 188 | 189 | return ret; 190 | } 191 | 192 | /* 193 | * Create an I/O queue. 194 | */ 195 | int nvme_admin_create_ioq(struct nvme_ctrlr *ctrlr, 196 | struct nvme_qpair *qpair, 197 | enum nvme_io_queue_type io_qtype) 198 | { 199 | struct nvme_cmd cmd; 200 | 201 | /* Setup the command */ 202 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 203 | switch(io_qtype) { 204 | case NVME_IO_SUBMISSION_QUEUE: 205 | cmd.opc = NVME_OPC_CREATE_IO_SQ; 206 | cmd.cdw11 = (qpair->id << 16) | (qpair->qprio << 1) | 0x1; 207 | cmd.dptr.prp.prp1 = qpair->cmd_bus_addr; 208 | break; 209 | case NVME_IO_COMPLETION_QUEUE: 210 | cmd.opc = NVME_OPC_CREATE_IO_CQ; 211 | cmd.cdw11 = 0x1; 212 | cmd.dptr.prp.prp1 = qpair->cpl_bus_addr; 213 | break; 214 | default: 215 | return -EINVAL; 216 | } 217 | 218 | cmd.cdw10 = ((qpair->entries - 1) << 16) | qpair->id; 219 | 220 | /* Execute the command */ 221 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 222 | } 223 | 224 | /* 225 | * Delete an I/O queue. 
226 | */ 227 | int nvme_admin_delete_ioq(struct nvme_ctrlr *ctrlr, 228 | struct nvme_qpair *qpair, 229 | enum nvme_io_queue_type io_qtype) 230 | { 231 | struct nvme_cmd cmd; 232 | 233 | /* Setup the command */ 234 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 235 | switch(io_qtype) { 236 | case NVME_IO_SUBMISSION_QUEUE: 237 | cmd.opc = NVME_OPC_DELETE_IO_SQ; 238 | break; 239 | case NVME_IO_COMPLETION_QUEUE: 240 | cmd.opc = NVME_OPC_DELETE_IO_CQ; 241 | break; 242 | default: 243 | return -EINVAL; 244 | } 245 | cmd.cdw10 = qpair->id; 246 | 247 | /* Execute the command */ 248 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 249 | } 250 | 251 | /* 252 | * Get a namespace information. 253 | */ 254 | int nvme_admin_identify_ns(struct nvme_ctrlr *ctrlr, 255 | uint16_t nsid, 256 | struct nvme_ns_data *nsdata) 257 | { 258 | struct nvme_cmd cmd; 259 | 260 | /* Setup the command */ 261 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 262 | cmd.opc = NVME_OPC_IDENTIFY; 263 | cmd.cdw10 = NVME_IDENTIFY_NS; 264 | cmd.nsid = nsid; 265 | 266 | /* Execute the command */ 267 | return nvme_admin_exec_cmd(ctrlr, &cmd, 268 | nsdata, sizeof(struct nvme_ns_data)); 269 | } 270 | 271 | /* 272 | * Attach a namespace. 273 | */ 274 | int nvme_admin_attach_ns(struct nvme_ctrlr *ctrlr, 275 | uint32_t nsid, 276 | struct nvme_ctrlr_list *clist) 277 | { 278 | struct nvme_cmd cmd; 279 | 280 | /* Setup the command */ 281 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 282 | cmd.opc = NVME_OPC_NS_ATTACHMENT; 283 | cmd.nsid = nsid; 284 | cmd.cdw10 = NVME_NS_CTRLR_ATTACH; 285 | 286 | /* Execute the command */ 287 | return nvme_admin_exec_cmd(ctrlr, &cmd, 288 | clist, sizeof(struct nvme_ctrlr_list)); 289 | } 290 | 291 | /* 292 | * Detach a namespace. 
293 | */ 294 | int nvme_admin_detach_ns(struct nvme_ctrlr *ctrlr, 295 | uint32_t nsid, 296 | struct nvme_ctrlr_list *clist) 297 | { 298 | struct nvme_cmd cmd; 299 | 300 | /* Setup the command */ 301 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 302 | cmd.opc = NVME_OPC_NS_ATTACHMENT; 303 | cmd.nsid = nsid; 304 | cmd.cdw10 = NVME_NS_CTRLR_DETACH; 305 | 306 | /* Execute the command */ 307 | return nvme_admin_exec_cmd(ctrlr, &cmd, 308 | clist, sizeof(struct nvme_ctrlr_list)); 309 | } 310 | 311 | /* 312 | * Create a namespace. 313 | */ 314 | int nvme_admin_create_ns(struct nvme_ctrlr *ctrlr, 315 | struct nvme_ns_data *nsdata, 316 | unsigned int *nsid) 317 | { 318 | struct nvme_completion_poll_status status; 319 | struct nvme_cmd cmd; 320 | int ret; 321 | 322 | /* Setup the command */ 323 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 324 | cmd.opc = NVME_OPC_NS_MANAGEMENT; 325 | cmd.cdw10 = NVME_NS_MANAGEMENT_CREATE; 326 | 327 | /* Submit the command */ 328 | status.done = false; 329 | ret = nvme_admin_submit_cmd(ctrlr, &cmd, 330 | nsdata, sizeof(struct nvme_ns_data), 331 | nvme_request_completion_poll_cb, 332 | &status); 333 | if (ret == 0) 334 | /* Wait for the command completion and check result */ 335 | ret = nvme_admin_wait_cmd(ctrlr, &status); 336 | 337 | if (ret != 0) 338 | return ret; 339 | 340 | *nsid = status.cpl.cdw0; 341 | 342 | return 0; 343 | } 344 | 345 | /* 346 | * Delete a namespace. 347 | */ 348 | int nvme_admin_delete_ns(struct nvme_ctrlr *ctrlr, 349 | unsigned int nsid) 350 | { 351 | struct nvme_cmd cmd; 352 | 353 | /* Setup the command */ 354 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 355 | cmd.opc = NVME_OPC_NS_MANAGEMENT; 356 | cmd.cdw10 = NVME_NS_MANAGEMENT_DELETE; 357 | cmd.nsid = nsid; 358 | 359 | /* Execute the command */ 360 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 361 | } 362 | 363 | /* 364 | * Format media. 
365 | * (entire device or just the specified namespace) 366 | */ 367 | int nvme_admin_format_nvm(struct nvme_ctrlr *ctrlr, 368 | unsigned int nsid, 369 | struct nvme_format *format) 370 | { 371 | struct nvme_cmd cmd; 372 | 373 | /* Setup the command */ 374 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 375 | cmd.opc = NVME_OPC_FORMAT_NVM; 376 | cmd.nsid = nsid; 377 | memcpy(&cmd.cdw10, format, sizeof(uint32_t)); 378 | 379 | /* Execute the command */ 380 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 381 | } 382 | 383 | /* 384 | * Get a log page. 385 | */ 386 | int nvme_admin_get_log_page(struct nvme_ctrlr *ctrlr, 387 | uint8_t log_page, 388 | uint32_t nsid, 389 | void *payload, 390 | uint32_t payload_size) 391 | { 392 | struct nvme_cmd cmd; 393 | 394 | /* Setup the command */ 395 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 396 | cmd.opc = NVME_OPC_GET_LOG_PAGE; 397 | cmd.nsid = nsid; 398 | cmd.cdw10 = ((payload_size / sizeof(uint32_t)) - 1) << 16; 399 | cmd.cdw10 |= log_page; 400 | 401 | /* Execute the command */ 402 | return nvme_admin_exec_cmd(ctrlr, &cmd, payload, payload_size); 403 | } 404 | 405 | /* 406 | * Abort an admin or an I/O command. 407 | */ 408 | int nvme_admin_abort_cmd(struct nvme_ctrlr *ctrlr, 409 | uint16_t cid, uint16_t sqid) 410 | { 411 | struct nvme_cmd cmd; 412 | 413 | /* Setup the command */ 414 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 415 | cmd.opc = NVME_OPC_ABORT; 416 | cmd.cdw10 = (cid << 16) | sqid; 417 | 418 | /* Execute the command */ 419 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 420 | } 421 | 422 | /* 423 | * Validate a FW. 
424 | */ 425 | int nvme_admin_fw_commit(struct nvme_ctrlr *ctrlr, 426 | const struct nvme_fw_commit *fw_commit) 427 | { 428 | struct nvme_cmd cmd; 429 | 430 | /* Setup the command */ 431 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 432 | cmd.opc = NVME_OPC_FIRMWARE_COMMIT; 433 | memcpy(&cmd.cdw10, fw_commit, sizeof(uint32_t)); 434 | 435 | /* Execute the command */ 436 | return nvme_admin_exec_cmd(ctrlr, &cmd, NULL, 0); 437 | } 438 | 439 | /* 440 | * Download to the device a firmware. 441 | */ 442 | int nvme_admin_fw_image_dl(struct nvme_ctrlr *ctrlr, 443 | void *fw, uint32_t size, 444 | uint32_t offset) 445 | { 446 | struct nvme_cmd cmd; 447 | 448 | /* Setup the command */ 449 | memset(&cmd, 0, sizeof(struct nvme_cmd)); 450 | cmd.opc = NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD; 451 | cmd.cdw10 = (size >> 2) - 1; 452 | cmd.cdw11 = offset >> 2; 453 | 454 | /* Execute the command */ 455 | return nvme_admin_exec_cmd(ctrlr, &cmd, fw, size); 456 | } 457 | -------------------------------------------------------------------------------- /lib/nvme/nvme_intel.h: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 
17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | /* 35 | * Intel NVMe vendor-specific definitions 36 | * See: http://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ssd-dc-p3700-spec.pdf 37 | */ 38 | 39 | #ifndef __NVME_INTEL_H__ 40 | #define __NVME_INTEL_H__ 41 | 42 | #include 43 | #include 44 | 45 | enum nvme_intel_feat { 46 | NVME_INTEL_FEAT_MAX_LBA = 0xC1, 47 | NVME_INTEL_FEAT_NATIVE_MAX_LBA = 0xC2, 48 | NVME_INTEL_FEAT_POWER_GOVERNOR_SETTING = 0xC6, 49 | NVME_INTEL_FEAT_SMBUS_ADDRESS = 0xC8, 50 | NVME_INTEL_FEAT_LED_PATTERN = 0xC9, 51 | NVME_INTEL_FEAT_RESET_TIMED_WORKLOAD_COUNTERS = 0xD5, 52 | NVME_INTEL_FEAT_LATENCY_TRACKING = 0xE2, 53 | }; 54 | 55 | enum nvme_intel_set_max_lba_command_status_code { 56 | NVME_INTEL_EXCEEDS_AVAILABLE_CAPACITY = 0xC0, 57 | NVME_INTEL_SMALLER_THAN_MIN_LIMIT = 0xC1, 58 | NVME_INTEL_SMALLER_THAN_NS_REQUIREMENTS = 0xC2, 59 | }; 60 | 61 | enum nvme_intel_log_page { 62 | NVME_INTEL_LOG_PAGE_DIR = 0xC0, 63 | NVME_INTEL_LOG_READ_CMD_LATENCY = 0xC1, 64 | NVME_INTEL_LOG_WRITE_CMD_LATENCY = 0xC2, 65 | NVME_INTEL_LOG_TEMPERATURE = 0xC5, 66 | NVME_INTEL_LOG_SMART = 0xCA, 67 | NVME_INTEL_MARKETING_DESCRIPTION = 0xDD, 68 | }; 69 | 70 | enum nvme_intel_smart_attribute_code { 71 | NVME_INTEL_SMART_PROGRAM_FAIL_COUNT = 0xAB, 72 | NVME_INTEL_SMART_ERASE_FAIL_COUNT = 0xAC, 73 | NVME_INTEL_SMART_WEAR_LEVELING_COUNT = 0xAD, 74 | NVME_INTEL_SMART_E2E_ERROR_COUNT = 0xB8, 75 | NVME_INTEL_SMART_CRC_ERROR_COUNT = 0xC7, 76 | NVME_INTEL_SMART_MEDIA_WEAR = 0xE2, 77 | NVME_INTEL_SMART_HOST_READ_PERCENTAGE = 0xE3, 78 | NVME_INTEL_SMART_TIMER = 0xE4, 79 | NVME_INTEL_SMART_THERMAL_THROTTLE_STATUS = 0xEA, 80 | NVME_INTEL_SMART_RETRY_BUFFER_OVERFLOW_COUNTER = 0xF0, 81 | NVME_INTEL_SMART_PLL_LOCK_LOSS_COUNT = 0xF3, 82 | NVME_INTEL_SMART_NAND_BYTES_WRITTEN = 0xF4, 83 | NVME_INTEL_SMART_HOST_BYTES_WRITTEN = 0xF5, 84 | }; 85 | 86 | struct nvme_intel_log_page_dir { 87 | uint8_t version[2]; 88 | uint8_t reserved[384]; 89 | uint8_t read_latency_log_len; 90 | uint8_t 
reserved2; 91 | uint8_t write_latency_log_len; 92 | uint8_t reserved3[5]; 93 | uint8_t temperature_statistics_log_len; 94 | uint8_t reserved4[9]; 95 | uint8_t smart_log_len; 96 | uint8_t reserved5[37]; 97 | uint8_t marketing_description_log_len; 98 | uint8_t reserved6[69]; 99 | }; 100 | nvme_static_assert(sizeof(struct nvme_intel_log_page_dir) == 512, 101 | "Incorrect size"); 102 | 103 | struct nvme_intel_rw_latency_page { 104 | uint16_t major_revison; 105 | uint16_t minor_revison; 106 | uint32_t buckets_32us[32]; 107 | uint32_t buckets_1ms[31]; 108 | uint32_t buckets_32ms[31]; 109 | }; 110 | nvme_static_assert(sizeof(struct nvme_intel_rw_latency_page) == 380, 111 | "Incorrect size"); 112 | 113 | struct nvme_intel_temperature_page { 114 | uint64_t current_temperature; 115 | uint64_t shutdown_flag_last; 116 | uint64_t shutdown_flag_life; 117 | uint64_t highest_temperature; 118 | uint64_t lowest_temperature; 119 | uint64_t reserved[5]; 120 | uint64_t specified_max_op_temperature; 121 | uint64_t reserved2; 122 | uint64_t specified_min_op_temperature; 123 | uint64_t estimated_offset; 124 | }; 125 | nvme_static_assert(sizeof(struct nvme_intel_temperature_page) == 112, 126 | "Incorrect size"); 127 | 128 | struct nvme_intel_smart_attribute { 129 | uint8_t code; 130 | uint8_t reserved[2]; 131 | uint8_t normalized_value; 132 | uint8_t reserved2; 133 | uint8_t raw_value[6]; 134 | uint8_t reserved3; 135 | }; 136 | 137 | struct __attribute__((packed)) nvme_intel_smart_information_page { 138 | struct nvme_intel_smart_attribute attributes[13]; 139 | }; 140 | nvme_static_assert(sizeof(struct nvme_intel_smart_information_page) == 156, 141 | "Incorrect size"); 142 | 143 | union nvme_intel_feat_power_governor { 144 | uint32_t raw; 145 | struct { 146 | /* Power governor setting: 00h = 25W 01h = 20W 02h = 10W */ 147 | uint32_t power_governor_setting : 8; 148 | uint32_t reserved : 24; 149 | } bits; 150 | }; 151 | nvme_static_assert(sizeof(union nvme_intel_feat_power_governor) == 4, 152 
| "Incorrect size"); 153 | 154 | union nvme_intel_feat_smbus_address { 155 | uint32_t raw; 156 | struct { 157 | uint32_t reserved : 1; 158 | uint32_t smbus_controller_address : 8; 159 | uint32_t reserved2 : 23; 160 | } bits; 161 | }; 162 | nvme_static_assert(sizeof(union nvme_intel_feat_smbus_address) == 4, 163 | "Incorrect size"); 164 | 165 | union nvme_intel_feat_led_pattern { 166 | uint32_t raw; 167 | struct { 168 | uint32_t feature_options : 24; 169 | uint32_t value : 8; 170 | } bits; 171 | }; 172 | nvme_static_assert(sizeof(union nvme_intel_feat_led_pattern) == 4, 173 | "Incorrect size"); 174 | 175 | union nvme_intel_feat_reset_timed_workload_counters { 176 | uint32_t raw; 177 | struct { 178 | /* 179 | * Write Usage: 00 = NOP, 1 = Reset E2, E3,E4 counters; 180 | * Read Usage: Not Supported 181 | */ 182 | uint32_t reset : 1; 183 | uint32_t reserved : 31; 184 | } bits; 185 | }; 186 | nvme_static_assert(sizeof(union nvme_intel_feat_reset_timed_workload_counters) == 4, 187 | "Incorrect size"); 188 | 189 | union nvme_intel_feat_latency_tracking { 190 | uint32_t raw; 191 | struct { 192 | /* 193 | * Write Usage: 194 | * 00h = Disable Latency Tracking (Default) 195 | * 01h = Enable Latency Tracking 196 | */ 197 | uint32_t enable : 32; 198 | } bits; 199 | }; 200 | nvme_static_assert(sizeof(union nvme_intel_feat_latency_tracking) == 4, 201 | "Incorrect size"); 202 | 203 | struct nvme_intel_marketing_description_page { 204 | uint8_t marketing_product[512]; 205 | }; 206 | nvme_static_assert(sizeof(struct nvme_intel_marketing_description_page) == 512, 207 | "Incorrect size"); 208 | 209 | #endif /* __NVME_INTEL_H__ */ 210 | -------------------------------------------------------------------------------- /lib/nvme/nvme_quirks.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 
6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | */ 33 | 34 | #include "nvme_internal.h" 35 | 36 | struct nvme_quirk { 37 | struct pci_id id; 38 | unsigned int flags; 39 | }; 40 | 41 | static const struct nvme_quirk nvme_quirks[] = { 42 | { 43 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x3702 }, 44 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 45 | }, 46 | { 47 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x3703 }, 48 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 49 | }, 50 | { 51 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x3704 }, 52 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 53 | }, 54 | { 55 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x3705 }, 56 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 57 | }, 58 | { 59 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x3709 }, 60 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 61 | }, 62 | { 63 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x370a }, 64 | NVME_INTEL_QUIRK_READ_LATENCY | NVME_INTEL_QUIRK_WRITE_LATENCY 65 | }, 66 | { 67 | { NVME_PCI_VID_MEMBLAZE, 0x0540, NVME_PCI_ANY_ID, NVME_PCI_ANY_ID }, 68 | NVME_QUIRK_DELAY_BEFORE_CHK_RDY 69 | }, 70 | { 71 | { NVME_PCI_VID_INTEL, 0x0953, NVME_PCI_VID_INTEL, 0x370d }, 72 | NVME_QUIRK_DELAY_AFTER_RDY 73 | }, 74 | { 75 | { 0x0000, 0x0000, 0x0000, 0x0000 }, 76 | 0 77 | } 78 | }; 79 | 80 | /* 81 | * Compare each field. NVME_PCI_ANY_ID in s1 matches everything. 
82 | */ 83 | static bool nvme_quirks_pci_id_match(const struct pci_id *id, 84 | struct pci_device *pdev) 85 | { 86 | if ((id->vendor_id == NVME_PCI_ANY_ID || 87 | id->vendor_id == pdev->vendor_id) && 88 | (id->device_id == NVME_PCI_ANY_ID || 89 | id->device_id == pdev->device_id) && 90 | (id->subvendor_id == NVME_PCI_ANY_ID || 91 | id->subvendor_id == pdev->subvendor_id) && 92 | (id->subdevice_id == NVME_PCI_ANY_ID || 93 | id->subdevice_id == pdev->subdevice_id)) 94 | return true; 95 | 96 | return false; 97 | } 98 | 99 | unsigned int nvme_ctrlr_get_quirks(struct pci_device *pdev) 100 | { 101 | const struct nvme_quirk *quirk = nvme_quirks; 102 | 103 | while (quirk->id.vendor_id) { 104 | if (nvme_quirks_pci_id_match(&quirk->id, pdev)) 105 | return quirk->flags; 106 | quirk++; 107 | } 108 | 109 | return 0; 110 | } 111 | -------------------------------------------------------------------------------- /lib/nvme/nvme_request.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * BSD LICENSE 3 | * 4 | * Copyright (c) Intel Corporation. All rights reserved. 5 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions 9 | * are met: 10 | * 11 | * * Redistributions of source code must retain the above copyright 12 | * notice, this list of conditions and the following disclaimer. 13 | * * Redistributions in binary form must reproduce the above copyright 14 | * notice, this list of conditions and the following disclaimer in 15 | * the documentation and/or other materials provided with the 16 | * distribution. 17 | * * Neither the name of Intel Corporation nor the names of its 18 | * contributors may be used to endorse or promote products derived 19 | * from this software without specific prior written permission. 
20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | */ 33 | 34 | #include "nvme_internal.h" 35 | 36 | /* 37 | * Allocate a request descriptor from the queue pair free list. 38 | */ 39 | static struct nvme_request *nvme_alloc_request(struct nvme_qpair *qpair) 40 | { 41 | struct nvme_request *req; 42 | 43 | req = STAILQ_FIRST(&qpair->free_req); 44 | if (req) { 45 | STAILQ_REMOVE_HEAD(&qpair->free_req, stailq); 46 | memset(&req->cmd, 0, sizeof(struct nvme_cmd)); 47 | } 48 | 49 | return req; 50 | } 51 | 52 | static void nvme_request_cb_complete_child(void *child_arg, 53 | const struct nvme_cpl *cpl) 54 | { 55 | struct nvme_request *child = child_arg; 56 | struct nvme_request *parent = child->parent; 57 | 58 | nvme_request_remove_child(parent, child); 59 | 60 | if (nvme_cpl_is_error(cpl)) 61 | memcpy(&parent->parent_status, cpl, sizeof(*cpl)); 62 | 63 | if (parent->child_reqs == 0) { 64 | if (parent->cb_fn) 65 | parent->cb_fn(parent->cb_arg, &parent->parent_status); 66 | nvme_request_free(parent); 67 | } 68 | } 69 | 70 | void nvme_request_completion_poll_cb(void *arg, const struct nvme_cpl *cpl) 71 | { 72 | struct nvme_completion_poll_status *status = arg; 73 | 74 | memcpy(&status->cpl, cpl, 
sizeof(*cpl)); 75 | status->done = true; 76 | } 77 | 78 | int nvme_request_pool_construct(struct nvme_qpair *qpair) 79 | { 80 | struct nvme_request *req; 81 | unsigned int i; 82 | 83 | qpair->num_reqs = qpair->trackers * NVME_IO_ENTRIES_VS_TRACKERS_RATIO; 84 | qpair->reqs = calloc(qpair->num_reqs, sizeof(struct nvme_request)); 85 | if (!qpair->reqs) { 86 | nvme_err("QPair %d: allocate %u requests failed\n", 87 | (int)qpair->id, qpair->num_reqs); 88 | return -ENOMEM; 89 | } 90 | 91 | nvme_info("QPair %d: %d requests in pool\n", 92 | (int)qpair->id, 93 | (int)qpair->num_reqs); 94 | 95 | for(i = 0; i < qpair->num_reqs; i++) { 96 | req = &qpair->reqs[i]; 97 | req->qpair = qpair; 98 | STAILQ_INSERT_TAIL(&qpair->free_req, req, stailq); 99 | req++; 100 | } 101 | 102 | return 0; 103 | } 104 | 105 | void nvme_request_pool_destroy(struct nvme_qpair *qpair) 106 | { 107 | struct nvme_request *req; 108 | unsigned int n = 0; 109 | 110 | while ((req = STAILQ_FIRST(&qpair->free_req))) { 111 | STAILQ_REMOVE_HEAD(&qpair->free_req, stailq); 112 | n++; 113 | } 114 | 115 | if (n != qpair->num_reqs) 116 | nvme_err("QPair %d: Freed %d/%d requests\n", 117 | (int)qpair->id, n, (int)qpair->num_reqs); 118 | 119 | free(qpair->reqs); 120 | } 121 | 122 | struct nvme_request *nvme_request_allocate(struct nvme_qpair *qpair, 123 | const struct nvme_payload *payload, 124 | uint32_t payload_size, 125 | nvme_cmd_cb cb_fn, 126 | void *cb_arg) 127 | { 128 | struct nvme_request *req; 129 | 130 | req = nvme_alloc_request(qpair); 131 | if (req == NULL) 132 | return NULL; 133 | 134 | /* 135 | * Only memset up to (but not including) the children TAILQ_ENTRY. 136 | * Children, and following members, are only used as part of I/O 137 | * splitting so we avoid memsetting them until it is actually needed. 138 | * They will be initialized in nvme_request_add_child() 139 | * if the request is split. 
140 | */ 141 | memset(req, 0, offsetof(struct nvme_request, children)); 142 | req->cb_fn = cb_fn; 143 | req->cb_arg = cb_arg; 144 | req->payload = *payload; 145 | req->payload_size = payload_size; 146 | 147 | return req; 148 | } 149 | 150 | struct nvme_request *nvme_request_allocate_contig(struct nvme_qpair *qpair, 151 | void *buffer, 152 | uint32_t payload_size, 153 | nvme_cmd_cb cb_fn, 154 | void *cb_arg) 155 | { 156 | struct nvme_payload payload; 157 | 158 | payload.type = NVME_PAYLOAD_TYPE_CONTIG; 159 | payload.u.contig = buffer; 160 | payload.md = NULL; 161 | 162 | return nvme_request_allocate(qpair, &payload, payload_size, 163 | cb_fn, cb_arg); 164 | } 165 | 166 | struct nvme_request *nvme_request_allocate_null(struct nvme_qpair *qpair, 167 | nvme_cmd_cb cb_fn, void *cb_arg) 168 | { 169 | return nvme_request_allocate_contig(qpair, NULL, 0, cb_fn, cb_arg); 170 | } 171 | 172 | void nvme_request_free(struct nvme_request *req) 173 | { 174 | struct nvme_qpair *qpair = req->qpair; 175 | 176 | nvme_assert(req->child_reqs == 0, "Number of child request not 0\n"); 177 | 178 | STAILQ_INSERT_HEAD(&qpair->free_req, req, stailq); 179 | } 180 | 181 | void nvme_request_add_child(struct nvme_request *parent, 182 | struct nvme_request *child) 183 | { 184 | if (parent->child_reqs == 0) { 185 | /* 186 | * Defer initialization of the children TAILQ since it falls 187 | * on a separate cacheline. This ensures we do not touch this 188 | * cacheline except on request splitting cases, which are 189 | * relatively rare. 
190 | */ 191 | TAILQ_INIT(&parent->children); 192 | parent->parent = NULL; 193 | memset(&parent->parent_status, 0, sizeof(struct nvme_cpl)); 194 | } 195 | 196 | parent->child_reqs++; 197 | TAILQ_INSERT_TAIL(&parent->children, child, child_tailq); 198 | child->parent = parent; 199 | child->cb_fn = nvme_request_cb_complete_child; 200 | child->cb_arg = child; 201 | } 202 | 203 | void nvme_request_remove_child(struct nvme_request *parent, 204 | struct nvme_request *child) 205 | { 206 | nvme_assert(child->parent == parent, "child->parent != parent\n"); 207 | nvme_assert(parent->child_reqs != 0, "child_reqs is 0\n"); 208 | 209 | parent->child_reqs--; 210 | TAILQ_REMOVE(&parent->children, child, child_tailq); 211 | } 212 | -------------------------------------------------------------------------------- /libnvme.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@prefix@ 2 | exec_prefix=@exec_prefix@ 3 | libdir=@libdir@ 4 | includedir=@includedir@ 5 | 6 | Name: libnvme 7 | Description: A library implementing a user level NVMe driver 8 | Version: @PACKAGE_VERSION@ 9 | Cflags: -I${includedir} 10 | Libs: -L${libdir} -lnvme 11 | 12 | -------------------------------------------------------------------------------- /m4/acx_pthread.m4: -------------------------------------------------------------------------------- 1 | dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) 2 | dnl 3 | dnl @summary figure out how to build C programs using POSIX threads 4 | dnl 5 | dnl This macro figures out how to build C programs using POSIX threads. 6 | dnl It sets the PTHREAD_LIBS output variable to the threads library and 7 | dnl linker flags, and the PTHREAD_CFLAGS output variable to any special 8 | dnl C compiler flags that are needed. (The user can also force certain 9 | dnl compiler flags/libs to be tested by setting these environment 10 | dnl variables.) 
11 | dnl 12 | dnl Also sets PTHREAD_CC to any special C compiler that is needed for 13 | dnl multi-threaded programs (defaults to the value of CC otherwise). 14 | dnl (This is necessary on AIX to use the special cc_r compiler alias.) 15 | dnl 16 | dnl NOTE: You are assumed to not only compile your program with these 17 | dnl flags, but also link it with them as well. e.g. you should link 18 | dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS 19 | dnl $LIBS 20 | dnl 21 | dnl If you are only building threads programs, you may wish to use 22 | dnl these variables in your default LIBS, CFLAGS, and CC: 23 | dnl 24 | dnl LIBS="$PTHREAD_LIBS $LIBS" 25 | dnl CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 26 | dnl CC="$PTHREAD_CC" 27 | dnl 28 | dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute 29 | dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to 30 | dnl that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX). 31 | dnl 32 | dnl ACTION-IF-FOUND is a list of shell commands to run if a threads 33 | dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands to 34 | dnl run it if it is not found. If ACTION-IF-FOUND is not specified, the 35 | dnl default action will define HAVE_PTHREAD. 36 | dnl 37 | dnl Please let the authors know if this macro fails on any platform, or 38 | dnl if you have any other suggestions or comments. This macro was based 39 | dnl on work by SGJ on autoconf scripts for FFTW (www.fftw.org) (with 40 | dnl help from M. Frigo), as well as ac_pthread and hb_pthread macros 41 | dnl posted by Alejandro Forero Cuervo to the autoconf macro repository. 42 | dnl We are also grateful for the helpful feedback of numerous users. 43 | dnl 44 | dnl @category InstalledPackages 45 | dnl @author Steven G. 
Johnson 46 | dnl @version 2006-05-29 47 | dnl @license GPLWithACException 48 | 49 | AC_DEFUN([ACX_PTHREAD], [ 50 | AC_REQUIRE([AC_CANONICAL_HOST]) 51 | AC_LANG_SAVE 52 | AC_LANG_C 53 | acx_pthread_ok=no 54 | 55 | # We used to check for pthread.h first, but this fails if pthread.h 56 | # requires special compiler flags (e.g. on True64 or Sequent). 57 | # It gets checked for in the link test anyway. 58 | 59 | # First of all, check if the user has set any of the PTHREAD_LIBS, 60 | # etcetera environment variables, and if threads linking works using 61 | # them: 62 | if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then 63 | save_CFLAGS="$CFLAGS" 64 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 65 | save_LIBS="$LIBS" 66 | LIBS="$PTHREAD_LIBS $LIBS" 67 | AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS]) 68 | AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes) 69 | AC_MSG_RESULT($acx_pthread_ok) 70 | if test x"$acx_pthread_ok" = xno; then 71 | PTHREAD_LIBS="" 72 | PTHREAD_CFLAGS="" 73 | fi 74 | LIBS="$save_LIBS" 75 | CFLAGS="$save_CFLAGS" 76 | fi 77 | 78 | # We must check for the threads library under a number of different 79 | # names; the ordering is very important because some systems 80 | # (e.g. DEC) have both -lpthread and -lpthreads, where one of the 81 | # libraries is broken (non-POSIX). 82 | 83 | # Create a list of thread flags to try. Items starting with a "-" are 84 | # C compiler flags, and other items are library names, except for "none" 85 | # which indicates that we try without any flags at all, and "pthread-config" 86 | # which is a program returning the flags for the Pth emulation library. 87 | 88 | acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config" 89 | 90 | # The ordering *is* (sometimes) important. 
Some notes on the 91 | # individual items follow: 92 | 93 | # pthreads: AIX (must check this before -lpthread) 94 | # none: in case threads are in libc; should be tried before -Kthread and 95 | # other compiler flags to prevent continual compiler warnings 96 | # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h) 97 | # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able) 98 | # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread) 99 | # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads) 100 | # -pthreads: Solaris/gcc 101 | # -mthreads: Mingw32/gcc, Lynx/gcc 102 | # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it 103 | # doesn't hurt to check since this sometimes defines pthreads too; 104 | # also defines -D_REENTRANT) 105 | # ... -mt is also the pthreads flag for HP/aCC 106 | # pthread: Linux, etcetera 107 | # --thread-safe: KAI C++ 108 | # pthread-config: use pthread-config program (for GNU Pth library) 109 | 110 | case "${host_cpu}-${host_os}" in 111 | *solaris*) 112 | 113 | # On Solaris (at least, for some versions), libc contains stubbed 114 | # (non-functional) versions of the pthreads routines, so link-based 115 | # tests will erroneously succeed. (We need to link with -pthreads/-mt/ 116 | # -lpthread.) (The stubs are missing pthread_cleanup_push, or rather 117 | # a function called by this macro, so we could check for that, but 118 | # who knows whether they'll stub that too in a future libc.) 
So, 119 | # we'll just look for -pthreads and -lpthread first: 120 | 121 | acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags" 122 | ;; 123 | esac 124 | 125 | if test x"$acx_pthread_ok" = xno; then 126 | for flag in $acx_pthread_flags; do 127 | 128 | case $flag in 129 | none) 130 | AC_MSG_CHECKING([whether pthreads work without any flags]) 131 | ;; 132 | 133 | -*) 134 | AC_MSG_CHECKING([whether pthreads work with $flag]) 135 | PTHREAD_CFLAGS="$flag" 136 | ;; 137 | 138 | pthread-config) 139 | AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no) 140 | if test x"$acx_pthread_config" = xno; then continue; fi 141 | PTHREAD_CFLAGS="`pthread-config --cflags`" 142 | PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`" 143 | ;; 144 | 145 | *) 146 | AC_MSG_CHECKING([for the pthreads library -l$flag]) 147 | PTHREAD_LIBS="-l$flag" 148 | ;; 149 | esac 150 | 151 | save_LIBS="$LIBS" 152 | save_CFLAGS="$CFLAGS" 153 | LIBS="$PTHREAD_LIBS $LIBS" 154 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 155 | 156 | # Check for various functions. We must include pthread.h, 157 | # since some functions may be macros. (On the Sequent, we 158 | # need a special flag -Kthread to make this header compile.) 159 | # We check for pthread_join because it is in -lpthread on IRIX 160 | # while pthread_create is in libc. We check for pthread_attr_init 161 | # due to DEC craziness with -lpthreads. We check for 162 | # pthread_cleanup_push because it is one of the few pthread 163 | # functions on Solaris that doesn't have a non-functional libc stub. 164 | # We try pthread_create on general principles. 
165 | AC_TRY_LINK([#include <pthread.h>], 166 | [pthread_t th; pthread_join(th, 0); 167 | pthread_attr_init(0); pthread_cleanup_push(0, 0); 168 | pthread_create(0,0,0,0); pthread_cleanup_pop(0); ], 169 | [acx_pthread_ok=yes]) 170 | 171 | LIBS="$save_LIBS" 172 | CFLAGS="$save_CFLAGS" 173 | 174 | AC_MSG_RESULT($acx_pthread_ok) 175 | if test "x$acx_pthread_ok" = xyes; then 176 | break; 177 | fi 178 | 179 | PTHREAD_LIBS="" 180 | PTHREAD_CFLAGS="" 181 | done 182 | fi 183 | 184 | # Various other checks: 185 | if test "x$acx_pthread_ok" = xyes; then 186 | save_LIBS="$LIBS" 187 | LIBS="$PTHREAD_LIBS $LIBS" 188 | save_CFLAGS="$CFLAGS" 189 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS" 190 | 191 | # Detect AIX lossage: JOINABLE attribute is called UNDETACHED. 192 | AC_MSG_CHECKING([for joinable pthread attribute]) 193 | attr_name=unknown 194 | for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do 195 | AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;], 196 | [attr_name=$attr; break]) 197 | done 198 | AC_MSG_RESULT($attr_name) 199 | if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then 200 | AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name, 201 | [Define to necessary symbol if this constant 202 | uses a non-standard name on your system.]) 203 | fi 204 | 205 | AC_MSG_CHECKING([if more special flags are required for pthreads]) 206 | flag=no 207 | case "${host_cpu}-${host_os}" in 208 | *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";; 209 | *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";; 210 | esac 211 | AC_MSG_RESULT(${flag}) 212 | if test "x$flag" != xno; then 213 | PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS" 214 | fi 215 | 216 | LIBS="$save_LIBS" 217 | CFLAGS="$save_CFLAGS" 218 | 219 | # More AIX lossage: must compile with xlc_r or cc_r 220 | if test x"$GCC" != xyes; then 221 | AC_CHECK_PROGS(PTHREAD_CC, xlc_r cc_r, ${CC}) 222 | else 223 | PTHREAD_CC=$CC 224 | fi 225 | else 226 | PTHREAD_CC="$CC" 227 | fi 228 | 229 | AC_SUBST(PTHREAD_LIBS) 230 | 
AC_SUBST(PTHREAD_CFLAGS) 231 | AC_SUBST(PTHREAD_CC) 232 | 233 | # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND: 234 | if test x"$acx_pthread_ok" = xyes; then 235 | ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1]) 236 | : 237 | else 238 | acx_pthread_ok=no 239 | $2 240 | fi 241 | AC_LANG_RESTORE 242 | ])dnl ACX_PTHREAD 243 | -------------------------------------------------------------------------------- /m4/dontremove: -------------------------------------------------------------------------------- 1 | the only purpose of this file is to keep the m4 directory in git 2 | -------------------------------------------------------------------------------- /m4/ltoptions.m4: -------------------------------------------------------------------------------- 1 | # Helper functions for option handling. -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004-2005, 2007-2009, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Gary V. Vaughan, 2004 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 8 ltoptions.m4 12 | 13 | # This is to help aclocal find these macros, as it can't see m4_define. 14 | AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])]) 15 | 16 | 17 | # _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME) 18 | # ------------------------------------------ 19 | m4_define([_LT_MANGLE_OPTION], 20 | [[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])]) 21 | 22 | 23 | # _LT_SET_OPTION(MACRO-NAME, OPTION-NAME) 24 | # --------------------------------------- 25 | # Set option OPTION-NAME for macro MACRO-NAME, and if there is a 26 | # matching handler defined, dispatch to it. Other OPTION-NAMEs are 27 | # saved as a flag. 
28 | m4_define([_LT_SET_OPTION], 29 | [m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl 30 | m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]), 31 | _LT_MANGLE_DEFUN([$1], [$2]), 32 | [m4_warning([Unknown $1 option '$2'])])[]dnl 33 | ]) 34 | 35 | 36 | # _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET]) 37 | # ------------------------------------------------------------ 38 | # Execute IF-SET if OPTION is set, IF-NOT-SET otherwise. 39 | m4_define([_LT_IF_OPTION], 40 | [m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])]) 41 | 42 | 43 | # _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET) 44 | # ------------------------------------------------------- 45 | # Execute IF-NOT-SET unless all options in OPTION-LIST for MACRO-NAME 46 | # are set. 47 | m4_define([_LT_UNLESS_OPTIONS], 48 | [m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), 49 | [m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option), 50 | [m4_define([$0_found])])])[]dnl 51 | m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3 52 | ])[]dnl 53 | ]) 54 | 55 | 56 | # _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST) 57 | # ---------------------------------------- 58 | # OPTION-LIST is a space-separated list of Libtool options associated 59 | # with MACRO-NAME. If any OPTION has a matching handler declared with 60 | # LT_OPTION_DEFINE, dispatch to that macro; otherwise complain about 61 | # the unknown option and exit. 62 | m4_defun([_LT_SET_OPTIONS], 63 | [# Set options 64 | m4_foreach([_LT_Option], m4_split(m4_normalize([$2])), 65 | [_LT_SET_OPTION([$1], _LT_Option)]) 66 | 67 | m4_if([$1],[LT_INIT],[ 68 | dnl 69 | dnl Simply set some default values (i.e off) if boolean options were not 70 | dnl specified: 71 | _LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no 72 | ]) 73 | _LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no 74 | ]) 75 | dnl 76 | dnl If no reference was made to various pairs of opposing options, then 77 | dnl we run the default mode handler for the pair. 
For example, if neither 78 | dnl 'shared' nor 'disable-shared' was passed, we enable building of shared 79 | dnl archives by default: 80 | _LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED]) 81 | _LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC]) 82 | _LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC]) 83 | _LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install], 84 | [_LT_ENABLE_FAST_INSTALL]) 85 | _LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4], 86 | [_LT_WITH_AIX_SONAME([aix])]) 87 | ]) 88 | ])# _LT_SET_OPTIONS 89 | 90 | 91 | ## --------------------------------- ## 92 | ## Macros to handle LT_INIT options. ## 93 | ## --------------------------------- ## 94 | 95 | # _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME) 96 | # ----------------------------------------- 97 | m4_define([_LT_MANGLE_DEFUN], 98 | [[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])]) 99 | 100 | 101 | # LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE) 102 | # ----------------------------------------------- 103 | m4_define([LT_OPTION_DEFINE], 104 | [m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl 105 | ])# LT_OPTION_DEFINE 106 | 107 | 108 | # dlopen 109 | # ------ 110 | LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes 111 | ]) 112 | 113 | AU_DEFUN([AC_LIBTOOL_DLOPEN], 114 | [_LT_SET_OPTION([LT_INIT], [dlopen]) 115 | AC_DIAGNOSE([obsolete], 116 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 117 | put the 'dlopen' option into LT_INIT's first parameter.]) 118 | ]) 119 | 120 | dnl aclocal-1.4 backwards compatibility: 121 | dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], []) 122 | 123 | 124 | # win32-dll 125 | # --------- 126 | # Declare package support for building win32 dll's. 
127 | LT_OPTION_DEFINE([LT_INIT], [win32-dll], 128 | [enable_win32_dll=yes 129 | 130 | case $host in 131 | *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*) 132 | AC_CHECK_TOOL(AS, as, false) 133 | AC_CHECK_TOOL(DLLTOOL, dlltool, false) 134 | AC_CHECK_TOOL(OBJDUMP, objdump, false) 135 | ;; 136 | esac 137 | 138 | test -z "$AS" && AS=as 139 | _LT_DECL([], [AS], [1], [Assembler program])dnl 140 | 141 | test -z "$DLLTOOL" && DLLTOOL=dlltool 142 | _LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl 143 | 144 | test -z "$OBJDUMP" && OBJDUMP=objdump 145 | _LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl 146 | ])# win32-dll 147 | 148 | AU_DEFUN([AC_LIBTOOL_WIN32_DLL], 149 | [AC_REQUIRE([AC_CANONICAL_HOST])dnl 150 | _LT_SET_OPTION([LT_INIT], [win32-dll]) 151 | AC_DIAGNOSE([obsolete], 152 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 153 | put the 'win32-dll' option into LT_INIT's first parameter.]) 154 | ]) 155 | 156 | dnl aclocal-1.4 backwards compatibility: 157 | dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], []) 158 | 159 | 160 | # _LT_ENABLE_SHARED([DEFAULT]) 161 | # ---------------------------- 162 | # implement the --enable-shared flag, and supports the 'shared' and 163 | # 'disable-shared' LT_INIT options. 164 | # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. 165 | m4_define([_LT_ENABLE_SHARED], 166 | [m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl 167 | AC_ARG_ENABLE([shared], 168 | [AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@], 169 | [build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])], 170 | [p=${PACKAGE-default} 171 | case $enableval in 172 | yes) enable_shared=yes ;; 173 | no) enable_shared=no ;; 174 | *) 175 | enable_shared=no 176 | # Look at the argument we got. We use all the common list separators. 
177 | lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, 178 | for pkg in $enableval; do 179 | IFS=$lt_save_ifs 180 | if test "X$pkg" = "X$p"; then 181 | enable_shared=yes 182 | fi 183 | done 184 | IFS=$lt_save_ifs 185 | ;; 186 | esac], 187 | [enable_shared=]_LT_ENABLE_SHARED_DEFAULT) 188 | 189 | _LT_DECL([build_libtool_libs], [enable_shared], [0], 190 | [Whether or not to build shared libraries]) 191 | ])# _LT_ENABLE_SHARED 192 | 193 | LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])]) 194 | LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])]) 195 | 196 | # Old names: 197 | AC_DEFUN([AC_ENABLE_SHARED], 198 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared]) 199 | ]) 200 | 201 | AC_DEFUN([AC_DISABLE_SHARED], 202 | [_LT_SET_OPTION([LT_INIT], [disable-shared]) 203 | ]) 204 | 205 | AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) 206 | AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) 207 | 208 | dnl aclocal-1.4 backwards compatibility: 209 | dnl AC_DEFUN([AM_ENABLE_SHARED], []) 210 | dnl AC_DEFUN([AM_DISABLE_SHARED], []) 211 | 212 | 213 | 214 | # _LT_ENABLE_STATIC([DEFAULT]) 215 | # ---------------------------- 216 | # implement the --enable-static flag, and support the 'static' and 217 | # 'disable-static' LT_INIT options. 218 | # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. 219 | m4_define([_LT_ENABLE_STATIC], 220 | [m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl 221 | AC_ARG_ENABLE([static], 222 | [AS_HELP_STRING([--enable-static@<:@=PKGS@:>@], 223 | [build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])], 224 | [p=${PACKAGE-default} 225 | case $enableval in 226 | yes) enable_static=yes ;; 227 | no) enable_static=no ;; 228 | *) 229 | enable_static=no 230 | # Look at the argument we got. We use all the common list separators. 
231 | lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, 232 | for pkg in $enableval; do 233 | IFS=$lt_save_ifs 234 | if test "X$pkg" = "X$p"; then 235 | enable_static=yes 236 | fi 237 | done 238 | IFS=$lt_save_ifs 239 | ;; 240 | esac], 241 | [enable_static=]_LT_ENABLE_STATIC_DEFAULT) 242 | 243 | _LT_DECL([build_old_libs], [enable_static], [0], 244 | [Whether or not to build static libraries]) 245 | ])# _LT_ENABLE_STATIC 246 | 247 | LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])]) 248 | LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])]) 249 | 250 | # Old names: 251 | AC_DEFUN([AC_ENABLE_STATIC], 252 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static]) 253 | ]) 254 | 255 | AC_DEFUN([AC_DISABLE_STATIC], 256 | [_LT_SET_OPTION([LT_INIT], [disable-static]) 257 | ]) 258 | 259 | AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) 260 | AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) 261 | 262 | dnl aclocal-1.4 backwards compatibility: 263 | dnl AC_DEFUN([AM_ENABLE_STATIC], []) 264 | dnl AC_DEFUN([AM_DISABLE_STATIC], []) 265 | 266 | 267 | 268 | # _LT_ENABLE_FAST_INSTALL([DEFAULT]) 269 | # ---------------------------------- 270 | # implement the --enable-fast-install flag, and support the 'fast-install' 271 | # and 'disable-fast-install' LT_INIT options. 272 | # DEFAULT is either 'yes' or 'no'. If omitted, it defaults to 'yes'. 273 | m4_define([_LT_ENABLE_FAST_INSTALL], 274 | [m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl 275 | AC_ARG_ENABLE([fast-install], 276 | [AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@], 277 | [optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])], 278 | [p=${PACKAGE-default} 279 | case $enableval in 280 | yes) enable_fast_install=yes ;; 281 | no) enable_fast_install=no ;; 282 | *) 283 | enable_fast_install=no 284 | # Look at the argument we got. We use all the common list separators. 
285 | lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, 286 | for pkg in $enableval; do 287 | IFS=$lt_save_ifs 288 | if test "X$pkg" = "X$p"; then 289 | enable_fast_install=yes 290 | fi 291 | done 292 | IFS=$lt_save_ifs 293 | ;; 294 | esac], 295 | [enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT) 296 | 297 | _LT_DECL([fast_install], [enable_fast_install], [0], 298 | [Whether or not to optimize for fast installation])dnl 299 | ])# _LT_ENABLE_FAST_INSTALL 300 | 301 | LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])]) 302 | LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])]) 303 | 304 | # Old names: 305 | AU_DEFUN([AC_ENABLE_FAST_INSTALL], 306 | [_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install]) 307 | AC_DIAGNOSE([obsolete], 308 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put 309 | the 'fast-install' option into LT_INIT's first parameter.]) 310 | ]) 311 | 312 | AU_DEFUN([AC_DISABLE_FAST_INSTALL], 313 | [_LT_SET_OPTION([LT_INIT], [disable-fast-install]) 314 | AC_DIAGNOSE([obsolete], 315 | [$0: Remove this warning and the call to _LT_SET_OPTION when you put 316 | the 'disable-fast-install' option into LT_INIT's first parameter.]) 317 | ]) 318 | 319 | dnl aclocal-1.4 backwards compatibility: 320 | dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], []) 321 | dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], []) 322 | 323 | 324 | # _LT_WITH_AIX_SONAME([DEFAULT]) 325 | # ---------------------------------- 326 | # implement the --with-aix-soname flag, and support the `aix-soname=aix' 327 | # and `aix-soname=both' and `aix-soname=svr4' LT_INIT options. DEFAULT 328 | # is either `aix', `both' or `svr4'. If omitted, it defaults to `aix'. 
329 | m4_define([_LT_WITH_AIX_SONAME], 330 | [m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl 331 | shared_archive_member_spec= 332 | case $host,$enable_shared in 333 | power*-*-aix[[5-9]]*,yes) 334 | AC_MSG_CHECKING([which variant of shared library versioning to provide]) 335 | AC_ARG_WITH([aix-soname], 336 | [AS_HELP_STRING([--with-aix-soname=aix|svr4|both], 337 | [shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])], 338 | [case $withval in 339 | aix|svr4|both) 340 | ;; 341 | *) 342 | AC_MSG_ERROR([Unknown argument to --with-aix-soname]) 343 | ;; 344 | esac 345 | lt_cv_with_aix_soname=$with_aix_soname], 346 | [AC_CACHE_VAL([lt_cv_with_aix_soname], 347 | [lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT) 348 | with_aix_soname=$lt_cv_with_aix_soname]) 349 | AC_MSG_RESULT([$with_aix_soname]) 350 | if test aix != "$with_aix_soname"; then 351 | # For the AIX way of multilib, we name the shared archive member 352 | # based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o', 353 | # and 'shr.imp' or 'shr_64.imp', respectively, for the Import File. 354 | # Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag, 355 | # the AIX toolchain works better with OBJECT_MODE set (default 32). 
356 | if test 64 = "${OBJECT_MODE-32}"; then 357 | shared_archive_member_spec=shr_64 358 | else 359 | shared_archive_member_spec=shr 360 | fi 361 | fi 362 | ;; 363 | *) 364 | with_aix_soname=aix 365 | ;; 366 | esac 367 | 368 | _LT_DECL([], [shared_archive_member_spec], [0], 369 | [Shared archive member basename, for filename based shared library versioning on AIX])dnl 370 | ])# _LT_WITH_AIX_SONAME 371 | 372 | LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])]) 373 | LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])]) 374 | LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])]) 375 | 376 | 377 | # _LT_WITH_PIC([MODE]) 378 | # -------------------- 379 | # implement the --with-pic flag, and support the 'pic-only' and 'no-pic' 380 | # LT_INIT options. 381 | # MODE is either 'yes' or 'no'. If omitted, it defaults to 'both'. 382 | m4_define([_LT_WITH_PIC], 383 | [AC_ARG_WITH([pic], 384 | [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@], 385 | [try to use only PIC/non-PIC objects @<:@default=use both@:>@])], 386 | [lt_p=${PACKAGE-default} 387 | case $withval in 388 | yes|no) pic_mode=$withval ;; 389 | *) 390 | pic_mode=default 391 | # Look at the argument we got. We use all the common list separators. 
392 | lt_save_ifs=$IFS; IFS=$IFS$PATH_SEPARATOR, 393 | for lt_pkg in $withval; do 394 | IFS=$lt_save_ifs 395 | if test "X$lt_pkg" = "X$lt_p"; then 396 | pic_mode=yes 397 | fi 398 | done 399 | IFS=$lt_save_ifs 400 | ;; 401 | esac], 402 | [pic_mode=m4_default([$1], [default])]) 403 | 404 | _LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl 405 | ])# _LT_WITH_PIC 406 | 407 | LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])]) 408 | LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])]) 409 | 410 | # Old name: 411 | AU_DEFUN([AC_LIBTOOL_PICMODE], 412 | [_LT_SET_OPTION([LT_INIT], [pic-only]) 413 | AC_DIAGNOSE([obsolete], 414 | [$0: Remove this warning and the call to _LT_SET_OPTION when you 415 | put the 'pic-only' option into LT_INIT's first parameter.]) 416 | ]) 417 | 418 | dnl aclocal-1.4 backwards compatibility: 419 | dnl AC_DEFUN([AC_LIBTOOL_PICMODE], []) 420 | 421 | ## ----------------- ## 422 | ## LTDL_INIT Options ## 423 | ## ----------------- ## 424 | 425 | m4_define([_LTDL_MODE], []) 426 | LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive], 427 | [m4_define([_LTDL_MODE], [nonrecursive])]) 428 | LT_OPTION_DEFINE([LTDL_INIT], [recursive], 429 | [m4_define([_LTDL_MODE], [recursive])]) 430 | LT_OPTION_DEFINE([LTDL_INIT], [subproject], 431 | [m4_define([_LTDL_MODE], [subproject])]) 432 | 433 | m4_define([_LTDL_TYPE], []) 434 | LT_OPTION_DEFINE([LTDL_INIT], [installable], 435 | [m4_define([_LTDL_TYPE], [installable])]) 436 | LT_OPTION_DEFINE([LTDL_INIT], [convenience], 437 | [m4_define([_LTDL_TYPE], [convenience])]) 438 | -------------------------------------------------------------------------------- /m4/ltsugar.m4: -------------------------------------------------------------------------------- 1 | # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Gary V. 
Vaughan, 2004 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 6 ltsugar.m4 12 | 13 | # This is to help aclocal find these macros, as it can't see m4_define. 14 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) 15 | 16 | 17 | # lt_join(SEP, ARG1, [ARG2...]) 18 | # ----------------------------- 19 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their 20 | # associated separator. 21 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier 22 | # versions in m4sugar had bugs. 23 | m4_define([lt_join], 24 | [m4_if([$#], [1], [], 25 | [$#], [2], [[$2]], 26 | [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) 27 | m4_define([_lt_join], 28 | [m4_if([$#$2], [2], [], 29 | [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) 30 | 31 | 32 | # lt_car(LIST) 33 | # lt_cdr(LIST) 34 | # ------------ 35 | # Manipulate m4 lists. 36 | # These macros are necessary as long as will still need to support 37 | # Autoconf-2.59, which quotes differently. 38 | m4_define([lt_car], [[$1]]) 39 | m4_define([lt_cdr], 40 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], 41 | [$#], 1, [], 42 | [m4_dquote(m4_shift($@))])]) 43 | m4_define([lt_unquote], $1) 44 | 45 | 46 | # lt_append(MACRO-NAME, STRING, [SEPARATOR]) 47 | # ------------------------------------------ 48 | # Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. 49 | # Note that neither SEPARATOR nor STRING are expanded; they are appended 50 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). 51 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different 52 | # than defined and empty). 
53 | # 54 | # This macro is needed until we can rely on Autoconf 2.62, since earlier 55 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 56 | m4_define([lt_append], 57 | [m4_define([$1], 58 | m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) 59 | 60 | 61 | 62 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) 63 | # ---------------------------------------------------------- 64 | # Produce a SEP delimited list of all paired combinations of elements of 65 | # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list 66 | # has the form PREFIXmINFIXSUFFIXn. 67 | # Needed until we can rely on m4_combine added in Autoconf 2.62. 68 | m4_define([lt_combine], 69 | [m4_if(m4_eval([$# > 3]), [1], 70 | [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl 71 | [[m4_foreach([_Lt_prefix], [$2], 72 | [m4_foreach([_Lt_suffix], 73 | ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, 74 | [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) 75 | 76 | 77 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) 78 | # ----------------------------------------------------------------------- 79 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited 80 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
81 | m4_define([lt_if_append_uniq], 82 | [m4_ifdef([$1], 83 | [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], 84 | [lt_append([$1], [$2], [$3])$4], 85 | [$5])], 86 | [lt_append([$1], [$2], [$3])$4])]) 87 | 88 | 89 | # lt_dict_add(DICT, KEY, VALUE) 90 | # ----------------------------- 91 | m4_define([lt_dict_add], 92 | [m4_define([$1($2)], [$3])]) 93 | 94 | 95 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) 96 | # -------------------------------------------- 97 | m4_define([lt_dict_add_subkey], 98 | [m4_define([$1($2:$3)], [$4])]) 99 | 100 | 101 | # lt_dict_fetch(DICT, KEY, [SUBKEY]) 102 | # ---------------------------------- 103 | m4_define([lt_dict_fetch], 104 | [m4_ifval([$3], 105 | m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), 106 | m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) 107 | 108 | 109 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) 110 | # ----------------------------------------------------------------- 111 | m4_define([lt_if_dict_fetch], 112 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], 113 | [$5], 114 | [$6])]) 115 | 116 | 117 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) 118 | # -------------------------------------------------------------- 119 | m4_define([lt_dict_filter], 120 | [m4_if([$5], [], [], 121 | [lt_join(m4_quote(m4_default([$4], [[, ]])), 122 | lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), 123 | [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl 124 | ]) 125 | -------------------------------------------------------------------------------- /m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. 
4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 9 | 10 | # @configure_input@ 11 | 12 | # serial 4179 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.6]) 16 | m4_define([LT_PACKAGE_REVISION], [2.4.6]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.6' 20 | macro_revision='2.4.6' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /m4/lt~obsolete.m4: -------------------------------------------------------------------------------- 1 | # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Scott James Remnant, 2004. 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 5 lt~obsolete.m4 12 | 13 | # These exist entirely to fool aclocal when bootstrapping libtool. 14 | # 15 | # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), 16 | # which have later been changed to m4_define as they aren't part of the 17 | # exported API, or moved to Autoconf or Automake where they belong. 18 | # 19 | # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN 20 | # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us 21 | # using a macro with the same name in our local m4/libtool.m4 it'll 22 | # pull the old libtool.m4 in (it doesn't see our shiny new m4_define 23 | # and doesn't know about Autoconf macros at all.) 
24 | # 25 | # So we provide this file, which has a silly filename so it's always 26 | # included after everything else. This provides aclocal with the 27 | # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything 28 | # because those macros already exist, or will be overwritten later. 29 | # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 30 | # 31 | # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 32 | # Yes, that means every name once taken will need to remain here until 33 | # we give up compatibility with versions before 1.7, at which point 34 | # we need to keep only those names which we still refer to. 35 | 36 | # This is to help aclocal find these macros, as it can't see m4_define. 37 | AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) 38 | 39 | m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) 40 | m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) 41 | m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) 42 | m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) 43 | m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) 44 | m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) 45 | m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) 46 | m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) 47 | m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) 48 | m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) 49 | m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) 50 | m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) 51 | m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) 52 | m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) 53 | m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) 54 | m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) 55 
| m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) 56 | m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) 57 | m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) 58 | m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) 59 | m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) 60 | m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) 61 | m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) 62 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) 63 | m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) 64 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) 65 | m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) 66 | m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) 67 | m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) 68 | m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) 69 | m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) 70 | m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) 71 | m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) 72 | m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) 73 | m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) 74 | m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) 75 | m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) 76 | m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) 77 | m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) 78 | m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) 79 | m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) 80 | m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) 81 | 
# Mount hugetlbfs on /mnt/huge when no hugetlbfs mount exists yet, then
# reserve $NRHUGE huge pages through /proc/sys/vm/nr_hugepages.
# NRHUGE is set by the caller before this function is invoked.
function configure_linux {
	# "grep -qv" succeeds as soon as one mount line does NOT contain
	# hugetlbfs, i.e. practically always, so the file system used to be
	# mounted again on every run. Test for the absence of a match instead.
	if ! mount | grep -q hugetlbfs; then
		mkdir -p /mnt/huge
		mount -t hugetlbfs nodev /mnt/huge
	fi

	echo "$NRHUGE" > /proc/sys/vm/nr_hugepages
}
/*
 * Scale a byte count down to the largest binary prefix, up to Gi.
 *
 * The value is divided by 1024 (integer division, remainder dropped)
 * until it is below 1024 or the Gi prefix is reached. The matching prefix
 * ("", "Ki", "Mi" or "Gi") is copied into unit, which must have room for
 * at least 3 bytes. Returns the scaled value.
 */
static unsigned long long nvme_info_strsize(unsigned long long val,
					    char *unit)
{
	static const char *const prefixes[] = { "", "Ki", "Mi", "Gi" };
	const size_t last = sizeof(prefixes) / sizeof(prefixes[0]) - 1;
	size_t idx = 0;

	while (idx < last && val >= 1024) {
		val /= 1024;
		idx++;
	}

	strcpy(unit, prefixes[idx]);

	return val;
}
+ 1); 64 | printf(" Maximum queue depth: %u\n", cstat->max_qd); 65 | 66 | uval = nvme_info_strsize(cstat->max_xfer_size, unit); 67 | printf(" Maximum request size: %llu %sB\n", uval, unit); 68 | 69 | return 0; 70 | } 71 | 72 | static int nvme_info_ns(struct nvme_ctrlr *ctrlr, 73 | struct nvme_ctrlr_stat *cstat) 74 | { 75 | struct nvme_ns_stat nsstat; 76 | struct nvme_ns *ns; 77 | unsigned long long uval; 78 | char unit[16]; 79 | unsigned int i; 80 | 81 | printf("%u namespaces:\n", cstat->nr_ns); 82 | 83 | for (i = 0; i < cstat->nr_ns; i++) { 84 | 85 | ns = nvme_ns_open(ctrlr, cstat->ns_ids[i]); 86 | if (!ns) { 87 | fprintf(stderr, "Open namespace %u failed\n", 88 | cstat->ns_ids[i]); 89 | return -1; 90 | } 91 | 92 | if (nvme_ns_stat(ns, &nsstat) != 0) { 93 | fprintf(stderr, "Get namespace %u info failed\n", 94 | cstat->ns_ids[i]); 95 | nvme_ns_close(ns); 96 | return -1; 97 | } 98 | 99 | uval = nvme_info_strsize(nsstat.sector_size * nsstat.sectors, 100 | unit); 101 | printf(" Namespace %u/%u: %lu bytes sectors, %lu sectors (%llu %sB)\n", 102 | nsstat.id, cstat->nr_ns, 103 | nsstat.sector_size, nsstat.sectors, 104 | uval, unit); 105 | 106 | nvme_ns_close(ns); 107 | 108 | } 109 | 110 | return 0; 111 | } 112 | 113 | static int nvme_info_qpair(struct nvme_ctrlr *ctrlr, 114 | struct nvme_ctrlr_stat *cstat) 115 | { 116 | struct nvme_qpair_stat *qpstat; 117 | struct nvme_qpair **qp; 118 | unsigned int i; 119 | int ret; 120 | 121 | printf("%u I/O queue pairs:\n", cstat->max_io_qpairs); 122 | 123 | qpstat = calloc(cstat->max_io_qpairs, sizeof(struct nvme_qpair_stat)); 124 | qp = calloc(cstat->max_io_qpairs, sizeof(struct nvme_qpair *)); 125 | if (!qpstat || !qp) { 126 | fprintf(stderr, "No memory for I/O qpairs info\n"); 127 | return -1; 128 | } 129 | 130 | for (i = 0; i < cstat->max_io_qpairs; i++) { 131 | 132 | qp[i] = nvme_ioqp_get(ctrlr, 0, 0); 133 | if (!qp[i]) { 134 | fprintf(stderr, "Get I/O qpair %d failed\n", i); 135 | break; 136 | } 137 | 138 | ret = 
nvme_qpair_stat(qp[i], &qpstat[i]); 139 | if (ret) { 140 | fprintf(stderr, 141 | "Get I/O qpair %d information failed\n", i); 142 | break; 143 | } 144 | 145 | printf(" qpair %u/%u: ID %u, max qd %u, prio %u\n", 146 | i + 1, cstat->max_io_qpairs, 147 | qpstat[i].id, qpstat[i].qd, qpstat[i].qprio); 148 | 149 | } 150 | 151 | for (i = 0; i < cstat->max_io_qpairs; i++) 152 | if (qp[i]) 153 | nvme_ioqp_release(qp[i]); 154 | 155 | free(qp); 156 | free(qpstat); 157 | 158 | return 0; 159 | } 160 | 161 | int main(int argc, char **argv) 162 | { 163 | struct nvme_ctrlr *ctrlr; 164 | struct nvme_ctrlr_stat cstat; 165 | int log_level = -1; 166 | char *dev; 167 | int i, ret; 168 | 169 | if (argc < 2) { 170 | printf("Usage: %s [options] \n" 171 | "Options:\n" 172 | " -v : verbose mode (debug log levelNVME_LOG_NOTICE)\n", 173 | argv[0]); 174 | exit(1); 175 | } 176 | 177 | for (i = 1; i < argc - 1; i++) { 178 | if (strcmp(argv[i], "-v") == 0) { 179 | log_level = NVME_LOG_DEBUG; 180 | } else { 181 | fprintf(stderr, 182 | "Unknown option \"%s\"\n", 183 | argv[i]); 184 | exit(1); 185 | } 186 | } 187 | 188 | dev = argv[i]; 189 | 190 | ret = nvme_lib_init(log_level, -1, NULL); 191 | if (ret != 0) { 192 | fprintf(stderr, 193 | "libnvme init failed %d (%s)\n", 194 | ret, strerror(-ret)); 195 | exit(1); 196 | } 197 | 198 | printf("Opening NVMe controller %s\n", dev); 199 | ctrlr = nvme_ctrlr_open(dev, NULL); 200 | if (!ctrlr) { 201 | fprintf(stderr, "Open NVMe controller %s failed\n", 202 | dev); 203 | return -1; 204 | } 205 | 206 | ret = nvme_info_ctrlr(ctrlr, &cstat); 207 | if (ret != 0) 208 | goto out; 209 | 210 | ret = nvme_info_ns(ctrlr, &cstat); 211 | if (ret != 0) 212 | goto out; 213 | 214 | ret = nvme_info_qpair(ctrlr, &cstat); 215 | 216 | out: 217 | nvme_ctrlr_close(ctrlr); 218 | 219 | return ret; 220 | } 221 | -------------------------------------------------------------------------------- /tools/perf/Makemodule.am: 
/*
 * Get the current time in nanoseconds.
 *
 * The value is only meaningful as a difference between two calls: it is
 * read from the monotonic clock so that wall-clock adjustments (NTP,
 * settimeofday) during a run cannot shorten, lengthen or make negative
 * the measured test duration. Returns 0 if the clock cannot be read.
 */
static inline unsigned long long nvme_perf_time_nsec(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
		return 0;

	return (unsigned long long) ts.tv_sec * 1000000000ULL
		+ (unsigned long long) ts.tv_nsec;
}
/*
 * Print the command line help on stdout and terminate the process with
 * exit status 1. This function does not return.
 *
 * cmd is the program name to show in the usage line (argv[0]).
 */
static void nvme_perf_usage(char *cmd)
{

	/*
	 * Every option except -h and -rnd consumes the following argument,
	 * and the last two arguments are the device path and the I/O size:
	 * spell the operands out so the help matches the parser.
	 */
	printf("Usage: %s [options] <device path> <I/O size (B)>\n"
	       "Options:\n"
	       " -h | --help : Print this message\n"
	       " -l <level>  : Specify a log level between 0 and 8\n"
	       "               0 = none (disable all messages)\n"
	       "               1 = emergency (system is unusable)\n"
	       "               2 = alert (action must be taken immediately)\n"
	       "               3 = critical (critical conditions)\n"
	       "               4 = error (error conditions)\n"
	       "               5 = warning (warning conditions)\n"
	       "               6 = notice (normal but significant condition) (default)\n"
	       "               7 = info (informational messages)\n"
	       "               8 = debug (debug-level messages)\n"
	       " -t <secs>   : Set the run time (default: 10 seconds)\n"
	       " -cpu <cpu>  : Run on the specified CPU (default: 0)\n"
	       " -ns <id>    : Access the specified namespace (default: 1)\n"
	       " -rw <pct>   : <pct> %% reads and (100 - <pct>) %% writes\n"
	       " -qd <depth> : Issue I/Os with queue depth of <depth>\n"
	       "               Default is 1, maximum depends on the device\n"
	       " -rnd        : Do random I/Os (default: sequential)\n",
	       cmd);

	exit(1);
}
119 | nvme_perf_usage(argv[0]); 120 | 121 | nt.run_secs = atoi(argv[i]); 122 | if (nt.run_secs <= 0) { 123 | fprintf(stderr, 124 | "Invalid run time %s\n", 125 | argv[i]); 126 | exit(1); 127 | } 128 | 129 | } else if (strcmp(argv[i], "-cpu") == 0) { 130 | 131 | i++; 132 | if (i == (argc - 1)) 133 | nvme_perf_usage(argv[0]); 134 | 135 | nt.cpu = atoi(argv[i]); 136 | if (nt.cpu < 0) { 137 | fprintf(stderr, 138 | "Invalid CPU number %s\n", 139 | argv[i]); 140 | exit(1); 141 | } 142 | 143 | } else if (strcmp(argv[i], "-ns") == 0) { 144 | 145 | i++; 146 | if (i == (argc - 1)) 147 | nvme_perf_usage(argv[0]); 148 | 149 | nt.ns_id = atoi(argv[i]); 150 | if (nt.ns_id <= 0) { 151 | fprintf(stderr, 152 | "Invalid namespace ID %s\n", 153 | argv[i]); 154 | exit(1); 155 | } 156 | 157 | } else if (strcmp(argv[i], "-rw") == 0) { 158 | 159 | i++; 160 | if (i == (argc - 1)) 161 | nvme_perf_usage(argv[0]); 162 | 163 | nt.rw = atoi(argv[i]); 164 | if ((nt.rw < 0) || (nt.rw > 100)) { 165 | fprintf(stderr, 166 | "Invalid read percentage %s\n", 167 | argv[i]); 168 | exit(1); 169 | } 170 | 171 | } else if (strcmp(argv[i], "-qd") == 0) { 172 | 173 | i++; 174 | if (i == (argc - 1)) 175 | goto err; 176 | 177 | nt.qd = atoi(argv[i]); 178 | if (nt.qd <= 0) { 179 | fprintf(stderr, 180 | "Invalid queue depth %s\n", 181 | argv[i]); 182 | exit(1); 183 | } 184 | 185 | } else if (strcmp(argv[i], "-rnd") == 0) { 186 | 187 | nt.rnd = 1; 188 | 189 | } else if (argv[i][0] == '-') { 190 | 191 | fprintf(stderr, 192 | "Unknown option %s\n", 193 | argv[i]); 194 | exit(1); 195 | 196 | } else { 197 | 198 | break; 199 | 200 | } 201 | 202 | } 203 | 204 | /* There should be 2 arguments left ( ) */ 205 | if ((argc - 1) - i != 1) 206 | nvme_perf_usage(argv[0]); 207 | 208 | /* Get path */ 209 | nt.path = argv[argc - 2]; 210 | 211 | /* Get I/O size */ 212 | nt.io_size = atoi(argv[argc - 1]); 213 | if (nt.io_size <= 0) { 214 | fprintf(stderr, 215 | "Invalid I/O size %s\n", 216 | argv[argc - 1]); 217 | exit(1); 218 | 
} 219 | 220 | return; 221 | err: 222 | 223 | fprintf(stderr, "Invalid command line\n"); 224 | nvme_perf_usage(argv[0]); 225 | } 226 | 227 | static int nvme_perf_open_device(struct nvme_ctrlr_opts *opts) 228 | { 229 | struct nvme_ctrlr_stat cstat; 230 | struct nvme_ns_stat nsstat; 231 | 232 | /* Probe the NVMe controller */ 233 | printf("Opening NVMe controller %s\n", 234 | nt.path); 235 | 236 | nt.ctrlr = nvme_ctrlr_open(nt.path, opts); 237 | if (!nt.ctrlr) { 238 | fprintf(stderr, "Open NVMe controller %s failed\n", 239 | nt.path); 240 | return -1; 241 | } 242 | 243 | /* Get information */ 244 | if (nvme_ctrlr_stat(nt.ctrlr, &cstat) != 0) { 245 | fprintf(stderr, "Get NVMe controller %s info failed\n", 246 | nt.path); 247 | return -1;; 248 | } 249 | 250 | nt.slot.domain = cstat.domain; 251 | nt.slot.bus = cstat.bus; 252 | nt.slot.dev = cstat.dev; 253 | nt.slot.func = cstat.func; 254 | nt.nr_ns = cstat.nr_ns; 255 | nt.max_qd = cstat.max_qd; 256 | 257 | if (cstat.io_qpairs != opts->io_queues) 258 | printf("Number of IO qpairs limited to %u\n", 259 | cstat.io_qpairs); 260 | 261 | sprintf(nt.ctrlr_name, "%s (%s)", cstat.mn, 262 | cstat.sn); 263 | 264 | printf("Attached NVMe controller %s (%u namespace%s)\n", 265 | nt.ctrlr_name, nt.nr_ns, (nt.nr_ns> 1) ? 
"s" : ""); 266 | 267 | /* Open the name space */ 268 | nt.ns = nvme_ns_open(nt.ctrlr, nt.ns_id); 269 | if (!nt.ns) { 270 | printf("Open NVMe controller %04x:%02x:%02x.%1u " 271 | "name space %u failed\n", 272 | nt.slot.domain, 273 | nt.slot.bus, 274 | nt.slot.dev, 275 | nt.slot.func, 276 | nt.ns_id); 277 | return -1; 278 | } 279 | 280 | if (nvme_ns_stat(nt.ns, &nsstat) != 0) { 281 | fprintf(stderr, "Get name space %u info failed\n", 282 | nt.ns_id); 283 | return -1; 284 | } 285 | 286 | nt.sectsize = nsstat.sector_size; 287 | nt.nr_sectors = nsstat.sectors; 288 | 289 | return 0; 290 | } 291 | 292 | static void 293 | nvme_perf_sigcatcher(int sig) 294 | { 295 | nt.abort = sig; 296 | } 297 | 298 | static int 299 | nvme_perf_init(void) 300 | { 301 | nvme_perf_io_t *io; 302 | cpu_set_t cpu_mask; 303 | struct nvme_ctrlr_opts opts; 304 | struct nvme_qpair_stat qpstat; 305 | int i, ret; 306 | 307 | /* Setup signal handler */ 308 | signal(SIGQUIT, nvme_perf_sigcatcher); 309 | signal(SIGINT, nvme_perf_sigcatcher); 310 | signal(SIGTERM, nvme_perf_sigcatcher); 311 | 312 | /* Pin down the process on the target CPU */ 313 | CPU_ZERO(&cpu_mask); 314 | CPU_SET(nt.cpu, &cpu_mask); 315 | ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), 316 | &cpu_mask); 317 | if (ret ) { 318 | fprintf(stderr, 319 | "pthread_setaffinity_np failed %d (%s)\n", 320 | ret, 321 | strerror(ret)); 322 | return -1; 323 | } 324 | sched_yield(); 325 | 326 | /* Initialize libnvme */ 327 | ret = nvme_lib_init(nt.log_level, -1, NULL); 328 | if (ret != 0) { 329 | fprintf(stderr, 330 | "libnvme init failed %d (%s)\n", 331 | ret, strerror(-ret)); 332 | exit(1); 333 | } 334 | 335 | /* Initialize the controller options */ 336 | memset(&opts, 0, sizeof(struct nvme_ctrlr_opts)); 337 | opts.io_queues = 1; 338 | 339 | /* Grab the device */ 340 | ret = nvme_perf_open_device(&opts); 341 | if (ret) 342 | return -1; 343 | 344 | if (nt.io_size % nt.sectsize) { 345 | fprintf(stderr, 346 | "Invalid I/O size %zu B: 
must be a multiple " 347 | "of the sector size %zu B\n", 348 | nt.io_size, 349 | nt.sectsize); 350 | return -1; 351 | } 352 | 353 | if (nt.max_qd < (unsigned int)nt.qd) { 354 | fprintf(stderr, 355 | "Queue depth has to be less than the maximum queue " 356 | "entries authorized (%u)\n", 357 | nt.max_qd); 358 | return -1; 359 | } 360 | 361 | /* Get an I/O queue pair */ 362 | nt.qpair = nvme_ioqp_get(nt.ctrlr, 0, 0); 363 | if (!nt.qpair) { 364 | fprintf(stderr, "Allocate I/O qpair failed\n"); 365 | return -1; 366 | } 367 | 368 | ret = nvme_qpair_stat(nt.qpair, &qpstat); 369 | if (ret) { 370 | fprintf(stderr, "Get I/O qpair information failed\n"); 371 | return -1; 372 | } 373 | printf("Qpair %u, depth: %u\n", qpstat.id, qpstat.qd); 374 | 375 | /* Allocate I/Os */ 376 | nt.io = calloc(nt.qd, sizeof(nvme_perf_io_t)); 377 | if (!nt.io) { 378 | fprintf(stderr, "Allocate I/O array failed\n"); 379 | return -1; 380 | } 381 | 382 | /* Allocate I/O buffers */ 383 | for (i = 0; i < nt.qd; i++) { 384 | io = &nt.io[i]; 385 | io->size = nt.io_size / nt.sectsize; 386 | io->buf = nvme_zmalloc(nt.io_size, nt.sectsize); 387 | if (!io->buf) { 388 | fprintf(stderr, "io buffer allocation failed\n"); 389 | return -1; 390 | } 391 | nvme_perf_ioq_add(&nt.free_ioq, io); 392 | } 393 | 394 | return 0; 395 | } 396 | 397 | static void 398 | nvme_perf_end(void) 399 | { 400 | nvme_perf_io_t *io; 401 | int i; 402 | 403 | /* Close device file */ 404 | if (nt.ctrlr) { 405 | 406 | printf("Detaching NVMe controller %04x:%02x:%02x.%x\n", 407 | nt.slot.domain, 408 | nt.slot.bus, 409 | nt.slot.dev, 410 | nt.slot.func); 411 | 412 | if (nt.qpair) 413 | nvme_ioqp_release(nt.qpair); 414 | 415 | if (nt.ns) 416 | nvme_ns_close(nt.ns); 417 | 418 | nvme_ctrlr_close(nt.ctrlr); 419 | 420 | } 421 | 422 | if (nt.io) { 423 | for (i = 0; i < nt.qd; i++) { 424 | io = &nt.io[i]; 425 | if (io->buf) 426 | nvme_free(io->buf); 427 | } 428 | free(nt.io); 429 | } 430 | 431 | return; 432 | } 433 | 434 | static void 435 | 
nvme_perf_io_end(void *arg, 436 | const struct nvme_cpl *cpl) 437 | { 438 | nvme_perf_io_t *io = arg; 439 | 440 | nvme_perf_ioq_remove(&nt.pend_ioq, io); 441 | nvme_perf_ioq_add(&nt.free_ioq, io); 442 | 443 | nt.io_count++; 444 | nt.io_bytes += nt.io_size; 445 | } 446 | 447 | static int 448 | nvme_perf_set_io(nvme_perf_io_t *io) 449 | { 450 | unsigned long long ofst; 451 | int rw; 452 | 453 | /* Decide on read or write based on read/write ratio */ 454 | if ( nt.rw == 100 ) { 455 | rw = NVME_TEST_READ; 456 | } else if ( nt.rw == 0 ) { 457 | rw = NVME_TEST_WRITE; 458 | } else { 459 | rw = (int)((100UL * (unsigned long)rand()) 460 | / (unsigned long)RAND_MAX); 461 | if (rw <= nt.rw) 462 | rw = NVME_TEST_READ; 463 | else 464 | rw = NVME_TEST_WRITE; 465 | } 466 | 467 | /* Setup I/O offset */ 468 | if (nt.rnd) 469 | /* Random I/O offset */ 470 | ofst = (double)(nt.nr_sectors - io->size) * (double) rand() 471 | / (double) RAND_MAX; 472 | else { 473 | ofst = nt.io_ofst / nt.sectsize; 474 | nt.io_ofst += nt.io_size; 475 | if (nt.io_ofst >= nt.nr_sectors * nt.sectsize) 476 | nt.io_ofst = 0; 477 | } 478 | io->ofst = ofst; 479 | 480 | return rw; 481 | } 482 | 483 | static int 484 | nvme_perf_submit_io(void) 485 | { 486 | nvme_perf_io_t *io; 487 | ssize_t ret; 488 | int rw; 489 | 490 | /* Prepare I/Os */ 491 | while ((io = (nvme_perf_io_t *) nvme_perf_ioq_get(&nt.free_ioq)) && 492 | !nt.abort) { 493 | 494 | nvme_perf_ioq_add(&nt.pend_ioq, io); 495 | 496 | rw = nvme_perf_set_io(io); 497 | if (rw == NVME_TEST_READ) 498 | ret = nvme_ns_read(nt.ns, nt.qpair, 499 | io->buf, 500 | io->ofst, 501 | io->size, 502 | nvme_perf_io_end, io, 0); 503 | else 504 | ret = nvme_ns_write(nt.ns, nt.qpair, 505 | io->buf, 506 | io->ofst, 507 | io->size, 508 | nvme_perf_io_end, io, 0); 509 | 510 | if (ret) { 511 | fprintf(stderr, "Submit I/O failed\n"); 512 | nvme_perf_ioq_remove(&nt.pend_ioq, io); 513 | nvme_perf_ioq_add(&nt.free_ioq, io); 514 | nt.abort = 1; 515 | return -1; 516 | } 517 | 518 | } 
519 | 520 | return 0; 521 | } 522 | 523 | /** 524 | * Run the test: do I/Os. 525 | */ 526 | static void 527 | nvme_perf_run(void) 528 | { 529 | 530 | /* Start */ 531 | nt.start = nvme_perf_time_nsec(); 532 | 533 | /* Run for requested time */ 534 | while(nvme_perf_elapsed_secs() < nt.run_secs && 535 | !nt.abort) { 536 | 537 | if (nvme_perf_submit_io() != 0) 538 | break; 539 | 540 | while (nvme_perf_ioq_empty(&nt.free_ioq)) 541 | nvme_ioqp_poll(nt.qpair, nt.qd); 542 | 543 | } 544 | 545 | /* Wait for remaining started I/Os */ 546 | while (!nvme_perf_ioq_empty(&nt.pend_ioq)) 547 | nvme_ioqp_poll(nt.qpair, nt.qd); 548 | 549 | /* Stop */ 550 | nt.end = nvme_perf_time_nsec(); 551 | } 552 | 553 | int main(int argc, char **argv) 554 | { 555 | unsigned long long elapsed; 556 | long long rate; 557 | double sz; 558 | char *unit; 559 | int ret; 560 | 561 | /* Parse command line */ 562 | nvme_perf_get_params(argc, argv); 563 | 564 | /* Initialize */ 565 | ret = nvme_perf_init(); 566 | if (ret) 567 | goto out; 568 | 569 | sz = (double)(nt.nr_sectors * nt.sectsize) / (1024 * 1024 * 1024); 570 | if (sz > 1) { 571 | unit = "Gi"; 572 | } else { 573 | unit = "Mi"; 574 | sz = (double)(nt.nr_sectors * nt.sectsize) / (1024 *1024); 575 | } 576 | 577 | printf("Device %04x:%02x:%02x.%x, namespace %d:\n" 578 | " %.03F %sB capacity (%llu sectors of %zu B)\n", 579 | nt.slot.domain, nt.slot.bus, nt.slot.dev, nt.slot.func, 580 | nt.ns_id, 581 | sz, unit, 582 | nt.nr_sectors, 583 | nt.sectsize); 584 | 585 | printf("Starting test on CPU %d for %d seconds:\n" 586 | " %d %% read I/O, %d %% write I/Os\n" 587 | " %zu B I/O size, %s access, qd %d\n", 588 | nt.cpu, nt.run_secs, 589 | nt.rw, 100 - nt.rw, 590 | nt.io_size, 591 | nt.rnd ? 
"random" : "sequential", 592 | nt.qd); 593 | 594 | /* Run test */ 595 | nvme_perf_run(); 596 | 597 | elapsed = nt.end - nt.start; 598 | if (elapsed && nt.io_count) { 599 | 600 | rate = nt.io_bytes * 1000000000 / elapsed; 601 | 602 | printf("-> %lld I/Os in %.03F secs\n" 603 | " %.03F MB/sec, %lld IOPS\n" 604 | " %.03F usecs average I/O latency\n", 605 | nt.io_count, 606 | (double)elapsed / 1000000000.0, 607 | (double)rate / 1000000.0, 608 | nt.io_count * 1000000000 / elapsed, 609 | ((double)elapsed / nt.io_count) / 1000.0); 610 | 611 | } 612 | 613 | out: 614 | nvme_perf_end(); 615 | 616 | return ret; 617 | } 618 | -------------------------------------------------------------------------------- /tools/perf/nvme_perf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, Western Digital Corporation or its affiliates. 3 | * 4 | * SPDX-License-Identifier: BSD-2-Clause 5 | * Please see COPYING file for license text. 6 | */ 7 | 8 | #include "libnvme/nvme.h" 9 | 10 | /* 11 | * I/O types. 12 | */ 13 | #define NVME_TEST_READ 0 14 | #define NVME_TEST_WRITE 1 15 | 16 | /* 17 | * I/O descriptor. 18 | */ 19 | typedef struct nvme_perf_io { 20 | 21 | /* For I/O queueing */ 22 | struct nvme_perf_io *next; 23 | struct nvme_perf_io *prev; 24 | 25 | /* I/O offset, size and buffer */ 26 | void *buf; 27 | long long ofst; 28 | size_t size; 29 | 30 | } nvme_perf_io_t; 31 | 32 | /* 33 | * I/O descriptor queue. 34 | */ 35 | typedef struct nvme_perf_ioq { 36 | nvme_perf_io_t *head; 37 | nvme_perf_io_t *tail; 38 | } nvme_perf_ioq_t; 39 | 40 | /* 41 | * Run parameters. 42 | */ 43 | typedef struct nvme_perf { 44 | 45 | /* 46 | * Log level to run at. 47 | */ 48 | int log_level; 49 | 50 | /* 51 | * Terminate on signal. 52 | */ 53 | int abort; 54 | 55 | /* 56 | * Device and I/O parameters. 
57 | */ 58 | char *path; 59 | int cpu; 60 | int ns_id; 61 | int qd; 62 | int rw; 63 | int rnd; 64 | size_t io_size; 65 | int run_secs; 66 | int memstat; 67 | 68 | /* 69 | * Device data. 70 | */ 71 | struct pci_slot_match slot; 72 | char ctrlr_name[1024]; 73 | size_t sectsize; 74 | unsigned long long nr_sectors; 75 | unsigned int max_qd; 76 | unsigned int nr_ns; 77 | 78 | struct nvme_ctrlr *ctrlr; 79 | struct nvme_ns *ns; 80 | struct nvme_qpair *qpair; 81 | 82 | /* 83 | * I/O control. 84 | */ 85 | unsigned long long io_ofst; 86 | nvme_perf_io_t *io; 87 | nvme_perf_ioq_t free_ioq; 88 | nvme_perf_ioq_t pend_ioq; 89 | 90 | /* 91 | * I/O stats. 92 | */ 93 | unsigned long long start; 94 | unsigned long long end; 95 | unsigned long long io_count; 96 | unsigned long long io_bytes; 97 | 98 | } nvme_perf_t; 99 | 100 | /* 101 | * Test if an I/O queue is empty. 102 | */ 103 | static inline int nvme_perf_ioq_empty(nvme_perf_ioq_t *ioq) 104 | { 105 | return ioq->head == NULL; 106 | } 107 | 108 | /* 109 | * Add an I/Os at the end of a queue. 110 | */ 111 | static inline void nvme_perf_ioq_add(nvme_perf_ioq_t *ioq, 112 | nvme_perf_io_t *io) 113 | { 114 | 115 | io->next = NULL; 116 | if (ioq->head) { 117 | ioq->tail->next = io; 118 | io->prev = ioq->tail; 119 | } else { 120 | ioq->head = io; 121 | io->prev = NULL; 122 | } 123 | 124 | ioq->tail = io; 125 | 126 | return; 127 | } 128 | 129 | /* 130 | * Get the first I/Os in a queue. 131 | */ 132 | static inline nvme_perf_io_t *nvme_perf_ioq_get(nvme_perf_ioq_t *ioq) 133 | { 134 | nvme_perf_io_t *io; 135 | 136 | if (ioq->head) { 137 | 138 | io = ioq->head; 139 | ioq->head = io->next; 140 | if (ioq->head) 141 | ioq->head->prev = NULL; 142 | else 143 | ioq->tail = NULL; 144 | 145 | io->prev = NULL; 146 | io->next = NULL; 147 | 148 | } else 149 | io = NULL; 150 | 151 | return io; 152 | } 153 | 154 | /* 155 | * Remove an I/O from a queue. 
156 | */ 157 | static inline void nvme_perf_ioq_remove(nvme_perf_ioq_t *ioq, 158 | nvme_perf_io_t *io) 159 | { 160 | nvme_perf_io_t *iop; 161 | 162 | if (ioq->head == io) { 163 | nvme_perf_ioq_get(ioq); 164 | } else if (ioq->tail == io) { 165 | ioq->tail = io->prev; 166 | ioq->tail->next = NULL; 167 | } else { 168 | iop = io->next; 169 | iop->prev = io->prev; 170 | io->prev->next = iop; 171 | } 172 | 173 | io->prev = NULL; 174 | io->next = NULL; 175 | } 176 | --------------------------------------------------------------------------------