├── CREDITS ├── HOWTO_Use_These_Tools ├── LICENSE ├── Makefile ├── README.md ├── TODO ├── doc ├── GPFS Multi-Cluster Routing HOWTO.pdf ├── GPFS Multi-Cluster Routing HOWTO.vsd ├── examples │ ├── 99-IBM-FlashSystem.udev.rules │ └── mmremotecluster-sample_execution_session.txt ├── half-baked_goodies │ ├── README.half-baked_goodies │ ├── ds35xx_run_luall │ └── ds35xx_run_luall.evaluate_output ├── multipath.conf-creator.EXAMPLE_Execution └── multipath.conf-creator.EXAMPLE_Output_File ├── etc ├── gpfs_goodies │ └── multipath.conf-creator_config_chunks │ │ ├── defaults │ │ ├── device-DDN,S2A_6620 │ │ ├── device-IBM,1726 │ │ ├── device-IBM,1746 │ │ ├── device-IBM,1813 │ │ ├── device-IBM,1814,1815 │ │ ├── device-IBM,1818 │ │ ├── device-IBM,2107900 │ │ ├── device-IBM,2145 │ │ ├── device-IBM,2851-DR1 │ │ ├── device-IBM,FlashSystem │ │ ├── device-IBM,dcs95xx,dcs99xx │ │ └── device-NETAPP,Tahoe ├── init.d │ └── opensmd.for_direct_connect_storage └── modprobe.d │ └── scsi_dh_alua.conf ├── gpfs_goodies.spec ├── sbin ├── brians_own_hot-add_script ├── gpfs_goodies ├── gpfs_stanzafile-creator ├── multipath.conf-creator ├── test_block_device_settings └── tune_block_device_settings └── var └── mmfs └── etc └── nsddevices /CREDITS: -------------------------------------------------------------------------------- 1 | N: Brian Elliott Finley 2 | E: bfinley@lenovo.com 3 | E: brian@thefinleys.com 4 | D: creator 5 | D: upstream maintainer 6 | 7 | N: Jarrod Johnson 8 | E: jjohnson2@lenovo.com 9 | D: 2nd generation multipath.conf-creator engine: getluns.pl 10 | 11 | N: Lerone Latouche 12 | E: llatouche@lenovo.com 13 | D: opensmd.for_direct_connect_storage patches/tweaks 14 | 15 | N: Christian Caruthers 16 | E: ccaruthers@lenovo.com 17 | D: patches & bug fixes 18 | 19 | N: Ray Paden 20 | E: rpaden@lenovo.com 21 | D: Critical testing, feedback, guidance, and tuning advice 22 | 23 | N: Robert de Rooy 24 | E: rderooy@lenovo.com 25 | D: GPFS Multi Cluster HOWTO improvements 26 | 
-------------------------------------------------------------------------------- /HOWTO_Use_These_Tools: -------------------------------------------------------------------------------- 1 | doc/HOWTO_Use_These_Tools -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GPFS Goodies 2 | ------------------------------------------------------------------------ 3 | GPFS Goodies is released under the Apache License, Version 2.0. 4 | 5 | Copyright 2014-2015 Brian E. Finley 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | 19 | -------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
#
#   2013.10.11 Brian Finley
#   - improve version handling
#   2013.11.07 Brian Finley
#   - include ./var/*
#   - better handling of version setting in included progs
#   - put user docs, etc all in one place
#

package := gpfs_goodies

#
# Version handling
#
# The version is derived from the most recent git tag as reported by
# "git describe --tags", e.g. "v1.2.3" or "v1.2.3-4-gabcdef".  The output
# is split on "." and "-" into fields: MAJOR MINOR PATCH [AUTO [HASH]].
#
# Each field falls back to 0 when it is missing (no tags, or not a git
# checkout at all).  $(or ...) is used rather than "?=", because "?=" has
# no effect on a variable that has already been defined, even if it was
# defined as empty.
#
# NOTE: the "tarball" target rewrites every line beginning with
# MAJOR_VER, MINOR_VER, or PATCH_VER followed by whitespace when it
# generates Makefile.rpm, so keep those assignments at the start of a
# line.
#
GIT_DESCRIBE := $(shell git describe --tags 2>/dev/null)
ver_fields   := $(subst ., ,$(subst -, ,$(patsubst v%,%,$(GIT_DESCRIBE))))

MAJOR_VER := $(or $(word 1,$(ver_fields)),0)
MINOR_VER := $(or $(word 2,$(ver_fields)),0)
PATCH_VER := $(or $(word 3,$(ver_fields)),0)
AUTO_VER  := $(or $(word 4,$(ver_fields)),0)

# In case we have made commits (AUTO_VER) since updating a PATCH_VER in
# the tag.  -BEF-
PATCH_VER := $(shell echo "$$(( $(PATCH_VER) + $(AUTO_VER) ))" )

VERSION := ${MAJOR_VER}.${MINOR_VER}.${PATCH_VER}

PKG_DIR := ${package}-${VERSION}

TOPDIR   := $(CURDIR)
rpmbuild = ~/rpmbuild

# Scratch area used while assembling the tarball.  It lives under
# $(TOPDIR)/tmp/ so that nothing is created at parse time (targets such as
# "help" and "clean" no longer leave stray mktemp directories behind) and
# so that "make clean" removes it.
BUILD_TMP := $(TOPDIR)/tmp/build

TARBALL := $(TOPDIR)/tmp/${PKG_DIR}.tar.bz2

.PHONY: all
all: tarball

# Install into ${PREFIX}, which acts as a staging root (it is prepended to
# /usr, /etc, ...).  Leave it unset to install directly onto this system.
.PHONY: install
install:
	mkdir -p ${PREFIX}/usr/sbin/
	install -m 755 sbin/* ${PREFIX}/usr/sbin/

	mkdir -p ${PREFIX}/etc/gpfs_goodies/multipath.conf-creator_config_chunks/
	install -m 755 etc/gpfs_goodies/multipath.conf-creator_config_chunks/* ${PREFIX}/etc/gpfs_goodies/multipath.conf-creator_config_chunks/

	mkdir -p ${PREFIX}/usr/share/${package}/
	rsync -av doc/* ${PREFIX}/usr/share/${package}/
	find ${PREFIX}/usr/share/${package}/ -type d -exec chmod 755 '{}' \;
	find ${PREFIX}/usr/share/${package}/ -type f -exec chmod 644 '{}' \;

	mkdir -p ${PREFIX}/usr/share/${package}/etc/modprobe.d/
	install -m 644 etc/modprobe.d/* ${PREFIX}/usr/share/${package}/etc/modprobe.d/

	mkdir -p ${PREFIX}/usr/share/${package}/var/mmfs/etc/
	install -m 644 var/mmfs/etc/* ${PREFIX}/usr/share/${package}/var/mmfs/etc/

	mkdir -p ${PREFIX}/etc/init.d/
	install -m 755 etc/init.d/* ${PREFIX}/etc/init.d/

	mkdir -p ${PREFIX}/usr/share/doc/${PKG_DIR}/
	echo "See the files in /usr/share/${package}/" > ${PREFIX}/usr/share/doc/${PKG_DIR}/README

# Build $(TARBALL) from a clean clone of the committed tree, with the
# version number stamped into the spec file and the scripts.
.PHONY: tarball
tarball:
	mkdir -p $(TOPDIR)/tmp/

#
# Start from an empty scratch directory
	/bin/rm -fr ${BUILD_TMP}
	mkdir -p ${BUILD_TMP}

#
# Make a copy of the repo
	git clone . ${BUILD_TMP}/${PKG_DIR}
	/bin/rm -fr ${BUILD_TMP}/${PKG_DIR}/.git
#
# Visio files may be quite large, and can be found in the repo if
# needed.  -BEF-
	/bin/rm -fr ${BUILD_TMP}/${PKG_DIR}/doc/*.vsd

#
# Copy this file over (for testing only)
	/bin/cp Makefile ${BUILD_TMP}/${PKG_DIR}/

#
# Create an RPM appropriate Makefile (version numbers hard-coded, since
# there is no git metadata inside the tarball)
	cp ${BUILD_TMP}/${PKG_DIR}/Makefile ${BUILD_TMP}/${PKG_DIR}/Makefile.rpm
	perl -pi -e "s/^MAJOR_VER\s+.*/MAJOR_VER := ${MAJOR_VER}/g" ${BUILD_TMP}/${PKG_DIR}/Makefile.rpm
	perl -pi -e "s/^MINOR_VER\s+.*/MINOR_VER := ${MINOR_VER}/g" ${BUILD_TMP}/${PKG_DIR}/Makefile.rpm
	perl -pi -e "s/^PATCH_VER\s+.*/PATCH_VER := ${PATCH_VER}/g" ${BUILD_TMP}/${PKG_DIR}/Makefile.rpm

#
# Version the Files
	perl -pi -e "s/__VERSION__/${VERSION}/g" ${BUILD_TMP}/${PKG_DIR}/${package}.spec
	perl -pi -e "s/^VERSION=.*/VERSION=${VERSION}/g" ${BUILD_TMP}/${PKG_DIR}/sbin/gpfs_goodies
	perl -pi -e "s/^VERSION=.*/VERSION=${VERSION}/g" ${BUILD_TMP}/${PKG_DIR}/sbin/brians_own_hot-add_script
	perl -pi -e "s/version_number = .*/version_number = '${VERSION}';/g" ${BUILD_TMP}/${PKG_DIR}/sbin/multipath.conf-creator
	perl -pi -e "s/version_number = .*/version_number = '${VERSION}';/g" ${BUILD_TMP}/${PKG_DIR}/sbin/tune_block_device_settings
	perl -pi -e "s/version_number = .*/version_number = '${VERSION}';/g" ${BUILD_TMP}/${PKG_DIR}/sbin/gpfs_stanzafile-creator

#
# Tar it up
	cd ${BUILD_TMP} && tar -cvjf ${TARBALL} ${PKG_DIR}

#
# Remove the scratch copy now that the tarball exists
	/bin/rm -fr ${BUILD_TMP}

.PHONY: rpms
rpms: rpm

# Build signed binary and source RPMs from the tarball and copy them next
# to it in $(TOPDIR)/tmp/.
.PHONY: rpm
rpm: tarball
	rpmbuild -ta --sign ${TARBALL}
	/bin/cp -i ${rpmbuild}/RPMS/*/${package}-$(VERSION)-*.rpm $(TOPDIR)/tmp/
	/bin/cp -i ${rpmbuild}/SRPMS/${package}-$(VERSION)-*.rpm $(TOPDIR)/tmp/

.PHONY: release
release:
	@echo "Please try 'make test_release' or 'make stable_release'"

.PHONY: test_release
test_release: rpms
	@echo
	@echo "I'm about to upload the following files to:"
	@echo "  ~/src/www.systemimager.org/testing/${package}/"
	@echo "-----------------------------------------------------------------------"
	@/bin/ls -1 $(TOPDIR)/tmp/${package}[-_]$(VERSION)*.*
	@echo
	@echo "Hit <Enter> to continue..."
	@read i
	rsync -av --progress $(TOPDIR)/tmp/${package}[-_]$(VERSION)*.* ~/src/www.systemimager.org/testing/${package}/
	@echo
	@echo "Now run:   cd ~/src/www.systemimager.org/ && make upload"
	@echo

.PHONY: stable_release
stable_release: rpms
	@echo
	@echo "I'm about to upload the following files to:"
	@echo "  ~/src/www.systemimager.org/stable/${package}/"
	@echo "-----------------------------------------------------------------------"
	@/bin/ls -1 $(TOPDIR)/tmp/${package}[-_]$(VERSION)*.*
	@echo
	@echo "Hit <Enter> to continue..."
	@read i
	rsync -av --progress $(TOPDIR)/tmp/${package}[-_]$(VERSION)*.* ~/src/www.systemimager.org/stable/${package}/
	@echo
	@echo "Now run:   cd ~/src/www.systemimager.org/ && make upload"
	@echo

.PHONY: help
help:
	@echo "Targets include:"
	@echo "  help"
	@echo "  rpm"
	@echo "  rpms"
	@echo "  tarball"
	@echo "  all"
	@echo "  install"
	@echo "  test_release"
	@echo "  stable_release"
	@echo "  clean"

.PHONY: clean
clean:
	rm -fr $(TOPDIR)/tmp/

# vi: set ts=4 noet ai tw=0:

-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Please see the wiki: 2 | 3 | * [GPFS Goodies Wiki](https://github.com/finley/GPFS-Goodies/wiki) 4 | 5 | 6 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - rename package to gpfs-goodies 2 | (debian compliant) 3 | 4 | - for SMcli capable storage, use "SMcli -d -w" to capture the info, then 5 | use "SMcli -w $WWID" to invoke the commands.
If multiple controllers 6 | with the same name, append a number to the end of them: 7 | 8 | [root@x36n01 ~]# SMcli -d -w 9 | DCS3860 600a09800056e8d9000000005375f2aa dcs3860.a dcs3860.b 172.16.0.19 172.16.0.18 10 | DCS3860 600a09800056e9180000000053ad6572 dcs3860.a 11 | Tahoe 600a09800056e8d90000000053ad64cd dcs3860.a dcs3860.b 12 | 13 | -> Names like so: 14 | 15 | dcs3860_1 -> 600a09800056e8d9000000005375f2aa 16 | dcs3860_2 -> 600a09800056e9180000000053ad6572 17 | 18 | - consider adding libnet-mac-vendor-perl to look up mac address for 19 | storage to map into method (SMCli, etc.) 20 | 21 | - add ping test to each --deploy server as part of the options 22 | validation at the beginning 23 | 24 | - add test for multipathd and multipath on each --deploy server as part 25 | of options validation at beginning 26 | 27 | - consider how name is derived from non-SMClient devices. Example: 28 | 29 | Trying 10.73.1.23... 30 | Connected to flashctl1-a. 31 | Escape character is '^]'. 32 | 33 | flashctl1-a login: admin 34 | Password: 35 | 36 | IBM - Intelligent Cluster 37 | FlashSystem-820 Management Interface 38 | 39 | Firmware Version: 6.3.1 40 | Storage Capacity: 9.37 TiB 41 | Management Controller: mc-1 42 | Eth0 IP Address: 10.73.1.23 43 | Hostname: flashctl1-a 44 | Chassis Serial: T-XSXS 45 | 46 | 47 | - Add bit to Makefile that creates GPFS_Goodies web page: 48 | 49 | echo "Section Header info" 50 | ./multipath.conf-creator --help >> webpage 51 | 52 | echo "Section Header info" 53 | ./tune --help >> webpage 54 | 55 | etc... 
56 | 57 | 58 | 59 | - Do Videos: 60 | Part 1: Introduction to GPFS Goodies 61 | Part 2: Auto-configuring Linux Kernel Multipath 62 | Part 3: Hot Adding and Removing SCSI Subsystem Devices 63 | Part 4: Auto-tuning your Block Devices 64 | 65 | - Break out multipath.conf file chunks by device type 66 | 67 | /etc/gpfs_goodies/multipath.conf_source_files/ 68 | 69 | 70 | - Add a "--scheduler [cfq|noop|deadline|as]" option to 71 | tune_block_device_settings... 72 | 73 | 74 | - Comments from Scott Denham: 75 | 76 | In the longer run, I'd like to see it be more "Linux-like" in storing 77 | the specific parameter values in /etc/sysconfig/ or the 78 | like, and to be able to dynamically expand / contract which 79 | /sys/block/<device>/ parameters are tuned, as opposed to storing the values 80 | directly in the scripts. This would allow updating without 81 | re-customizing, and puts the tunables in a logical place instead of 82 | buried inside /etc/udev/rules. 83 | 84 | I like your idea about probing the local Ethernet for candidates for 85 | management interfaces for storage devices. The possible concerns here 86 | would be time, should this be a system with a large, flat Layer2 87 | management net, or reachability should the storage devices actually be 88 | on a separate, routable subnet from the GPFS system where gpfs_goodies 89 | is being run. Perhaps a simple old-xCAT style "tab" with IP:DEVICE TYPE 90 | as a complement to the discovery logic? 91 | 92 | 93 | - Incorporate "Net::SSH::Expect" into this code. It does not appear to 94 | be commonly available in either Debian or Red Hat based distros. 95 | 96 | http://search.cpan.org/~bnegrao/Net-SSH-Expect-1.09/lib/Net/SSH/Expect.pod 97 | 98 | - Which of these should we use in a multi-node GPFS environment? 99 | 100 | no_path_retry 15 101 | 102 | or 103 | 104 | no_path_retry fail 105 | 106 | 107 | - In the nsddevices file, is this statement true? 108 | 109 | # 110 | # dmm vs.
generic is used by GPFS to prioritize internal order of 111 | # searching through available disks, then later GPFS discards other 112 | # disk device names that it finds that match as the same NSD device 113 | # by a different path. For this reason, dmm vs. generic is an 114 | # important distinction if you are not explicitly producing the 115 | # entire and exclusive set of disks that GPFS should use, as output 116 | # from this script (nsddevices) _and_ exiting this script with a 117 | # "return 0". -Brian Finley 118 | # 119 | 120 | -- 121 | 122 | - Add "HOWTO Convert from LSI RDAC to Linux Multipath with GPFS Goodies" 123 | 124 | - create a new tool 125 | 126 | - tune_mmrestripefs_bandwidth 127 | 128 | - This tool gives you a way to specify the rate at which your 129 | data migration operation proceeds. For example, you can use 130 | it to achieve a slow migration over an extended time frame 131 | to avoid impacting performance for production operations. 132 | 133 | When used without this tool, mmrestripefs will consume all the 134 | bandwidth it can to complete its operation as fast as 135 | possible, limited only by the performance of the participating 136 | nodes' network connectivity and/or the storage server disk 137 | performance. This can result in noticeably lower performance 138 | of the file system for users and/or other applications. 139 | 140 | - Uses iptables to impose a bandwidth limit on rate of re-stripe 141 | activity. 142 | 143 | - Could use maxMBpS instead of iptables... 144 | 145 | OPTIONS: 146 | -h 147 | -v 148 | --MBps NUMBER 149 | 150 | Limit the bandwidth to a maximum of this many megabytes 151 | per second. 152 | 153 | --Nodes NODE1[,NODE2,...] 154 | 155 | The list of nodes that will be used for this operation. 156 | 157 | If multiple nodes are included in this list, the 158 | bandwidth specified via the "--MBps" option will be 159 | divided equally among them, so that the aggregate 160 | bandwidth stays within that limit.
161 | 162 | --mmrestripefs-options "OPTION1[,OPTION2,...]" 163 | 164 | A list of options, within quotation marks, that should be 165 | passed to the mmrestripefs command. The -N option can 166 | be excluded and will be ignored if it is included. 167 | 168 | Example: 169 | 170 | --mmrestripefs-options "fs1 -m" 171 | 172 | 173 | - prio_callout vs. prio 174 | 175 | - Add "man gpfs_goodies" 176 | 177 | - sort multipath.conf LUN entries same way as test_block_devices 178 | 179 | - use SMcli to determine alua vs. rdac 180 | - provide warning if not recommended setting by controller type 181 | 182 | - add option "--sm-password PASSWORD" 183 | 184 | Blacklist 185 | 186 | LSSCSI output for devices to blacklist: 187 | 188 | [0:2:0:0] disk IBM ServeRAID M5015 2.12 /dev/sda 189 | [4:0:0:0] cd/dvd HL-DT-ST DVDRAM GT30N IS09 /dev/sr0 190 | 191 | devnode "^usbsd*" 192 | 193 | device { 194 | vendor "ServeRA" 195 | product * 196 | } 197 | device { 198 | vendor "LSILOGIC" 199 | product "Logical Volume" 200 | } 201 | device { 202 | vendor "IBM-ESXS" 203 | product * 204 | } 205 | 206 | device { 207 | vendor "ServeRA" 208 | product "8k-l Mirror" 209 | } 210 | device { 211 | vendor "IBM" 212 | product "VirtualDisk" 213 | 214 | 215 | 216 | 217 | 218 | LSSCSI output for devices to handle: 219 | 220 | [3:0:0:0] disk IBM 1813 FAStT 0786 /dev/sdb 221 | [3:0:0:1] disk IBM 1813 FAStT 0786 /dev/sdc 222 | [3:0:0:2] disk IBM 1813 FAStT 0786 /dev/sdd 223 | 224 | 225 | 226 | 227 | FAQ 228 | 229 | Where do I need to run multipath.conf-creator? 230 | 231 | A. Any node that can use SMclient to connect to the storage 232 | subsystems. 233 | 234 | 235 | Where do I need to copy the multipath.conf files? 236 | 237 | A. All storage nodes.
238 | 239 | 240 | 241 | 242 | 243 | CONTRIBUTORS 244 | 245 | Ray Paden 246 | Scott Denham 247 | Jarrod Johnson 248 | 249 | 250 | -------------------------------------------------------------------------------- /doc/GPFS Multi-Cluster Routing HOWTO.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/finley/GPFS-Goodies/a337093c025d841a9cab7cd2c7b45ad8adb0662c/doc/GPFS Multi-Cluster Routing HOWTO.pdf -------------------------------------------------------------------------------- /doc/GPFS Multi-Cluster Routing HOWTO.vsd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/finley/GPFS-Goodies/a337093c025d841a9cab7cd2c7b45ad8adb0662c/doc/GPFS Multi-Cluster Routing HOWTO.vsd -------------------------------------------------------------------------------- /doc/examples/99-IBM-FlashSystem.udev.rules: -------------------------------------------------------------------------------- 1 | # --------------------------------------------------------------------------- 2 | # The following is excerpted from "Implementing IBM FlashSystem 840" Redbook. 3 | # --------------------------------------------------------------------------- 4 | # 5 | # Linux tuning 6 | # 7 | # The Linux kernel buffer file system writes data before it sends the data to the storage system. 8 | # With the FlashSystem 840, better performance can be achieved when the data is not buffered 9 | # but directly sent to the FlashSystem 840. When setting the scheduling policy to no operation 10 | # (NOOP), the fewest CPU instructions possible are used for each I/O. Setting the scheduler to 11 | # NOOP gives the best write performance on Linux systems. You can use the following setting 12 | # in most Linux distributions as a boot parameter: elevator=noop. 13 | # 14 | # Current Linux devices are managed by the device manager Udev. 
You can define how Udev 15 | # will manage devices by adding rules to the /etc/udev/rules.d directory. Example 5-24 16 | # shows a rule for the FlashSystem 840. 17 | # 18 | # Example 5-24 Linux device rules 19 | # 20 | # [root@flashnsd1 ~]# cat 99-IBM-FlashSystem.rules 21 | 22 | ACTION=="add|change", SUBSYSTEM=="block", ATTRS{device/model}=="FlashSystem", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0", ATTR{device/timeout}="5" 23 | ACTION=="add|change", SUBSYSTEM=="block", ATTRS{device/model}=="FlashSystem-9840", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0", ATTR{device/timeout}="10" 24 | 25 | ACTION=="add|change", KERNEL=="dm-*", PROGRAM="/bin/bash -c 'cat /sys/block/$name/slaves/*/device/model | grep FlashSystem'", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0" 26 | ACTION=="add|change", KERNEL=="dm-*", PROGRAM="/bin/bash -c 'cat /sys/block/$name/slaves/*/device/model | grep FlashSystem-9840'", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0" 27 | 28 | 29 | -------------------------------------------------------------------------------- /doc/examples/mmremotecluster-sample_execution_session.txt: -------------------------------------------------------------------------------- 1 | 2 | # *************************************************************************** 3 | Definitions: 4 | - cluster1 = cluster owning the FS (e.g., GSS.cluster) 5 | - cluster2 = remote cluster that mounts cluster1's FS (e.g., nxt1102.cluster) 6 | # *************************************************************************** 7 | 8 | # ---------------------------- 9 | # Do the following on cluster1 10 | # ---------------------------- 11 | 12 | [root@mgt2 ssl]# cd /var/mmfs/ssl 13 | 14 | [root@mgt2 ssl]# mmauth genkey new 15 | Generating RSA private key, 512 bit long modulus 16 | ............++++++++++++ 17 | ...++++++++++++ 18 | e is 65537 
(0x10001) 19 | writing RSA key 20 | mmauth: Command successfully completed 21 | mmauth: Propagating the cluster configuration data to all affected nodes. 22 | 23 | ### Sanity Checks 24 | 25 | [root@mgt2 ssl]# ls -l 26 | total 20 27 | -rw------- 1 root root 497 Feb 20 20:11 id_rsa1 28 | lrwxrwxrwx 1 root root 7 Feb 20 20:11 id_rsa_committed -> id_rsa1 29 | -rw-r--r-- 1 root root 449 Feb 20 20:11 id_rsa_committed.cert 30 | -rw-r--r-- 1 root root 825 Feb 20 20:11 id_rsa_committed.pub 31 | lrwxrwxrwx 1 root root 34 Feb 20 20:11 id_rsa.pub -> /var/mmfs/ssl/id_rsa_committed.pub 32 | -rw-r--r-- 1 root root 183 Feb 20 20:11 openssl.conf 33 | drwx------ 2 root root 4096 Feb 20 20:11 stage 34 | [root@mgt2 ssl]# ssh gss6 'ls -l /var/mmfs/ssl' 35 | total 16 36 | -rw------- 1 root root 497 Feb 20 20:11 id_rsa1 37 | lrwxrwxrwx 1 root root 7 Feb 20 20:11 id_rsa_committed -> id_rsa1 38 | -rw-r--r-- 1 root root 449 Feb 20 20:11 id_rsa_committed.cert 39 | -rw-r--r-- 1 root root 825 Feb 20 20:11 id_rsa_committed.pub 40 | lrwxrwxrwx 1 root root 34 Feb 20 20:11 id_rsa.pub -> /var/mmfs/ssl/id_rsa_committed.pub 41 | drwx------ 2 root root 4096 Feb 20 20:11 stage 42 | 43 | ### Stop GPFS on all nodes in cluster1 44 | 45 | [root@mgt2 ssl]# mmauth update . -l AUTHONLY 46 | Verifying GPFS is stopped on all nodes ... 47 | mmauth: Command successfully completed 48 | mmauth: Propagating the cluster configuration data to all affected nodes. 49 | 50 | [root@mgt2 ssl]# scp id_rsa_committed.pub nxt1102:/var/mmfs/ssl/. 
51 | id_rsa_committed.pub 100% 825 0.8KB/s 00:00 52 | [root@mgt2 ssl]# 53 | 54 | 55 | # ---------------------------- 56 | # Do the following on cluster2 57 | # ---------------------------- 58 | 59 | [root@nxt1102 ssl]# cd /var/mmfs/ssl/ 60 | [root@nxt1102 ssl]# mv id_rsa_committed.pub GSS.cluster.id_rsa_committed.pub 61 | [root@nxt1102 ssl]# dir 62 | total 4 63 | -rw-r--r-- 1 root root 825 Feb 20 20:28 GSS.cluster.id_rsa_committed.pub 64 | drwx------ 2 root root 40 Feb 20 18:14 stage 65 | 66 | [root@nxt1102 ssl]# mmauth genkey new 67 | Generating RSA private key, 512 bit long modulus 68 | ....++++++++++++ 69 | ...........++++++++++++ 70 | e is 65537 (0x10001) 71 | writing RSA key 72 | mmauth: Command successfully completed 73 | mmauth: Propagating the cluster configuration data to all 74 | affected nodes. This is an asynchronous process. 75 | 76 | ### Sanity check 77 | 78 | [root@nxt1102 ssl]# dir 79 | total 20 80 | -rw-r--r-- 1 root root 825 Feb 20 20:28 GSS.cluster.id_rsa_committed.pub 81 | -rw------- 1 root root 493 Feb 20 20:33 id_rsa1 82 | lrwxrwxrwx 1 root root 7 Feb 20 20:33 id_rsa_committed -> id_rsa1 83 | -rw-r--r-- 1 root root 457 Feb 20 20:33 id_rsa_committed.cert 84 | -rw-r--r-- 1 root root 837 Feb 20 20:33 id_rsa_committed.pub 85 | lrwxrwxrwx 1 root root 34 Feb 20 20:33 id_rsa.pub -> /var/mmfs/ssl/id_rsa_committed.pub 86 | -rw-r--r-- 1 root root 187 Feb 20 20:33 openssl.conf 87 | drwx------ 2 root root 60 Feb 20 20:33 stage 88 | 89 | ### Stop GPFS on all nodes in cluster2 90 | 91 | [root@nxt1102 ssl]# mmauth update . -l AUTHONLY 92 | Verifying GPFS is stopped on all nodes ... 93 | mmauth: Command successfully completed 94 | mmauth: Propagating the cluster configuration data to all 95 | affected nodes. This is an asynchronous process.
96 | 97 | [root@nxt1102 ssl]# scp id_rsa_committed.pub mgt2-10g:/var/mmfs/ssl/nxt1102.cluster.id_rsa_committed.pub 98 | id_rsa_committed.pub 100% 837 0.8KB/s 00:00 99 | 100 | ### restart GPFS on cluster2 101 | 102 | # ---------------------------- 103 | # Do the following on cluster1 104 | # ---------------------------- 105 | 106 | ### Sanity check 107 | 108 | [root@mgt2 ssl]# dir 109 | total 28 110 | -rw-r--r-- 1 root root 370 Feb 20 20:16 authorized_keys 111 | -rw------- 1 root root 497 Feb 20 20:11 id_rsa1 112 | lrwxrwxrwx 1 root root 7 Feb 20 20:11 id_rsa_committed -> id_rsa1 113 | -rw-r--r-- 1 root root 449 Feb 20 20:11 id_rsa_committed.cert 114 | -rw-r--r-- 1 root root 825 Feb 20 20:11 id_rsa_committed.pub 115 | lrwxrwxrwx 1 root root 34 Feb 20 20:11 id_rsa.pub -> /var/mmfs/ssl/id_rsa_committed.pub 116 | -rw-r--r-- 1 root root 837 Feb 20 20:38 nxt1102.cluster.id_rsa_committed.pub 117 | -rw-r--r-- 1 root root 183 Feb 20 20:11 openssl.conf 118 | drwx------ 2 root root 4096 Feb 20 20:11 stage 119 | 120 | [root@mgt2 ssl]# mmauth add nxt1102.cluster -k nxt1102.cluster.id_rsa_committed.pub 121 | mmauth: Command successfully completed 122 | mmauth: Propagating the cluster configuration data to all affected nodes. 123 | 124 | ### restart GPFS on cluster1 125 | 126 | [root@mgt2 ssl]# mmmount all -a 127 | Thu Feb 20 20:47:25 CST 2014: mmmount: Mounting file systems ... 128 | [root@mgt2 ssl]# mmlsmount all 129 | File system fs12 is mounted on 7 nodes. 130 | File system fs34 is mounted on 7 nodes. 131 | File system fs56 is mounted on 7 nodes. 
132 | 133 | [root@mgt2 ssl]# mmauth show all 134 | Cluster name: nxt1102.cluster 135 | Cipher list: AUTHONLY 136 | SHA digest: ee7db3f7ed142024d3901b20c7d9d3ee992ef61f 137 | File system access: (none authorized) 138 | 139 | Cluster name: GSS.cluster (this cluster) 140 | Cipher list: AUTHONLY 141 | SHA digest: 2ea65f84a39ac25dea10d6e9eac39469148087fb 142 | File system access: (all rw) 143 | 144 | [root@mgt2 ssl]# mmauth grant nxt1102.cluster -f all 145 | 146 | mmauth: Granting cluster nxt1102.cluster access to file system fs12: 147 | access type rw; root credentials will not be remapped. 148 | 149 | mmauth: Granting cluster nxt1102.cluster access to file system fs34: 150 | access type rw; root credentials will not be remapped. 151 | 152 | mmauth: Granting cluster nxt1102.cluster access to file system fs56: 153 | access type rw; root credentials will not be remapped. 154 | 155 | mmauth: Command successfully completed 156 | mmauth: Propagating the cluster configuration data to all affected nodes. 157 | [root@mgt2 ssl]# mmauth show all 158 | Cluster name: nxt1102.cluster 159 | Cipher list: AUTHONLY 160 | SHA digest: ee7db3f7ed142024d3901b20c7d9d3ee992ef61f 161 | File system access: fs12 (rw, root allowed) 162 | fs34 (rw, root allowed) 163 | fs56 (rw, root allowed) 164 | 165 | Cluster name: GSS.cluster (this cluster) 166 | Cipher list: AUTHONLY 167 | SHA digest: 2ea65f84a39ac25dea10d6e9eac39469148087fb 168 | File system access: (all rw) 169 | 170 | # ---------------------------- 171 | # Do the following on cluster2 172 | # ---------------------------- 173 | 174 | [root@nxt1102 ssl]# mmremotecluster add GSS.cluster -n gss1-10g,gss4-10g -k GSS.cluster.id_rsa_committed.pub 175 | mmremotecluster: Command successfully completed 176 | mmremotecluster: Propagating the cluster configuration data to all 177 | affected nodes. This is an asynchronous process. 
178 | 179 | [root@nxt1102 ssl]# mmremotecluster show 180 | Cluster name: GSS.cluster 181 | Contact nodes: gss1-10g,gss4-10g 182 | SHA digest: 2ea65f84a39ac25dea10d6e9eac39469148087fb 183 | File systems: (none defined) 184 | 185 | [root@nxt1102 ssl]# mmremotefs add fs12 -f fs12 -C GSS.cluster -A no -T /gss/fs12 186 | mmremotefs: Propagating the cluster configuration data to all 187 | affected nodes. This is an asynchronous process. 188 | [root@nxt1102 ssl]# mmremotefs add fs34 -f fs34 -C GSS.cluster -A no -T /gss/fs34 189 | mmremotefs: Propagating the cluster configuration data to all 190 | affected nodes. This is an asynchronous process. 191 | [root@nxt1102 ssl]# mmremotefs add fs56 -f fs56 -C GSS.cluster -A no -T /gss/fs56 192 | mmremotefs: Propagating the cluster configuration data to all 193 | affected nodes. This is an asynchronous process. 194 | 195 | [root@nxt1102 ssl]# mmremotefs show 196 | Local Name Remote Name Cluster name Mount Point Mount Options Automount Drive Priority 197 | fs12 fs12 GSS.cluster /gss/fs12 rw no - 0 198 | fs34 fs34 GSS.cluster /gss/fs34 rw no - 0 199 | fs56 fs56 GSS.cluster /gss/fs56 rw no - 0 200 | 201 | [root@nxt1102 ssl]# mmremotecluster show 202 | Cluster name: GSS.cluster 203 | Contact nodes: gss1-10g,gss4-10g 204 | SHA digest: 2ea65f84a39ac25dea10d6e9eac39469148087fb 205 | File systems: fs12 (fs12) fs34 (fs34) fs56 (fs56) 206 | 207 | [root@nxt1102 ssl]# mmmount all -a 208 | Thu Feb 20 21:02:26 CST 2014: mmmount: Mounting file systems ... 209 | [root@nxt1102 ssl]# mmlsmount all 210 | File system fs12 (GSS.cluster:fs12) is mounted on 31 nodes. 211 | File system fs34 (GSS.cluster:fs34) is mounted on 31 nodes. 212 | File system fs56 (GSS.cluster:fs56) is mounted on 31 nodes. 
213 | 214 | [root@nxt1102 ssl]# chmod 1777 /gss 215 | 216 | [root@nxt1102 ssl]# dir /gss/ 217 | total 192 218 | drwxrwxrwt 2 root root 32768 Feb 20 17:13 fs12 219 | drwxrwxrwt 2 root root 32768 Feb 20 17:50 fs34 220 | drwxrwxrwt 2 root root 32768 Feb 20 17:49 fs56 221 | 222 | [root@nxt1102 ssl]# dir /gss/fs* 223 | /gss/fs12: 224 | total 2097152 225 | -rw-r--r-- 1 root root 2147483648 Feb 20 17:13 buggs 226 | 227 | /gss/fs34: 228 | total 4194304 229 | -rw-r--r-- 1 root root 2147483648 Feb 20 17:13 buggs 230 | -rw-r--r-- 1 root root 2147483648 Feb 20 17:50 bunny 231 | 232 | /gss/fs56: 233 | total 4194304 234 | -rw-r--r-- 1 root root 2147483648 Feb 20 17:13 buggs 235 | -rw-r--r-- 1 root root 2147483648 Feb 20 17:49 bunny 236 | 237 | 238 | -------------------------------------------------------------------------------- /doc/half-baked_goodies/README.half-baked_goodies: -------------------------------------------------------------------------------- 1 | The tools in this directory may provide valuable functionality, but may 2 | not yet be ready to run in your environment without modification. 3 | 4 | In this case, access to half-baked goodies is considered better than 5 | access to no goodies. ;-) 6 | 7 | Use your own judgement before executing -- these are _not_ guaranteed to 8 | be "safe" in your environment. 9 | 10 | Have fun! -Brian 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /doc/half-baked_goodies/ds35xx_run_luall: -------------------------------------------------------------------------------- 1 | #!/usr/bin/expect -- 2 | 3 | # 4 | # Telnet to a raid controller on a ds35xx storage subsystem, and 5 | # retrieve the stats for individual disks via the "luall" (show lun 6 | # all) command. 
7 | # 8 | 9 | # 10 | # 2013.07.30 Brian Elliott Finley 11 | # - created 12 | # 13 | 14 | 15 | set timeout -1 16 | set user "shellUsr" 17 | set pass "wy3oo&w4" 18 | 19 | # Get hostname from first arg on the command line 20 | set hostname [lrange $argv 0 0] 21 | 22 | spawn telnet $hostname 23 | expect "login" 24 | send "$user\r" 25 | expect "assword" 26 | send "$pass\r" 27 | expect ">" 28 | 29 | # Log this session to a file (default is to append) 30 | log_file ds35xx_run_luall.out 31 | 32 | send "luall 3\r" 33 | expect ">" 34 | 35 | send "exit\r" 36 | 37 | -------------------------------------------------------------------------------- /doc/half-baked_goodies/ds35xx_run_luall.evaluate_output: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | /root/bef_scripts/ds35xx_run_luall sc-cap3-b1 4 | 5 | f=ds35xx_run_luall.out 6 | 7 | echo 8 | head -n 3 $f 9 | 10 | ( 11 | for i in $(grep -B 9 'Associated array: 18' ~bfinley/SM_Data/storage-cap3.profile.2013.06.27-11.12 | grep Drive | awk '{print "t"$4 "s"$6}') 12 | do 13 | grep $i $f 14 | done 15 | ) | tail -n 13 16 | 17 | -------------------------------------------------------------------------------- /doc/multipath.conf-creator.EXAMPLE_Execution: -------------------------------------------------------------------------------- 1 | # 2 | # 2012.10.27 Brian Finley 3 | # - executed on a storage cluster's primary managment node 4 | # 5 | 6 | [root@glademgt1 ~]# ./multipath.conf-creator -a -o /tmp/multipath.conf.test_run 7 | Looks like we're dealing with 76 controllers. 
Let's get going: 8 | Querying dcs10 9 | Querying dcs11 10 | Querying dcs12 11 | Querying dcs13 12 | Querying dcs14 13 | Querying dcs15 14 | Querying dcs16 15 | Querying dcs17 16 | Querying dcs18 17 | Querying dcs19 18 | Querying dcs1 19 | Querying dcs20 20 | Querying dcs21 21 | Querying dcs22 22 | Querying dcs23 23 | Querying dcs24 24 | Querying dcs25 25 | Querying dcs26 26 | Querying dcs27 27 | Querying dcs28 28 | Querying dcs29 29 | Querying dcs2 30 | Querying dcs3 31 | Querying dcs4 32 | Querying dcs5 33 | Querying dcs6 34 | Querying dcs7 35 | Querying dcs8 36 | Querying dcs71 37 | Querying dcs51 38 | Querying dcs60 39 | Querying dcs67 40 | Querying dcs56 41 | Querying dcs76 42 | Querying dcs44 43 | Querying dcs73 44 | Querying dcs57 45 | Querying dcs30 46 | Querying dcs46 47 | Querying dcs37 48 | Querying dcs39 49 | Querying dcs55 50 | Querying dcs36 51 | Querying dcs38 52 | Querying dcs69 53 | Querying dcs41 54 | Querying dcs53 55 | Querying dcs52 56 | Querying dcs40 57 | Querying dcs66 58 | Querying dcs45 59 | Querying dcs54 60 | Querying dcs65 61 | Querying dcs33 62 | Querying dcs34 63 | Querying dcs42 64 | Querying dcs48 65 | Querying dcs32 66 | Querying dcs68 67 | Querying dcs64 68 | Querying dcs72 69 | Querying dcs9 70 | Querying dcs61 71 | Querying dcs31 72 | Querying dcs35 73 | Querying dcs43 74 | Querying dcs47 75 | Querying dcs49 76 | Querying dcs50 77 | Querying dcs58 78 | Querying dcs59 79 | Querying dcs62 80 | Querying dcs63 81 | Querying dcs74 82 | Querying dcs75 83 | Querying dcs70 84 | Done! You can find your fresh multipath.conf file here: 85 | 86 | /tmp/multipath.conf.test_run 87 | 88 | Your next step is to copy it out to /etc/multipath.conf on all of the 89 | storage servers connected to these storage controllers. Then execute 90 | the multipathd command to re-read the configuration file: 91 | 92 | # 93 | # WARNING! 
Make sure you have performed an "mmshutdown" on each NSD 94 | # server prior to copying out the new file and doing the reconfigure 95 | # below. If there are any differences between this config and your 96 | # prior config, taking that precaution will ensure you don't provoke 97 | # any corruption in any pre-existing GPFS filesystems. 98 | # 99 | # If you have NSD servers in building blocks as HA pairs, you should be 100 | # able to do this on one server at a time (mmshutdown, reconfigure, 101 | # mmstartup). But, you _must_ use your own judgement -- don't 102 | # blindly follow this part of these instructions. (You can blindly 103 | # follow the new file creation part. ;-) 104 | # 105 | 106 | multipathd -k'reconfigure' 107 | multipathd -k'reconfigure' 108 | 109 | # 110 | # Yes -- do it twice. With the first invocation, it's possible that 111 | # not all device mapper name changes will show up in the 112 | # /dev/mapper/ directory. 113 | # 114 | 115 | 116 | You can see the results with these commands (and others): 117 | 118 | ls /dev/mapper/ 119 | multipath -ll 120 | 121 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/defaults: -------------------------------------------------------------------------------- 1 | # 2 | # Default entries 3 | # 4 | defaults { 5 | udev_dir /dev 6 | polling_interval 5 7 | # checker_timeout 10 # uncomment for FlashStorage connected systems 8 | user_friendly_names yes 9 | log_checker_err once 10 | queue_without_daemon "no" 11 | max_fds 65535 12 | } 13 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-DDN,S2A_6620: -------------------------------------------------------------------------------- 1 | device { 2 | vendor "DDN" 3 | product "S2A 6620" 4 | getuid_callout "/sbin/scsi_id --page=0x83 --whitelisted --device=/dev/%n" 5 | path_grouping_policy 
group_by_prio 6 | prio alua 7 | no_path_retry 12 8 | #no_path_retry fail 9 | failback "10" 10 | } 11 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,1726: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DS3200 => 1726-2xx 6 | # DS3400 => 1726-4xx 7 | # 8 | vendor "IBM" 9 | product "1726-[24]xx" 10 | failback immediate 11 | no_path_retry fail 12 | path_checker rdac 13 | path_grouping_policy group_by_prio 14 | prio rdac 15 | } 16 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,1746: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DS3512 => 1746 6 | # DS3524 => 1746 7 | # 8 | vendor "IBM" 9 | product "1746" 10 | path_grouping_policy group_by_prio 11 | getuid_callout "/lib/udev/scsi_id -g -u -d /dev/%n" 12 | # 13 | # -g Treat the device as white listed. 14 | # -u Turn whitespace to underscores. 15 | # -d Instead of determining and creating a device node based on a 16 | # sysfs dev entry as done for the -s, send SG_IO commands to 17 | # device, such as /dev/sdc. NOTE: This is important when 18 | # using friendly multipath device names. 
-Brian 19 | # 20 | path_selector "round-robin 0" 21 | path_checker rdac 22 | features "2 pg_init_retries 50" 23 | hardware_handler "1 rdac" 24 | prio rdac 25 | failback immediate 26 | no_path_retry 15 27 | #no_path_retry fail 28 | rr_min_io 100 29 | rr_weight priorities 30 | } 31 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,1813: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DCS3860 6 | # 7 | vendor "IBM" 8 | product "1813" 9 | path_grouping_policy group_by_prio 10 | getuid_callout "/lib/udev/scsi_id -g -u -d /dev/%n" 11 | # 12 | # -g Treat the device as white listed. 13 | # -u Turn whitespace to underscores. 14 | # -d Instead of determining and creating a device node based on a 15 | # sysfs dev entry as done for the -s, send SG_IO commands to 16 | # device, such as /dev/sdc. NOTE: This is important when 17 | # using friendly multipath device names. 
-Brian 18 | # 19 | path_selector "round-robin 0" 20 | path_checker rdac 21 | features "2 pg_init_retries 50" 22 | hardware_handler "1 rdac" 23 | prio rdac 24 | failback immediate 25 | no_path_retry 15 26 | #no_path_retry fail 27 | rr_min_io 100 28 | rr_weight priorities 29 | } 30 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,1814,1815: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DS4700 => 1814 6 | # DS4800 => 1815 7 | # 8 | vendor "IBM" 9 | product "181[45]" 10 | failback immediate 11 | no_path_retry fail 12 | path_checker rdac 13 | path_grouping_policy group_by_prio 14 | prio rdac 15 | } 16 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,1818: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DCS3700 => 1818 6 | # 7 | vendor "IBM" 8 | product "1818" 9 | path_grouping_policy group_by_prio 10 | getuid_callout "/lib/udev/scsi_id -g -u -d /dev/%n" 11 | # 12 | # -g Treat the device as white listed. 13 | # -u Turn whitespace to underscores. 14 | # -d Instead of determining and creating a device node based on a 15 | # sysfs dev entry as done for the -s, send SG_IO commands to 16 | # device, such as /dev/sdc. NOTE: This is important when 17 | # using friendly multipath device names. 
-Brian 18 | # 19 | path_selector "round-robin 0" 20 | path_checker rdac 21 | features "2 pg_init_retries 50" 22 | hardware_handler "1 rdac" 23 | prio rdac 24 | failback immediate 25 | no_path_retry 15 26 | #no_path_retry fail 27 | rr_min_io 100 28 | rr_weight priorities 29 | } 30 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,2107900: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DS8000 6 | # 7 | vendor "IBM" 8 | product "2107900" 9 | path_grouping_policy group_by_serial 10 | flush_on_last_del "yes" 11 | features "0" 12 | no_path_retry 5 13 | #no_path_retry fail 14 | fast_io_fail_tmo 5 15 | dev_loss_tmo 120 16 | } 17 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,2145: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # SVC - IBM SAN Volume Controller 6 | # 7 | vendor "IBM" 8 | product "2145" 9 | path_grouping_policy group_by_prio 10 | prio alua 11 | features "0" 12 | no_path_retry 5 13 | #no_path_retry fail 14 | rr_min_io 1 15 | dev_loss_tmo 3600 16 | fast_io_fail_tmo 5 17 | flush_on_last_del "yes" 18 | } 19 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,2851-DR1: -------------------------------------------------------------------------------- 1 | device { 2 | vendor "IBM" 3 | product "2851-DR1" 4 | path_checker tur 5 | getuid_callout "/sbin/scsi_id --page=0x83 --whitelisted --device=/dev/%n" 6 | path_grouping_policy group_by_prio 7 | prio alua 8 | no_path_retry 12 9 | #no_path_retry fail 10 | failback "10" 11 
| } 12 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,FlashSystem: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # FlashSystem 710,720,810,820 6 | # 7 | # 8 | # These settings have been taken from the following documents: 9 | # 10 | # 1 - "IBM FlashSystem Integration Guide, Revision 3, 3/2014" 11 | # 2 - "IBM FlashSystem 710, 720, 810, and 820 Service Pack 1 Release Notes" 12 | # 13 | vendor "IBM" # doc 1 14 | product "FlashSystem" # doc 1 15 | path_selector "round-robin 0" # doc 1 16 | path_grouping_policy group_by_prio # doc 2 17 | path_checker tur 18 | 19 | # 20 | # The rr_min_(io|io_rq) of 4 may not offer the best 21 | # performance on the FlashSystem 710/810, but is not harmful 22 | # to those systems. 23 | # 24 | # rr_min_io_rq 1 # doc 1, sles11 & rhel6, FlashSystem/710/810 25 | rr_min_io_rq 4 # doc 1, sles11 & rhel6, FlashSystem/820 26 | # rr_min_io 1 # doc 1, sles10 & rhel5, FlashSystem/710/810 27 | # rr_min_io 4 # doc 1, sles10 & rhel5, FlashSystem/820 28 | 29 | rr_weight uniform # doc 1 30 | no_path_retry fail # doc 1 31 | failback immediate # doc 1 32 | dev_loss_tmo 300 # doc 2 33 | fast_io_fail_tmo 25 # doc 2 34 | } 35 | 36 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-IBM,dcs95xx,dcs99xx: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # DCS9550 6 | # DCS9900 7 | # 8 | # These settings are taken from multipath.conf.ddn provided by 9 | # ddn_mpath_RHEL5_SLES10-1.5-5.x86_64.rpm 10 | # 11 | vendor "IBM" 12 | product "DCS9[59]*" 13 | getuid_callout "/sbin/scsi_id -u -g -p 0x80 -s /block/%n" 14 | prio 
"/sbin/mpath_prio_alua %d" 15 | path_grouping_policy group_by_prio 16 | path_checker tur 17 | failback immediate 18 | no_path_retry fail 19 | 20 | # 21 | # These settings augment those above, and are taken from 22 | # SONAS_multipath_1.4.0.1-5c.conf. 23 | # 24 | features "1 queue_if_no_path" 25 | rr_min_io 63 26 | rr_weight priorities 27 | } 28 | -------------------------------------------------------------------------------- /etc/gpfs_goodies/multipath.conf-creator_config_chunks/device-NETAPP,Tahoe: -------------------------------------------------------------------------------- 1 | device { 2 | # 3 | # This device section will apply to the following subsystems: 4 | # 5 | # Code name Tahoe 6 | # 7 | # NETAPP INF-01-00 6810 8 | # 9 | vendor "NETAPP" 10 | product "INF-01-00" 11 | path_grouping_policy group_by_prio 12 | getuid_callout "/lib/udev/scsi_id -g -u -d /dev/%n" 13 | # 14 | # -g Treat the device as white listed. 15 | # -u Turn whitespace to underscores. 16 | # -d Instead of determining and creating a device node based on a 17 | # sysfs dev entry as done for the -s, send SG_IO commands to 18 | # device, such as /dev/sdc. NOTE: This is important when 19 | # using friendly multipath device names. -Brian 20 | # 21 | path_selector "round-robin 0" 22 | path_checker rdac 23 | features "2 pg_init_retries 50" 24 | hardware_handler "1 rdac" 25 | prio rdac 26 | failback immediate 27 | no_path_retry 15 28 | #no_path_retry fail 29 | rr_min_io 100 30 | rr_weight priorities 31 | } 32 | -------------------------------------------------------------------------------- /etc/init.d/opensmd.for_direct_connect_storage: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # 2014.01.24 Brian Elliott Finley 5 | # - Modified to handle multiple instances on specific ports, say for 6 | # example use with a direct connect IB disk subsystem. Currently 7 | # hard coded to work with all ports on mlx4_1 and mlx4_2. 
8 | # 2014.06.17 Lerone Latouche (w/minor tweaks by Brian) 9 | # - Add a sleep after start of each opensm instance 10 | # - Add comments on required srptools package 11 | # 12 | 13 | # 14 | # Bring up/down opensm 15 | # 16 | # chkconfig: - 15 85 17 | # description: Activates/Deactivates InfiniBand Subnet Manager 18 | # 19 | ### BEGIN INIT INFO 20 | # Provides: opensm 21 | # Required-Start: $syslog openibd 22 | # Required-Stop: $syslog openibd 23 | # Default-Start: null 24 | # Default-Stop: 0 1 6 25 | # Description: Manage OpenSM 26 | ### END INIT INFO 27 | # 28 | # Copyright (c) 2008 Voltaire, Inc. All rights reserved. 29 | # Copyright (c) 2006 Mellanox Technologies. All rights reserved. 30 | # 31 | # This Software is licensed under one of the following licenses: 32 | # 33 | # 1) under the terms of the "Common Public License 1.0" a copy of which is 34 | # available from the Open Source Initiative, see 35 | # http://www.opensource.org/licenses/cpl.php. 36 | # 37 | # 2) under the terms of the "The BSD License" a copy of which is 38 | # available from the Open Source Initiative, see 39 | # http://www.opensource.org/licenses/bsd-license.php. 40 | # 41 | # 3) under the terms of the "GNU General Public License (GPL) Version 2" a 42 | # copy of which is available from the Open Source Initiative, see 43 | # http://www.opensource.org/licenses/gpl-license.php. 44 | # 45 | # Licensee has the right to choose one of the above licenses. 46 | # 47 | # Redistributions of source code must retain the above copyright 48 | # notice and one of the license notices. 49 | # 50 | # Redistributions in binary form must reproduce both the above copyright 51 | # notice, one of the license notices in the documentation 52 | # and/or other materials provided with the distribution. 
53 | # 54 | # 55 | # $Id: openib-1.0-opensm.init,v 1.5 2006/08/02 18:18:23 dledford Exp $ 56 | # 57 | # processname: /usr/sbin/opensm 58 | # config: /etc/sysconfig/opensm 59 | # pidfile: /var/run/opensm.pid 60 | 61 | prefix=/usr 62 | exec_prefix=/usr 63 | 64 | . /etc/rc.d/init.d/functions 65 | 66 | CONFIG=/etc/sysconfig/opensm 67 | if [ -f $CONFIG ]; then 68 | . $CONFIG 69 | fi 70 | 71 | DIRECT_CONNECT_STORAGE_PORTS=/etc/opensm/direct_connect_storage_ports.conf 72 | if [ ! -r "$DIRECT_CONNECT_STORAGE_PORTS" ]; then 73 | echo " 74 | $0: $DIRECT_CONNECT_STORAGE_PORTS doesn't exist. 75 | 76 | Please create it, and add a list of the ports that are directly connected to 77 | your storage. The format should look like the example below. 78 | 79 | # Lines that start with '#' will be treated as comments: 80 | mlx4_0/1 81 | mlx4_1/1 82 | mlx4_4/1 83 | mlx4_4/2 84 | etc. 85 | 86 | 87 | Also, you'll need to make sure that you have the srptools RPM installed. 88 | Just scrape-n-paste this command to verify: 89 | 90 | yum install srptools 91 | 92 | 93 | And you'll need to have the following options specified in 94 | /etc/infiniband/openib.conf: 95 | 96 | # Load SRP module 97 | SRP_LOAD=yes 98 | 99 | # Enable SRP High Availability daemon 100 | SRPHA_ENABLE=no 101 | SRP_DAEMON_ENABLE=yes 102 | 103 | The above settings were taken from section '5.4.4 FlashSystem 840 and Linux 104 | client hosts' of the 'Implementing IBM FlashSystem 840' Redbook, which can 105 | be found here: http://www.redbooks.ibm.com/redbooks/pdfs/sg248189.pdf 106 | 107 | " 108 | fi 109 | 110 | prog=/usr/sbin/opensm 111 | bin=${prog##*/} 112 | 113 | # Handover daemon for updating guid2lid cache file 114 | sldd_prog=/usr/sbin/sldd.sh 115 | sldd_bin=${sldd_prog##*/} 116 | sldd_pid_file=/var/run/sldd.pid 117 | 118 | ACTION=$1 119 | 120 | # 121 | # For direct connect storage, we set the PID_FILE per daemon instance 122 | # in the "case" section down below. 
# -BEF-
#
# Setting OpenSM start parameters
#PID_FILE=/var/run/${bin}.pid
#touch $PID_FILE

# Pass --honor_guid2lid to opensm only when OSM_HOSTS lists more than one host.
if [[ -n "${OSM_HOSTS}" && $(echo -n ${OSM_HOSTS} | wc -w | tr -d '[:space:]') -gt 1 ]]; then
    HONORE_GUID2LID="--honor_guid2lid"
fi

#########################################################################

#
# get_guids
#
#   Print, one per line, the "Port GUID" that ibstat reports for each
#   "mlx" entry in $DIRECT_CONNECT_STORAGE_PORTS.  Blank lines and lines
#   starting with '#' are ignored.  Ports ibstat cannot query are skipped
#   silently.
#
get_guids()
{
    for PORT in $(grep -E -v '^(#|$)' "$DIRECT_CONNECT_STORAGE_PORTS" | grep mlx)
    do
        #
        # Make sure there's a space in the port (ie. mlx4_1/1 => "mlx4_1 1")
        #
        PORT=${PORT//\// }
        # $PORT is intentionally unquoted: ibstat wants two arguments.
        ibstat $PORT 2>/dev/null | grep 'Port GUID' | awk '{print $NF}'
    done
}

#
# start_sldd
#
#   Kill any sldd instance that is already running (found via its pid file,
#   or failing that via pidof), start a new one in the background, and
#   record its pid in $sldd_pid_file.
#
start_sldd()
{
    if [ -f "$sldd_pid_file" ]; then
        local line p
        read line < "$sldd_pid_file"
        for p in $line ; do
            # Keep only purely numeric entries that are live processes.
            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p"
        done
    fi

    if [ -z "$sldd_pid" ]; then
        sldd_pid=$(pidof -x $sldd_bin)
    fi

    if [ -n "${sldd_pid:-}" ] ; then
        kill -9 ${sldd_pid} > /dev/null 2>&1
    fi

    $sldd_prog > /dev/null 2>&1 &
    sldd_pid=$!

    echo ${sldd_pid} > "$sldd_pid_file"
    # Sleep is needed in order to update local gid2lid cache file before running opensm
    sleep 3
}

#
# stop_sldd
#
#   Send SIGTERM to the running sldd instance(s), located the same way as
#   in start_sldd.
#
stop_sldd()
{
    if [ -f "$sldd_pid_file" ]; then
        local line p
        read line < "$sldd_pid_file"
        for p in $line ; do
            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && sldd_pid="$sldd_pid $p"
        done
    fi

    if [ -z "$sldd_pid" ]; then
        sldd_pid=$(pidof -x $sldd_bin)
    fi

    if [ -n "${sldd_pid:-}" ] ; then
        kill -15 ${sldd_pid} > /dev/null 2>&1
    fi

}

#
# start
#
#   Start one opensm instance using the caller-provided globals $GUID,
#   $PID_FILE and $OPTIONS.  Sets the global RC and returns it: 0 when the
#   instance is running afterwards (or was already running), non-zero when
#   the pid recorded in $PID_FILE is not alive.
#
start()
{
    local line p
    local pid=""

    # Reset RC so an "already running" instance does not report the result
    # of a previous port's start attempt.
    RC=0

    if [ -f "$PID_FILE" ]; then
        read line < "$PID_FILE"
        for p in $line ; do
            [ -z "${p//[0-9]/}" -a -d "/proc/$p" ] && pid="$pid $p"
        done
    fi

    # NOTE(review): the original pidof fallback was disabled here; presumably
    # because pidof would also match the instances serving other ports.

    if [ -n "${pid:-}" ] ; then
        echo $"${bin} (pid $pid) is already running..."

    else

        if [ -n "${HONORE_GUID2LID}" ]; then
            # Run sldd daemon
            start_sldd
        fi

        # Start opensm
        echo -n "Starting IB Subnet Manager for port $GUID."
        $prog --daemon ${HONORE_GUID2LID} ${OPTIONS} > /dev/null

        checkpid $(cat "$PID_FILE")
        RC=$?
        [ $RC -eq 0 ] && echo_success || echo_failure
        [ $RC -eq 0 ] && touch /var/lock/subsys/opensm
        echo

    fi
    return $RC
}

#
# stop
#
#   Stop sldd, then SIGKILL every pid listed in $PID_FILE until none is
#   alive.  Removes $PID_FILE and the subsys lock.  Sets the global RC and
#   returns it: 0 when the instance was stopped, 1 when there was no pid
#   file or a listed pid is still alive.
#
stop()
{
    local p
    local pid=

    # Stop sldd daemon
    stop_sldd

    if [ -f "$PID_FILE" ]; then
        echo -n "Stopping IB Subnet Manager."
        pid=$(cat "$PID_FILE")
        for p in $pid
        do
            while checkpid $p ; do
                kill -KILL $p > /dev/null 2>&1
                echo -n "+"
                sleep 1
            done
        done
        # Verify the pids we just signalled.  checkpid succeeds if any of
        # them is still alive, which means the stop failed.
        checkpid $pid
        RC=$?
        [ $RC -eq 0 ] && echo_failure || echo_success
        echo
        RC=$((! $RC))
    else
        echo -n "Stopping IB Subnet Manager for port $GUID."
        echo_failure
        echo
        RC=1
    fi

    # Remove pid file if any.
    rm -f "$PID_FILE"
    rm -f /var/lock/subsys/opensm
    return $RC
}

#
# status
#
#   Returns 0 if any opensm process is running, 1 if none is running but a
#   non-empty pid file is left behind, 3 otherwise.
#
status()
{
    local pid pid_file

    # First try "pidof"
    pid=$(pidof -o $$ -o $PPID -o %PPID -x ${bin})
    if [ -n "$pid" ]; then
        echo $"${bin} (pid $pid) is running..."
        return 0
    fi

    # Next try the pid files.  PID_FILE is not set when this function is
    # reached through the "status" action, so fall back to every
    # per-instance file that the "start" action creates.
    for pid_file in ${PID_FILE:-/var/run/opensm.pid.*}; do
        [ -f "$pid_file" ] || continue
        read pid < "$pid_file"
        if [ -n "$pid" ]; then
            echo $"${bin} dead but pid file $pid_file exists"
            return 1
        fi
    done
    echo $"${bin} is stopped"
    return 3
}



# RC carries the script's exit status; each action sets it explicitly.
RC=0

case $ACTION in
    start)
        for GUID in $(get_guids); do
            PID_FILE=/var/run/opensm.pid.$GUID
            touch $PID_FILE
            OPTIONS="--guid $GUID --pidfile $PID_FILE"
            start || START_FAILED=1

            ################# llatouche @ lenovo.com ################
            # Load the IB SRP kernel module and initiate/connect IB SRP storage devices
            sleep 5
            # srp_discover
            ########################################################
        done
        # Report failure if any instance failed to start (the trailing
        # sleep would otherwise mask it).
        RC=${START_FAILED:-0}
        ;;

    stop)
        for GUID in $(get_guids); do
            PID_FILE=/var/run/opensm.pid.$GUID
            stop || STOP_FAILED=1
        done
        RC=${STOP_FAILED:-0}
        ;;

    restart)
        #
        # re-invoking the init script ensures we get the full "case" behavior,
        # which unfortunately have the same names as the functions they invoke.
        # Ie.: case start) invokes the function 'start'.
        #
        $0 stop
        $0 start
        RC=$?
        ;;

    status)
        status
        RC=$?
        ;;

    condrestart)
        pid=$(pidof -o $$ -o $PPID -o %PPID -x $bin)
        if [ -n "$pid" ]; then
            $0 stop
            sleep 1
            $0 start
            RC=$?
        fi
        ;;

    *)
        echo
        echo "Usage: `basename $0` {start|stop|restart|condrestart|status}"
        echo
        exit 1
        ;;
esac
380 | exit $RC 381 | 382 | 383 | -------------------------------------------------------------------------------- /etc/modprobe.d/scsi_dh_alua.conf: -------------------------------------------------------------------------------- 1 | alias scsi_hostadapter99 scsi_dh_rdac 2 | -------------------------------------------------------------------------------- /gpfs_goodies.spec: -------------------------------------------------------------------------------- 1 | Summary: GPFS Goodies -- Tools to deploy GPFS on Linux with Device Mapper Multipath 2 | Name: gpfs_goodies 3 | Version: __VERSION__ 4 | Release: 1 5 | Source: %{name}-%{version}.tar.bz2 6 | BuildRoot: /tmp/%{name}-buildroot 7 | BuildArchitectures: noarch 8 | Requires: lsscsi >= 0.21, expect 9 | License: Apache License, Version 2.0 10 | 11 | 12 | %description 13 | GPFS Goodies includes a set of tools and a HOWTO for using the included 14 | "multipath.conf-creator", a simple command-line tool to create a 15 | multipath.conf configuration appropriate for an entire GPFS storage 16 | cluster. Includes step-by-step guidance on deployment. Start by 17 | viewing the HOWTO. 18 | . 
19 | Includes: 20 | Tools 21 | - multipath.conf-creator 22 | - brians_own_hot-add_script 23 | - gpfs_stanzafile-creator 24 | - test_block_device_settings 25 | - tune_block_device_settings 26 | OpenSM init script for direct connect InfiniBand storage 27 | - /etc/init.d/opensmd.for_direct_connect_storage 28 | (installed inactive) 29 | HOWTOs 30 | - GPFS Multi-Cluster Routing HOWTO 31 | Some half-baked goodies and examples 32 | 33 | 34 | 35 | %prep 36 | %setup -q 37 | 38 | %build 39 | 40 | %install 41 | rm -rf $RPM_BUILD_ROOT 42 | mkdir -p $RPM_BUILD_ROOT/ 43 | make -f Makefile.rpm PREFIX=$RPM_BUILD_ROOT install 44 | 45 | %clean 46 | rm -rf $RPM_BUILD_ROOT 47 | 48 | %files 49 | %defattr(-,root,root) 50 | %config(noreplace) /etc/gpfs_goodies/* 51 | %config(noreplace) /etc/init.d/* 52 | %{_prefix}/sbin/* 53 | %{_prefix}/share/* 54 | 55 | %changelog 56 | * Fri Sep 20 2013 Brian Elliott Finley 57 | - created this spec file 58 | 59 | -------------------------------------------------------------------------------- /sbin/brians_own_hot-add_script: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 4 | # This script will hot-add LUNs from DS35xx and DCS37xx series 5 | # devices. It will also hot-remove any devices that no longer exist. 6 | # It will _NOT_ hot-remove any devices that do still exist. 
-BEF- 7 | # 8 | 9 | # 10 | # 2012.06.28 Brian Elliott Finley 11 | # - created 12 | # 2013.09.27 Brian Elliott Finley 13 | # - add --help, --status, and --yes options 14 | # 2013.11.07 Brian Elliott Finley 15 | # - added version 16 | # 2013.11.10 Brian Elliott Finley 17 | # - genericize to no be limited to specific controller models 18 | # - improve status output to list per-controller type counts 19 | # 20 | 21 | PROGNAME=$(basename $0) 22 | VERSION=20.5.19 23 | 24 | version() { 25 | echo 26 | echo "$PROGNAME v$VERSION" 27 | echo 28 | echo ' Part of the "gpfs_goodies" package' 29 | echo 30 | } 31 | 32 | 33 | help() { 34 | 35 | echo 36 | echo "$PROGNAME [--help|--version|--status|--yes]" 37 | echo 38 | 39 | cat <<"EOF" 40 | All options can be abbreviated to minimum uniqueness. 41 | 42 | This program will hot delete any devices that don't have disk 43 | devices associated with them (stale LUNs), and hot-add any new 44 | devices. 45 | 46 | --help 47 | 48 | Show this help output. 49 | 50 | --version 51 | 52 | Show this program's version number. 53 | 54 | --status 55 | 56 | Show the current LUN count, but don't actually do anything or 57 | make any changes. This count will include all LUNs connected to 58 | the system, including local devices. 59 | 60 | --yes 61 | 62 | Hot delete any devices that don't have disk devices associated 63 | with them (stale LUNs), and hot-add any new devices. 64 | 65 | It's important to perform the hot-delete prior to the hot-add, 66 | as a disk that's no longer present may be still be considered as 67 | connected, and thus prevent detection of a new disk in that same 68 | position. 69 | 70 | The hot-delete function should be considered "safe", in that it 71 | won't remove the representation of any disk that is actually 72 | present and in use, but will only operate on disks that have 73 | already been removed. 
If a disk has been re-located, it's stale 74 | SCSI representation will be removed from it's old location and 75 | then hot-added in it's new location. 76 | 77 | If it all in doubt, look at the code in this script, and make 78 | your own determination as to the impact of it's operation and/or 79 | try it in your test environment first. 80 | 81 | You do have a test environment, right? 82 | 83 | 84 | Support: 85 | 86 | This software is provided as-is, with no express or implied 87 | support. However, the author would love to receive your 88 | patches. Please contact Brian E. Finley 89 | with patches and/or suggestions. 90 | 91 | 92 | EOF 93 | 94 | } 95 | 96 | 97 | hot_delete() { 98 | # 99 | # Hot delete any devices that don't have disk devices associated with them 100 | # 101 | 102 | delete_me=$(lsscsi | egrep -v '/dev/(sd|sr)' | sed -e 's/.*\[//' -e 's/].*//') 103 | if [ -z "$delete_me" ]; then 104 | echo 105 | echo "No stale LUNs entries to hot-delete." 106 | else 107 | echo 108 | echo "Hot removing stale LUN entries:" 109 | echo 110 | 111 | for i in $delete_me 112 | do 113 | file="/sys/class/scsi_device/${i}/device/delete" 114 | echo " echo 1 > $file" 115 | echo 1 > $file 116 | done 117 | echo 118 | fi 119 | } 120 | 121 | hot_add() { 122 | # 123 | # Hot add all new devices 124 | # 125 | 126 | add_me=$(/bin/ls /sys/class/scsi_host/host*/scan) 127 | if [ ! 
-z "$add_me" ]; then 128 | echo 129 | echo "Discovering and hot-adding new LUNs:" 130 | echo 131 | fi 132 | 133 | for i in $add_me 134 | do 135 | echo " echo '- - -' > $i" 136 | echo '- - -' > $i 137 | done 138 | echo 139 | 140 | } 141 | 142 | status() { 143 | echo "Total disk based LUN count by product type:" 144 | # 145 | # Example lsscsi output: 146 | # 147 | # [$] lsscsi 148 | # [0:0:0:0] disk ATA ST750LX003-1AC15 SM12 /dev/sda 149 | # [1:0:0:0] disk ATA KINGSTON SV100S2 D100 /dev/sdb 150 | # [6:0:0:0] disk WD My Passport 0740 1003 /dev/sdc 151 | # [6:0:0:1] enclosu WD SES Device 1003 - 152 | # [7:0:0:0] cd/dvd TSSTcorp CDDVDW SE-S084D TS01 /dev/sr0 153 | # 154 | lsscsi | egrep '/dev/sd' | perl -pi -e 's/\S+\s+\S+\s+\S+\s+(.*)\s+\S+\s+\S+\s*$/$1\n/' | uniq -c 155 | echo 156 | } 157 | 158 | test_for_root() { 159 | EFFECTIVE_UID=$(id | perl -pi -e 's/^uid=(\d+).*/$1/') 160 | if [ $EFFECTIVE_UID != 0 ]; then 161 | help 162 | echo "--> Please run as root" 163 | echo 164 | exit 1 165 | fi 166 | } 167 | 168 | 169 | if [ ! -z "$1" -a ! -z "$2" ]; then 170 | help 171 | echo 172 | echo "SUGGESTION: Please only specify one option." 173 | echo 174 | exit 1 175 | fi 176 | 177 | OPTION=$(echo $1 | sed 's/^--/-/') 178 | 179 | case $OPTION in 180 | -v*) 181 | version 182 | exit 0 183 | ;; 184 | -h*) 185 | version 186 | help 187 | exit 0 188 | ;; 189 | -s*) 190 | test_for_root 191 | status 192 | exit 0 193 | ;; 194 | -y*) 195 | test_for_root 196 | hot_delete 197 | hot_add 198 | status 199 | exit 0 200 | ;; 201 | *) 202 | help 203 | echo 204 | echo "SUGGESTION: Please specify either --status or --yes." 
205 | echo 206 | exit 1 207 | ;; 208 | esac 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /sbin/gpfs_goodies: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # 2013.09.27 Brian Elliott Finley 5 | # 2013.11.07 Brian Elliott Finley 6 | # 7 | 8 | VERSION=3.3.3.3 9 | 10 | echo " 11 | gpfs_goodies v$VERSION 12 | 13 | Please try one of the following commands. They're listed in the order 14 | of their typical use, from start to finish. It's OK -- they're all 15 | safe, and won't do anything but show help information if run with either 16 | no arguments, or with --help or -h as an argument. 17 | 18 | brians_own_hot-add_script 19 | 20 | Hot delete any devices that don't have disk devices associated 21 | with them (stale LUNs), and hot-add any new devices. 22 | 23 | multipath.conf-creator 24 | 25 | Create and optionally deploy a multipath configuration 26 | appropriate for your LUNs and storage servers. 27 | 28 | gpfs_stanzafile-creator 29 | 30 | This script will auto-create a GPFS StanzaFile (written to 31 | STDOUT) that should be considered an example of how you can 32 | appropriately balance your disk devices across your NSD servers 33 | for best performance. It may be used as-is in many cases. 34 | 35 | test_block_device_settings 36 | 37 | This program will summarize key block device settings that 38 | impact the performance when accessing your disk subsystems. It 39 | makes no changes to your system, and is safe to run on live 40 | production systems. 41 | 42 | tune_block_device_settings 43 | 44 | This program will examine your environment including GPFS, 45 | storage servers, disk subsystems, and LUNs, to calculate best 46 | practice block device tuning settings. It will create a single 47 | udev rules file with the tuning settings, using one entry for 48 | each LUN, and optionally deploy it to each participating storage 49 | server. 
50 | 51 | 52 | Be sure to take a look at the HOWTO as well as other docs, examples, 53 | and half-baked goodies in: 54 | 55 | /usr/share/gpfs_goodies 56 | 57 | " 58 | 59 | -------------------------------------------------------------------------------- /sbin/gpfs_stanzafile-creator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # 4 | # gpfs_stanzafile-creator 5 | # 6 | 7 | # 8 | # 2014.05.23 Brian Elliott Finley 9 | # - Created for demo at Edge2014 10 | # 2014.07.01 Brian Elliott Finley 11 | # - Improved help output and options validation 12 | # 2014.07.09 Brian Elliott Finley & Lerone Latouche 13 | # - Identified and fixed bug where remote commands were not properly 14 | # escaped 15 | # 2014.09.13 Brian Elliott Finley 16 | # - Converted to perl 17 | # - Added support for arbitrary numbers of servers per building-block 18 | # (GPFS currently only supports a max of 8) 19 | # - Added intelligence to ensure that LUNs on subsystems that are 20 | # truly active-active are balanced across all paths. 21 | # 22 | 23 | use strict; 24 | use Getopt::Long; 25 | use File::Basename; 26 | 27 | my $progname = basename($0); 28 | my $version_number = '20.9.5'; 29 | my @output; 30 | my %disks_by_raid_controller; 31 | my $disk_regex = '(.*[ab])_lun[0-9]+'; 32 | 33 | GetOptions( 34 | "help" => \my $help, 35 | "version" => \my $version, 36 | "sg|server-group=s" => \my @server_groups, 37 | "paths=s" => \my $paths, 38 | "out-file=s" => \my $out_file, 39 | "v|verbose" => \my $verbose, 40 | ) or usage() and exit(1); 41 | 42 | if ( defined $help ) { 43 | usage(); 44 | exit 0; 45 | } 46 | 47 | if ( defined $version ) { 48 | version(); 49 | exit 0; 50 | } 51 | 52 | if( ! @server_groups ) { 53 | usage(); 54 | print "--> Please try --server-group\n\n"; 55 | exit 1; 56 | } 57 | 58 | if( ! defined $out_file ) { 59 | $out_file = get_tmp_file(); 60 | } 61 | 62 | if( ! 
defined $paths ) { 63 | $paths = 2; 64 | } 65 | 66 | 67 | ######################################################################## 68 | # 69 | # BEGIN main 70 | # 71 | foreach my $servers (@server_groups) { 72 | create_stanza_chunk($servers); 73 | } 74 | 75 | write_stanza_file($out_file); 76 | 77 | print "\nYour new file: $out_file\n\n"; 78 | 79 | exit 0; 80 | 81 | # 82 | # END main 83 | # 84 | ######################################################################## 85 | 86 | 87 | ######################################################################## 88 | # 89 | # BEGIN subroutines 90 | # 91 | sub write_stanza_file { 92 | 93 | my $file = shift; 94 | 95 | open(FILE,">$file") or die("Couldn't open $file for writing"); 96 | print FILE "#\n"; 97 | print FILE "# Created by $progname v$version_number\n"; 98 | print FILE "#\n"; 99 | print FILE "\n"; 100 | print FILE @output; 101 | close(FILE); 102 | 103 | return 1; 104 | } 105 | 106 | 107 | sub create_stanza_chunk { 108 | 109 | my $servers = shift; 110 | 111 | my %disks; 112 | 113 | my ($server, $remainder) = split(/,/, $servers, 2); 114 | my $cmd = qq(ssh $server ) . 
q(ls -1 /dev/mapper/\*); 115 | 116 | my $disk; 117 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input"); 118 | while() { 119 | chomp; 120 | if(m/$disk_regex/) { 121 | $disk = $_; 122 | my $lun = $1 if( $disk =~ m/lun(\d+)/); 123 | $disks{$lun} = $disk; 124 | } 125 | } 126 | close(INPUT); 127 | 128 | foreach my $lun (sort numerically (keys %disks)) { 129 | 130 | my $disk = $disks{$lun}; 131 | 132 | my $nsdname = basename $disk; 133 | $nsdname =~ m/$disk_regex/; 134 | 135 | my $raid_controller = $1; 136 | 137 | push @{$disks_by_raid_controller{$raid_controller}}, $disk; 138 | } 139 | 140 | 141 | my $path_count = 0; 142 | while( %disks_by_raid_controller ) { 143 | 144 | foreach my $raid_controller (sort keys %disks_by_raid_controller) { 145 | 146 | print "\$raid_controller $raid_controller\n" if($verbose); 147 | 148 | my $disk = shift @{$disks_by_raid_controller{$raid_controller}}; 149 | print " \$disk $disk\n" if($verbose and $disk); 150 | if(defined $disk) { 151 | 152 | my $nsdname = basename $disk; 153 | $nsdname =~ m/$disk_regex/; 154 | 155 | push @output, "%nsd: device=$disk nsd=$nsdname servers=$servers\n"; 156 | 157 | $path_count++; 158 | 159 | } else { 160 | 161 | delete $disks_by_raid_controller{$raid_controller}; 162 | } 163 | } 164 | 165 | # 166 | # Make sure each server is assigned one lun per path each 167 | # go-round 168 | # 169 | if( $path_count >= $paths ) { 170 | $servers = list_shift($servers); 171 | $path_count = 0; 172 | } 173 | } 174 | 175 | return 1; 176 | } 177 | 178 | 179 | sub list_shift { 180 | 181 | my $servers = shift; 182 | 183 | my ($firstling, $remainder) = split(/,/, $servers, 2); 184 | 185 | # And the first shall be last... 186 | return "$remainder,$firstling"; 187 | } 188 | 189 | 190 | sub usage { 191 | version(); 192 | print << "EOF"; 193 | Description: 194 | 195 | $progname will auto-create a GPFS StanzaFile that will appropriately 196 | balance NSD device access across your NSD servers for best 197 | performance. 
198 | 199 | The resultant StanzaFile can be used as-is with the mmcrnsd command, 200 | and should provide balanced NSD device access from clients (good 201 | performance) if the following assumptions are true: 202 | 203 | That each group of NSD servers specified with each 204 | --server-group argument: 205 | 206 | a) have access to all of the same shared disk devices 207 | 208 | b) have been prepared for multipath use with the GPFS Goodies 209 | multipath.conf-creator tool 210 | 211 | If the assumptions above are not true for your environment, you may 212 | need to hand edit the StanzaFile before use. If you find you need 213 | to do this, please email the author(s) with before and after copies 214 | of your StanzaFile, and any other relevant details, and we will try 215 | to improve the tool to handle your situation in a future release. 216 | 217 | If you are satisfied with the StanzaFile you've created, you can use 218 | mmcrnsd to initialize the disks (see "man mmcrnsd" for more details): 219 | 220 | mmcrnsd -F GPFS_Goodies.StanzaFile 221 | 222 | Have fun! -Brian Finley 223 | 224 | 225 | Usage: $progname [OPTION...] --server-group SERVER[,SERVER,...] 226 | 227 | Options can be abbreviated to minimum uniqueness. For example, you 228 | could use "-h" or "--h" instead of "--help". 229 | 230 | --help 231 | 232 | --version 233 | 234 | -sg, --server-group SERVER[,SERVER,...] 235 | 236 | A comma delimited list of servers that are all connected to the 237 | same multi-pathed disk subsystem(s) (a building block). Make 238 | sure that you use the names of the servers as they appear in the 239 | 'Admin node name' column of the output from the 'mmlscluster' 240 | command. 241 | 242 | May be specified multiple times if you have multiple building 243 | blocks. 244 | 245 | Example: --sg nsd1,nsd2 --sg nsd3,nsd4 --sg nsd5,nsd6 246 | or: --server-group s1,s2,s3 --server-group s4,s5,s6 247 | 248 | 249 | --paths N 250 | 251 | Number of paths each server has to each disk.
252 | 253 | For example, if each server has 2x cables connected to each disk 254 | subsystem, then you would specify 2. 255 | 256 | Default: 2 257 | 258 | 259 | --out-file FILE 260 | 261 | Where FILE is the name you want to use for your shiny new 262 | StanzaFile. 263 | 264 | Default: I'll choose one for you and tell you what I've named it. 265 | 266 | Example: --out-file /tmp/$progname.StanzaFile 267 | 268 | 269 | Support: 270 | 271 | This software is provided as-is, with no express or implied 272 | support. However, the author would love to receive your 273 | patches. Please contact Brian E. Finley 274 | with patches and/or suggestions. 275 | 276 | To request support for an additional storage subsystem, please 277 | email the output from 'lsscsi' to <bfinley@lenovo.com>. Emails 278 | that have an actual storage subsystem attached (or at least 279 | remote access to one) are likely to get the most attention. ;-) 280 | 281 | 282 | EOF 283 | return 1; 284 | } 285 | 286 | sub get_tmp_file { 287 | 288 | my $file; 289 | 290 | my $template_base = '/tmp/gpfs_goodies.StanzaFile'; 291 | my $cmd = qq(mktemp $template_base.XXX); 292 | 293 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input"); 294 | while() { 295 | if( m#^($template_base.*)# ) { 296 | $file = $1; 297 | } 298 | } 299 | close(INPUT); 300 | 301 | return $file; 302 | } 303 | 304 | sub version { 305 | print qq(\n); 306 | print qq($progname v$version_number\n); 307 | print qq(\n); 308 | print qq( Part of the "gpfs_goodies" package\n); 309 | print qq(\n); 310 | 311 | return 1; 312 | } 313 | 314 | # Description: 315 | # Modify a sort so that 10 comes after 2.
316 | # Standard sort: (sort $numbers); # 1,10,2,3,4,5,6,7,8,9 317 | # Numerically: (sort numerically $numbers); # 1,2,3,4,5,6,7,8,9,10 318 | # 319 | # Usage: 320 | # foreach my $line (sort numerically (keys ( %{hash} ))) 321 | # 322 | sub numerically { 323 | $a <=> $b; 324 | } 325 | 326 | sub multisort { 327 | my ($a1, $a2, $a3) = split(/\s+/, $a); 328 | my ($b1, $b2, $b3) = split(/\s+/, $b); 329 | 330 | # Stringwise compare field 2 (element name) 331 | # then numeric field 1 (priority) 332 | # then stringwise field 3 (bundle) 333 | $a2 cmp $b2 334 | || 335 | $b1 <=> $a1 336 | || 337 | $a3 cmp $b3 338 | } 339 | 340 | 341 | 342 | 343 | # 344 | # END subroutines 345 | # 346 | ######################################################################## 347 | 348 | -------------------------------------------------------------------------------- /sbin/multipath.conf-creator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | # 4 | # "multipath.conf-creator" 5 | # 6 | 7 | # 8 | # Originally created by: Jarrod Johnson as "getluns.pl" 9 | # 10 | # 2012.10.16 Brian Elliott Finley 11 | # - added invocation options and help output 12 | # 2012.10.18 Brian Elliott Finley 13 | # - added additional info in resultant configuration 14 | # 2013.02.14 Brian Elliott Finley 15 | # - added blacklist feature 16 | # 2013.06.14 Brian Elliott Finley 17 | # - modify blacklist to use vendor/product info instead of disk name 18 | # 2013.07.18 Brian Elliott Finley 19 | # - add section entries for DCS3700 and DS35xx subsystems 20 | # - handle case where subsystem name has spaces 21 | # - handle case where SMCli or subsystem doesn't return a "lun number" 22 | # 2013.09.22 Brian Elliott Finley 23 | # - add --version output 24 | # 2013.11.06 Brian Elliott Finley 25 | # - use proper grammar for plural and singular controller(s) 26 | # - improve regex used to identify controller name 27 | # - incorporate numerous additional controllers in the 
multipath.conf 28 | # devices section 29 | # 2013.11.07 Brian Elliott Finley 30 | # - test for existence of SMcli 31 | # 2013.11.19 Brian Elliott Finley 32 | # - improve --help verbiage 33 | # 2014.07.14 Brian Elliott Finley & Christian Caruthers 34 | # - Identify and fix bug in handling device names that include hyphens 35 | # 2015.02.10 Brian Elliott Finley & Lerone Latouche 36 | # - Lerone identified a bug in the handling of SMcli API system names 37 | # - Brian fixed the bug 38 | # 39 | 40 | 41 | use strict; 42 | use Socket; 43 | use Getopt::Long; 44 | use File::Basename; 45 | 46 | my $progname = basename($0); 47 | my $version_number = '20.8.3'; 48 | 49 | GetOptions( 50 | "help" => \my $help, 51 | "version" => \my $version, 52 | "auto-detect" => \my $auto_detect, 53 | "out-file=s" => \my $out_file, 54 | "deploy=s" => \my $deploy, 55 | "dry-run" => \my $dry_run, 56 | "no-blacklist" => \my $no_blacklist, 57 | "v|verbose" => \my $verbose, 58 | ) or usage() and exit(1); 59 | 60 | if ( defined $help ) { 61 | usage(); 62 | exit 0; 63 | } 64 | 65 | if ( defined $version ) { 66 | version(); 67 | exit 0; 68 | } 69 | 70 | unless ( user_is_root() ) { 71 | usage(); 72 | print qq(\n--> Please run as root\n\n); 73 | exit 1; 74 | } 75 | 76 | if( ! defined $auto_detect and ! defined $ARGV[0] ) { 77 | usage(); 78 | print "\n"; 79 | print qq(--> Make sure you specify one of "--auto-detect" or "a list of subsystems".\n); 80 | print "\n"; 81 | exit 1; 82 | } 83 | 84 | #if( defined $blacklist and (($blacklist =~ m/^\-+n/) or (defined $no_blacklist)) ) { 85 | # usage(); 86 | # print "\n"; 87 | # print qq(--> You can specify either "--blacklist" or "--no-blacklist" but not both."\n); 88 | # print "\n"; 89 | # exit 1; 90 | #} 91 | 92 | if( ! defined $out_file ) { 93 | $out_file = get_tmp_file(); 94 | } 95 | 96 | my $smcli = which("SMcli"); 97 | if( ! defined $smcli ) { 98 | usage(); 99 | print "\n"; 100 | print qq(--> I can't find SMcli. 
Please make sure that the IBM DS Storage Manager\n); 101 | print qq( software is installed, and is in your path.\n); 102 | print "\n"; 103 | exit 1; 104 | } 105 | 106 | 107 | 108 | #my @blacklist_devices; 109 | #if( defined $no_blacklist ) { 110 | # undef @blacklist_devices; 111 | # 112 | #} elsif( defined $blacklist ) { 113 | # @blacklist_devices = split(/,/, $blacklist); 114 | # 115 | #} else { 116 | # # If we got to here, then we default to adding sda to the blacklist 117 | # @blacklist_devices = 'sda'; 118 | #} 119 | 120 | 121 | my @subsystems; 122 | if( defined $auto_detect ) { 123 | my $cmd = "SMcli -d"; 124 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input."); 125 | while() { 126 | if(m/^There are currently no storage systems listed/) { 127 | print << 'EOF'; 128 | 129 | There are currently no storage systems listed in the configuration file. Add 130 | storage systems using the Add Storage System option in the storage management 131 | software or by command line. 132 | 133 | Perhaps try running 'SMcli -A' to have SMclient auto-discover your storage 134 | subsystems. That would look something like this: 135 | 136 | [root@disktree ~]# SMcli -A 137 | Starting auto discovery. 138 | ........................................................................ 139 | .......................... 140 | Auto discovery operation successful. 141 | SMcli completed successfully. 142 | 143 | 144 | Then try running this command again. :-) 145 | 146 | EOF 147 | exit 2; 148 | } 149 | 150 | next if(m/^SMcli completed successfully/); 151 | next if(m/^(\s|$)/); 152 | 153 | if( m/^(\S+)\s+.*/ ) { 154 | my $subsystem = $1; 155 | push @subsystems, $subsystem; 156 | }; 157 | } 158 | close(INPUT); 159 | } else { 160 | push @subsystems, @ARGV; 161 | } 162 | 163 | my $quantity = scalar @subsystems; 164 | 165 | my $plural = ""; 166 | if( $quantity ne 1 ) { 167 | $plural = 's'; 168 | } 169 | 170 | print "\nLooks like we're dealing with $quantity subsystem$plural. 
Let's get going:\n\n"; 171 | 172 | my %aliases; 173 | foreach my $subsystem (@subsystems) { 174 | get_subsystem_info("$subsystem"); 175 | } 176 | 177 | 178 | sub get_subsystem_info { 179 | 180 | my $subsystem = shift; 181 | 182 | print qq( Querying "$subsystem"\n); 183 | print qq( -> Detecting API: ); 184 | my ($api, $assigned_name) = detect_subsystem_api("$subsystem"); 185 | print qq($api\n); 186 | $assigned_name = $subsystem unless($assigned_name); 187 | print qq( -> Subsystem Name: $assigned_name\n); 188 | print qq( -> Detecting LUNs\n); 189 | if($api eq 'SMcli') { 190 | get_subsystem_info_via_SMcli("$assigned_name"); 191 | } 192 | elsif($api eq 'IBM FlashSystem') { 193 | get_subsystem_info_via_IBM_FlashSystem("$assigned_name"); 194 | } 195 | print qq(\n); 196 | 197 | return 1; 198 | } 199 | 200 | 201 | sub detect_subsystem_api { 202 | 203 | my $subsystem = shift; 204 | 205 | my $cmd; 206 | 207 | if($auto_detect) { return 'SMcli'; } 208 | 209 | # 210 | # See if it's an SMcli device. If it is, update the subsystem name, and 211 | # return SMcli. 212 | my $assigned_name = test_for_host_in_smcli_list($subsystem); 213 | if($assigned_name) { 214 | $subsystem = $assigned_name; 215 | return 'SMcli', $assigned_name; 216 | } 217 | 218 | # 219 | # See if it has a resolvable hostname 220 | my $ip = hostname2ip($subsystem); 221 | if(! $ip) { 222 | 223 | usage(); 224 | print " WARNING: I couldn't find an IP address for $subsystem\n"; 225 | print "\n"; 226 | print " Hint --> Are you sure that's the right hostname?\n"; 227 | print "\n"; 228 | 229 | exit 1; 230 | } 231 | 232 | # 233 | # See if it is pingable 234 | if( ! 
test_for_host_is_pingable($ip) ) { 235 | 236 | usage(); 237 | print " WARNING: I couldn't ping IP address $ip for $subsystem\n"; 238 | print "\n"; 239 | print " Hint --> Are you sure that system is online?\n"; 240 | print "\n"; 241 | 242 | exit 1; 243 | } 244 | 245 | # 246 | # See if we can detect the model based on the MAC address 247 | $cmd = "ip neigh"; 248 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input"); 249 | while() { 250 | # 251 | # Example input 252 | # 253 | # 10.72.15.65 dev ib0 lladdr a0:00:01:00:fe:80:00:00:00:00:00:00:50:05:07:00:5b:00:f9:a5 STALE 254 | # 10.70.17.7 dev bond-10g lladdr 6c:ae:8b:08:2e:78 DELAY 255 | # 10.70.17.61 dev bond-10g lladdr 6c:ae:8b:08:34:84 STALE 256 | # 10.70.11.30 dev bond-10g lladdr 6c:ae:8b:08:35:7a STALE 257 | # 10.70.20.40 dev bond-10g lladdr 6c:ae:8b:08:2e:60 STALE 258 | # 10.72.13.17 dev ib0 lladdr a0:00:01:00:fe:80:00:00:00:00:00:00:50:05:07:00:5b:01:00:91 STALE 259 | # 260 | if(m/^$ip\s+dev\s+\S+\s+lladdr\s+(\S+)\s+/) { 261 | # ^^^ 262 | # | 263 | # mac address of the subsystem 264 | # 265 | my $mac = $1; 266 | 267 | # 268 | # We've matched the subsystem's IP and grabbed it's MAC, now let's dig deeper 269 | if($mac =~ m/^00:20:c2/) { 270 | return 'IBM FlashSystem'; 271 | } 272 | } 273 | } 274 | close(INPUT); 275 | 276 | # Assume SMcli for everything else (which may be incorrect, but 277 | # we'll know soon enough. ;-) 278 | return 'SMcli'; 279 | } 280 | 281 | 282 | sub get_subsystem_info_via_IBM_FlashSystem { 283 | 284 | my $subsystem = shift; 285 | 286 | my @input = (); 287 | my @lun_names; 288 | my $file; 289 | my $cmd; 290 | my %lun_numbers_by_lun_name; 291 | 292 | ######################################################################## 293 | # 294 | # BEGIN get list of luns 295 | # 296 | $file = get_tmp_file(); 297 | open(FILE,">$file") or die("Couldn't open $file for writing"); 298 | 299 | print FILE q|set timeout -1 | . qq|\n|; 300 | print FILE q|spawn $env(SHELL) | . 
qq|\n|; 301 | print FILE q|match_max 100000 | . qq|\n|; 302 | print FILE qq|send -- "telnet $subsystem\r" | . qq|\n|; 303 | print FILE q|expect " login:" | . qq|\n|; 304 | print FILE q|send -- "admin\r" | . qq|\n|; 305 | print FILE q|expect "Password:" | . qq|\n|; 306 | print FILE q|send -- "password\r" | . qq|\n|; 307 | print FILE q|expect "admin #:" | . qq|\n|; 308 | print FILE q|send -- "lu\r" | . qq|\n|; 309 | print FILE q|expect "admin #:" | . qq|\n|; 310 | 311 | close(FILE); 312 | 313 | $cmd = "expect -f $file"; 314 | open(INPUT,"$cmd|") or die $!; 315 | while() { 316 | push @input, (); 317 | } 318 | close(INPUT); 319 | unlink $file; 320 | 321 | # 322 | # Example input: 323 | # 324 | # admin #: lu 325 | # 326 | # ------------------------------Storage Information----------------------------- 327 | # Total Size: 18.75 TiB 328 | # Free Space: 23.97 GiB 329 | # Storage Mode: RAID 5 330 | # 331 | # ------------------------------Logical Unit Table------------------------------ 332 | # 333 | # --------------Name-------------- --Number-- --State-- ---Size--- --Log Lun-- 334 | # fs820-3_lun0 0 Good 3.12 TiB no 335 | # fs820-3_lun1 1 Good 3.12 TiB no 336 | # fs820-3_lun2 2 Good 3.12 TiB no 337 | # fs820-3_lun3 3 Good 3.12 TiB no 338 | # fs820-3_lun4 4 Good 3.12 TiB no 339 | # fs820-3_lun5 5 Good 3.12 TiB no 340 | # 341 | 342 | # 343 | # Skip past the cruft at the top 344 | # 345 | while(@input) { 346 | 347 | $_ = shift @input; 348 | 349 | last if(m/^--------------Name--------------/); 350 | } 351 | 352 | # 353 | # Now gather the actual LUN names 354 | # 355 | while(@input) { 356 | 357 | $_ = shift @input; 358 | if( m/^\S+/ ) { 359 | 360 | my $lun_name = substr($_, 0, 32); # We have to do it this way, as lun names may contain spaces. 361 | $lun_name =~ s/\s+$//; # Then we strip off any trailing spaces... 
362 | 363 | my $lun_number = substr($_, 33, 10); 364 | $lun_number =~ s/\s+$//; 365 | 366 | push @lun_names, $lun_name; 367 | $lun_numbers_by_lun_name{$lun_name} = $lun_number; 368 | } 369 | 370 | last if( m/^(\s+|$)/ ); 371 | } 372 | 373 | # 374 | # END get list of luns 375 | # 376 | ######################################################################## 377 | 378 | ######################################################################## 379 | # 380 | # BEGIN get lun info 381 | # 382 | 383 | $file = get_tmp_file(); 384 | open(FILE,">$file") or die("Couldn't open $file for writing"); 385 | 386 | print FILE q|set timeout -1 | . qq|\n|; 387 | print FILE q|spawn $env(SHELL) | . qq|\n|; 388 | print FILE q|match_max 100000 | . qq|\n|; 389 | print FILE qq|send -- "telnet $subsystem\r" | . qq|\n|; 390 | print FILE q|expect " login:" | . qq|\n|; 391 | print FILE q|send -- "admin\r" | . qq|\n|; 392 | print FILE q|expect "Password:" | . qq|\n|; 393 | print FILE q|send -- "password\r" | . qq|\n|; 394 | 395 | foreach my $lun_name (@lun_names) { 396 | print FILE q|expect "admin #:" | . qq|\n|; 397 | print FILE q|send -- "lu scsi_id \"| . $lun_name . q|\"\r" | . qq|\n|; 398 | } 399 | 400 | print FILE q|expect "admin #:" | . 
qq|\n|; 401 | 402 | close(FILE); 403 | 404 | @input = (); 405 | $cmd = "expect -f $file"; 406 | open(INPUT,"$cmd|") or die $!; 407 | while() { 408 | push @input, (); 409 | } 410 | close(INPUT); 411 | unlink $file; 412 | 413 | # 414 | # END get lun info 415 | # 416 | ######################################################################## 417 | 418 | ######################################################################## 419 | # 420 | # BEGIN format stanzas 421 | # 422 | my $lun_name; 423 | my $wwid; 424 | my $key; 425 | while(@input) { 426 | # 427 | # Input looks like: 428 | # 429 | # flashctl1-a login: admin 430 | # Password: 431 | # 432 | # IBM - Intelligent Cluster 433 | # FlashSystem-820 Management Interface 434 | # 435 | # Firmware Version: 6.3.1 436 | # Storage Capacity: 9.37 TiB 437 | # Management Controller: mc-1 438 | # Eth0 IP Address: 10.73.1.23 439 | # Hostname: flashctl1-a 440 | # Chassis Serial: T-6A2C 441 | # 442 | # 443 | # admin #: lu scsi_id "LogicalUnit 0" 444 | # 445 | # Page: 83 446 | # Type: 2 447 | # ID: 00 20 c2 40 00 14 6a 2c 448 | # 449 | # admin #: lu scsi_id "LogicalUnit 1" 450 | # 451 | # Page: 83 452 | # Type: 2 453 | # ID: 00 20 c2 40 01 14 6a 2c 454 | # 455 | # admin #: lu scsi_id "LogicalUnit 2" 456 | # 457 | # Page: 83 458 | # Type: 2 459 | # ID: 00 20 c2 40 02 14 6a 2c 460 | # 461 | # admin #: lu scsi_id "LogicalUnit 3" 462 | # 463 | # Page: 83 464 | # Type: 2 465 | # ID: 00 20 c2 40 03 14 6a 2c 466 | # 467 | # admin #: # 468 | 469 | $_ = shift @input; 470 | 471 | if( m/^admin #: lu scsi_id "(.*)"/ ) { 472 | 473 | $lun_name = $1; 474 | 475 | $key = "${subsystem}_${lun_name}"; 476 | 477 | $aliases{$key}->{subsystem} = $subsystem; 478 | $aliases{$key}->{lun_name} = $lun_name; 479 | $aliases{$key}->{lun_number} = $lun_numbers_by_lun_name{$lun_name}; 480 | 481 | } 482 | elsif( m/^Type:\s+(\d+)/ ) { 483 | 484 | # 485 | # Now we've got Type: 486 | # 487 | my $type = $1; 488 | 489 | # 490 | # Now get ID: 491 | # 492 | $_ = shift @input; 
493 | m/^ID:\s+(.*)/; 494 | my $id = $1; 495 | $id =~ s/\s+//g; 496 | 497 | # 498 | # Take the Type (2) and append the ID (00 20 c2 40 00 14 bd 28) 499 | # ^ ^ ^ 500 | # | | | 501 | # ^-------------------v v-^ v-------^ 502 | # | | | 503 | # v v v 504 | # $wwid should end up looking like: 20020c2400014bd28 505 | # 506 | $wwid = $type . $id; 507 | 508 | $aliases{$key}->{wwid} = $wwid; 509 | $aliases{$key}->{owner} = "ab"; # There is no preferred owner with IBM FlashStorage, so we say they both own it 510 | } 511 | } 512 | 513 | # 514 | # END format stanzas 515 | # 516 | ######################################################################## 517 | 518 | return 1; 519 | } 520 | 521 | 522 | sub get_subsystem_info_via_SMcli { 523 | 524 | my $subsystem = shift; 525 | 526 | my $lun_name; 527 | my $key; 528 | 529 | my $cmd = qq(SMcli -n "$subsystem" -c 'show logicalDrives;'); 530 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input."); 531 | while() { 532 | 533 | # 534 | # Get the LUN's Name (from the SMclient perspective) 535 | # 536 | if (/Logical Drive name:\s*(\S*)/) { 537 | 538 | my $lun_name = $1; 539 | 540 | $key = "${subsystem}_${lun_name}"; 541 | 542 | $aliases{$key}->{subsystem} = $subsystem; 543 | $aliases{$key}->{lun_name} = $lun_name; 544 | 545 | } 546 | # 547 | # Get the LUN's WWID 548 | # 549 | elsif (/Logical Drive ID:\s*(\S*)/) { 550 | 551 | my $wwid = $1; 552 | 553 | $wwid =~ s/^/3/; 554 | $wwid =~ s/://g; 555 | 556 | $aliases{$key}->{wwid} = $wwid; 557 | 558 | } 559 | # 560 | # Get the LUN's ID (Instance number -- Ie.: lun0) 561 | # 562 | elsif (/LUN:\s*(\S*)/) { 563 | 564 | my $lun_number = $1; 565 | 566 | $aliases{$key}->{lun_number} = $lun_number; 567 | 568 | } 569 | # 570 | # Get the LUN's SSID 571 | # 572 | elsif (/\s+Subsystem ID \(SSID\):\s+(\d+)/) { 573 | 574 | my $ssid = $1; 575 | 576 | $aliases{$key}->{ssid} = $ssid; 577 | 578 | } 579 | # 580 | # Get the LUN's ownership (at the raid controller level) 581 | # 582 | elsif (/Preferred 
owner:.*slot\s*(\S*)/) { 583 | 584 | my $owner = lc($1); 585 | 586 | $aliases{$key}->{owner} = $owner; 587 | 588 | } 589 | 590 | } 591 | close(INPUT); 592 | } 593 | 594 | 595 | ######################################################################## 596 | # 597 | # BEGIN write out the multipath.conf file 598 | # 599 | my $file = $out_file; 600 | open(FILE,">$file") or die("Couldn't open $file for writing"); 601 | 602 | print FILE << "EOF"; 603 | # 604 | # /etc/multipath.conf 605 | # 606 | # Created by $progname v$version_number, which is part of the 607 | # 608 | # "gpfs_goodies" package which can be found at http://snurl.com/gpfs_goodies 609 | # 610 | EOF 611 | 612 | my $chunk = '/etc/gpfs_goodies/multipath.conf-creator_config_chunks/defaults'; 613 | if( -e "$chunk" ) { 614 | open(CHUNK,"<$chunk") or die("Couldn't open $chunk for reading"); 615 | print FILE (); 616 | close(CHUNK); 617 | } 618 | 619 | print FILE qq(\n); 620 | 621 | # 622 | # Blacklist entries 623 | # 624 | unless(defined $no_blacklist) { 625 | 626 | my @blacklist_entries; 627 | push @blacklist_entries, 'LSI,Logical Volume'; 628 | push @blacklist_entries, '*,Universal Xport'; 629 | 630 | # 631 | # How to determine which device to blacklist on a node: 632 | # 633 | # Figure out which device is your root device (sda in this case) 634 | # and use lsscsi to find out how it's classified. Pay attention 635 | # to the vendor and product information as indicated below. These 636 | # are the two pieces we use to blacklist root devices across a 637 | # class of machines (Ie.: all of your storage servers with the 638 | # exact same hardware configuration). 
639 | # 640 | # [root@storage06 ~]# lsscsi | grep -w sda 641 | # [0:1:0:0] disk LSI Logical Volume 3000 /dev/sda 642 | # 643 | # ^^^ ^^^^^^^^^^^^^^ 644 | # | | 645 | # | --- product 646 | # --- vendor 647 | # 648 | # XXXX Note to self: at some point, accept a user provided blacklist 649 | # entry of the form: 650 | # 651 | # --blacklist "vendor,product" 652 | # --blacklist "LSI,Logical Volume" --blacklist "MyRaid,Illogical Volume" 653 | # 654 | 655 | print FILE qq(blacklist {\n); 656 | foreach my $entry (@blacklist_entries) { 657 | my ($vendor, $product) = split(/,/, $entry, 2); 658 | 659 | print FILE qq( device {\n); 660 | print FILE qq( vendor "$vendor"\n); 661 | print FILE qq( product "$product"\n); 662 | print FILE qq( }\n); 663 | 664 | } 665 | 666 | print FILE qq(}\n); 667 | print FILE qq(\n); 668 | } 669 | 670 | ######################################################################## 671 | # 672 | # BEGIN devices 673 | # 674 | 675 | print FILE << 'EOF'; 676 | # 677 | # For references on many of these entries, please see: 678 | # 679 | # The "IBM System Storage DS Storage Manager Version Installation and Host Support Guide.pdf" 680 | # (which can be found at: https://ibm.biz/Bdxk4t) 681 | # 682 | devices { 683 | 684 | EOF 685 | 686 | my $dir = '/etc/gpfs_goodies/multipath.conf-creator_config_chunks/'; 687 | opendir(my $dh, $dir) || die "can't opendir $dir: $!"; 688 | my @chunks = grep { /^device-/ && -f "$dir/$_" } readdir($dh); 689 | closedir $dh; 690 | 691 | foreach(@chunks) { 692 | $chunk = "$dir/$_"; 693 | if( -e "$chunk" ) { 694 | open(CHUNK,"<$chunk") or die("Couldn't open $chunk for reading"); 695 | print FILE (); 696 | close(CHUNK); 697 | } 698 | print FILE "\n"; 699 | } 700 | 701 | print FILE "}\n"; 702 | print FILE "\n"; 703 | 704 | # 705 | # END devices 706 | # 707 | ######################################################################## 708 | 709 | 710 | # 711 | # Multipath entries 712 | # 713 | print FILE qq(multipaths {\n); 714 | foreach my $key 
(sort keys %aliases) { 715 | 716 | 717 | # multipath { 718 | # # Controller Name: pershing-ctrl-14-3 719 | # # Raid Controller: a 720 | # # LUN Name: PA14_3_0 721 | # # LUN Number: 0 722 | # wwid 360080e50002e34b8000002fd506c631e 723 | # alias pershing_ctrl_14_3a_lun0 724 | # } 725 | 726 | my $subsystem = $aliases{$key}->{subsystem}; 727 | my $owner = $aliases{$key}->{owner}; 728 | my $lun_name = $aliases{$key}->{lun_name} if( defined $aliases{$key}->{lun_name} ); 729 | my $lun_number = $aliases{$key}->{lun_number} if( defined $aliases{$key}->{lun_number} ); 730 | my $ssid = $aliases{$key}->{ssid} if( defined $aliases{$key}->{ssid} ); 731 | 732 | my $lun; 733 | if(defined $lun_number) { 734 | $lun = $lun_number; 735 | } else { 736 | $lun = $ssid; 737 | } 738 | 739 | my $alias = "${subsystem}${owner}_lun${lun}"; 740 | 741 | my $wwid = $aliases{$key}->{wwid}; 742 | 743 | # 744 | # (10:18:23 AM) Scott Fadden (GPFS): 745 | # typeset bannedFromDeviceNames='/?$&*()' 746 | # typeset bannedFromDiskNames='.<>{}/?`~!@#$%^&*()+=-' 747 | # 748 | # GPFS Doesn't like hyphens in NSD disk names 749 | # 750 | $alias =~ s/-/_/g; 751 | # 752 | # And spaces introduced (perhaps by a subsystem name) are right 753 | # out! 754 | # 755 | $alias =~ s/ /_/g; 756 | 757 | print FILE " multipath {\n"; 758 | print FILE " #\n"; 759 | print FILE " # Controller Name: $subsystem\n"; 760 | print FILE " # Raid Controller: $owner\n"; 761 | print FILE " # LUN Name: $lun_name\n" if( defined $lun_name); 762 | print FILE " # LUN Number: $lun_number\n" if( defined $lun_number); 763 | print FILE " # Subsystem ID (SSID): $ssid\n" if( defined $ssid); 764 | print FILE " #\n"; 765 | print FILE " alias $alias\n"; 766 | print FILE " wwid $wwid\n"; 767 | print FILE " }\n"; 768 | } 769 | 770 | print FILE "}\n"; 771 | close(FILE); 772 | # 773 | # END write out the multipath.conf file 774 | # 775 | ######################################################################## 776 | 777 | print "Done! 
You can find your fresh multipath.conf file here:\n"; 778 | print "\n"; 779 | print " $out_file\n"; 780 | print "\n"; 781 | 782 | if( defined $deploy ) { 783 | 784 | print "\n"; 785 | my @servers = split(/,/, $deploy); 786 | 787 | foreach my $server (sort @servers) { 788 | 789 | my $cmd; 790 | my $file; 791 | 792 | #my $OS = determine_server_os($server); 793 | my $OS = "RHEL6"; 794 | 795 | print "Processing ${server}...\n\n"; 796 | if( $dry_run ) { 797 | 798 | print " I'm in --dry-run mode so I won't do anything, but here are the commands I would\n"; 799 | print " normally run:\n\n"; 800 | 801 | } 802 | 803 | # Copy out config 804 | $cmd = "scp $out_file $server:/etc/multipath.conf"; 805 | $cmd =~ s|/+|/|g; 806 | print " $cmd\n"; 807 | unless( $dry_run ) { 808 | !system( "$cmd >/dev/null" ) or die("Couldn't run $cmd"); 809 | } 810 | 811 | # Copy out nsddevices 812 | $file = "/var/mmfs/etc/nsddevices"; 813 | $cmd = "scp /usr/share/gpfs_goodies/$file $server:$file"; 814 | $cmd =~ s|/+|/|g; 815 | print " $cmd\n"; 816 | unless( $dry_run) { 817 | !system( "$cmd >/dev/null" ) or die("Couldn't run $cmd"); 818 | } 819 | 820 | $file = "/etc/modprobe.d/scsi_dh_alua.conf"; 821 | $cmd = "scp /usr/share/gpfs_goodies/$file $server:$file"; 822 | $cmd =~ s|/+|/|g; 823 | print " $cmd\n"; 824 | unless( $dry_run) { 825 | !system( "$cmd >/dev/null" ) or die("Couldn't run $cmd"); 826 | } 827 | 828 | # Start it all up. Should we also do an mmnsddiscover here? 829 | $cmd = qq(ssh $server ) . 
q(" 830 | chkconfig multipathd on 831 | service multipathd start 832 | multipathd -k'reconfigure' 833 | multipathd -k'reconfigure' 834 | chmod 755 /var/mmfs/etc/nsddevices 835 | chown root.root /var/mmfs/etc/nsddevices 836 | test -e /boot/initramfs-\$(uname -r).img.orig || rsync -a /boot/initramfs-\$(uname -r).img /boot/initramfs-\$(uname -r).img.orig 837 | dracut -f /boot/initramfs-\$(uname -r).img \$(uname -r) 838 | ls -l /boot/initramfs-\$(uname -r).img /boot/initramfs-\$(uname -r).img.orig 839 | "); 840 | $cmd =~ s|/+|/|g; 841 | 842 | print " $cmd\n"; 843 | unless( $dry_run) { 844 | #!system( $cmd ) or die("Couldn't run $cmd"); 845 | !system( "$cmd >/dev/null" ) or die("Couldn't run $cmd"); 846 | } 847 | 848 | print " done!\n\n"; 849 | 850 | } 851 | 852 | 853 | 854 | } else { 855 | print << 'EOF'; 856 | 857 | Consider trying the "--deploy" option. It will install your new multipath.conf 858 | file on your storage servers, and complete the steps necessary to enable 859 | multipath support. If you're unsure, you can run it with the "--dry-run" 860 | option, and I'll show you the commands that I would have run. 861 | 862 | See "--help" for details. 863 | 864 | EOF 865 | } 866 | 867 | exit 0; 868 | 869 | ######################################################################## 870 | # 871 | # BEGIN Subroutines 872 | # 873 | sub version { 874 | print qq(\n); 875 | print qq($progname v$version_number\n); 876 | print qq(\n); 877 | print qq( Part of the "gpfs_goodies" package\n); 878 | print qq(\n); 879 | 880 | return 1; 881 | } 882 | 883 | sub usage { 884 | version(); 885 | print << "EOF"; 886 | Usage: $progname [OPTION...] [--auto-detect | subsystem1 subsystem2...] 887 | 888 | Options can be abbreviated to minimum uniqueness. For example, you 889 | could use "-h" or "--h" instead of "--help". 890 | 891 | --help 892 | 893 | --version 894 | 895 | --auto-detect 896 | 897 | Highly recomended! 
898 | 899 | This command will use the output from "SMcli -d" to determine the list 900 | of subsystems to use. 901 | 902 | If you _don't_ specify this option, you'll need to specify a list of 903 | subsystems on the command line instead: 904 | 905 | $progname subsystem1 subsystem2 etc... 906 | 907 | 908 | --deploy SERVER[,SERVER,...] 909 | 910 | Install the following files on each specified server: 911 | 912 | - /etc/multipath.conf 913 | The file generated by this tool. 914 | 915 | - /etc/modprobe.d/scsi_dh_alua.conf 916 | Reduce boot time when connected to multipath devices, and eliminate 917 | some harmless, but noisy SCSI errors that may be displayed. 918 | 919 | - /var/mmfs/etc/nsddevices 920 | Tells GPFS to use your new multipath devices as NSD disks (and to 921 | not use any other devices). Once installed, just run it as a 922 | script to see which devices it's choosing. 923 | 924 | I'll also re-build the initrd or initramfs to include your new 925 | multipath.conf and scsi_dh_alua.conf files. 926 | 927 | 928 | --out-file FILE 929 | 930 | Where FILE is the name you want to use for your shiny new 931 | multipath.conf file. 932 | 933 | Default: I'll choose one for you and tell you what I've named it. 934 | 935 | Example: --out-file /tmp/multipath.conf.test_run 936 | 937 | 938 | --no-blacklist 939 | 940 | Don't blacklist any disks. By default, this tool will create a 941 | multipath.conf file that blacklists the local disks. 942 | 943 | Please verify before rebooting your nodes by running 944 | 'multipath -v3' and examining the output for blacklisted devices. 945 | 946 | 947 | Currently Supported Storage Controllers should include all IBM DS 948 | Storage Manager (SMClient) compatible subsystems and IBM FlashSystem 949 | storage. Testing has been performed on the following models: 950 | 951 | FlashSystem 820 DS3860 DCS3700 952 | DS3512 DS3524 953 | 954 | 955 | Support: 956 | 957 | This software is provided as-is, with no express or implied 958 | support. 
However, the author would love to receive your 959 | patches. Please contact Brian E. Finley 960 | with patches and/or suggestions. 961 | 962 | To request support for an additional storage subsystem, please 963 | email the output from 'lsscsi' to . Emails 964 | that have an actual storage subsystem attached (or at least 965 | remote access to one) are likely to get the most attention. ;-) 966 | 967 | 968 | EOF 969 | return 1; 970 | #XXXX --blacklist DISK[,DISK,DISK] 971 | # 972 | # A comma delimited list of disks to blacklist. 973 | # 974 | # If no --blacklist entry is specified, "sda" will automatically 975 | # be added to the blacklist as the default. If you specify a 976 | # blacklist it overrides the default (doesn't add to it). So if 977 | # you specify a blacklist, and want to include sda, it must be in 978 | # your comma delimited list. 979 | # 980 | # Example: --blacklist sdi,sdb,sdm 981 | # 982 | # 983 | } 984 | 985 | 986 | sub which { 987 | 988 | my $file = shift; 989 | my $path = shift; 990 | 991 | if( ! defined($path) ) { 992 | $path = $ENV{PATH}; 993 | } 994 | 995 | foreach my $dir (split(/:/,$path)) { 996 | if(-x "$dir/$file") { 997 | return "$dir/$file"; 998 | } 999 | } 1000 | return undef; 1001 | } 1002 | 1003 | sub get_tmp_file { 1004 | 1005 | my $file; 1006 | 1007 | my $template_base = '/tmp/multipath.conf'; 1008 | my $cmd = qq(mktemp $template_base.XXX); 1009 | 1010 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input"); 1011 | while() { 1012 | if( m#^($template_base.*)# ) { 1013 | $file = $1; 1014 | } 1015 | } 1016 | close(INPUT); 1017 | 1018 | return $file; 1019 | } 1020 | 1021 | sub user_is_root { 1022 | 1023 | if($< == 0) { 1024 | return 1; 1025 | } 1026 | return undef; 1027 | } 1028 | 1029 | 1030 | # Usage: 1031 | # my $ip = hostname2ip($hostname); 1032 | # Description: 1033 | # Convert hostname into the IPv4 address. 1034 | sub hostname2ip 1035 | { 1036 | my $ip = (gethostbyname(shift))[4] || ""; 1037 | return $ip ? 
inet_ntoa( $ip ) : undef;
}


# Usage:
#   my $assigned_name = test_for_host_in_smcli_list($subsystem);
# Description:
#   Scan the output of "SMcli -d -v" for a line that contains $subsystem
#   as a whitespace delimited word.  Returns the first word of the first
#   such line (the name SMcli expects to see with "-n"), or undef when no
#   line matches.
sub test_for_host_in_smcli_list {

    my $subsystem = shift;

    my $assigned_name;

    my $cmd = "SMcli -d -v";
    open(INPUT,"$cmd|") or die("Couldn't run $cmd for input");
    while(<INPUT>) {
        # \Q...\E makes the name match literally, so that the dots in a
        # host name or IP address are not treated as regex wildcards.
        if(m/(^|\s)(\Q$subsystem\E)(\s|$)/) {
            # we can use SMcli, but let's set the subsystem name to the one
            # SMcli expects to see with "-n".
            ($assigned_name) = split;
            last;
        }
    }
    # Leave the loop with "last" rather than "return" so that the pipe is
    # always closed, whether or not a match was found.
    close(INPUT);

    return $assigned_name;
}


# Usage:
#   test_for_host_is_pingable($ip) or ...;
# Description:
#   Send a single ping to $ip.  Returns 1 when ping exits with status 0,
#   undef otherwise.
#   NOTE(review): $ip is interpolated into a shell command line, so callers
#   should only pass trusted host names or addresses.
sub test_for_host_is_pingable {

    my $ip = shift;

    my $cmd = "ping -c 1 $ip >/dev/null 2>&1";
    if( !system($cmd) ) {
        return 1;
    }

    return undef;
}


#
#   END Subroutines
#
########################################################################
--------------------------------------------------------------------------------
/sbin/test_block_device_settings:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w

#
# 2012.09.19 Brian Elliott Finley
#   - The core of this I coopted from the command line from Scott
#     Denham.  Thanks, Scott!
# 2013.11.07 Brian Elliott Finley
#   - Turned into perl
# 2013.12.10 Brian Elliott Finley
#   - Added the --test option
#

#
# Use this script to verify that your storage nodes are using the
# desired settings for each block device.  -BEF-
#
# Example usage:
#
#   scp test_block_device_settings nsd1:/root/
#   ssh nsd1 ./test_block_device_settings
#

use strict;
use Getopt::Long;
use File::Basename;

my $progname = basename($0);
my $version_string = 'v' .
'20.5';

GetOptions(
    "help"      => \my $help,
    "version"   => \my $version,
    "test"      => \my $test,
) or usage() and exit(1);

if ( defined $help ) {
    usage();
    print qq(\n--> Try "$progname" with no options...\n\n);
    exit 0;
}

if ( defined $version ) {
    version();
    exit 0;
}

unless ( user_is_root() ) {
    usage();
    print qq(\n--> Please run as root\n\n);
    exit 1;
}

unless ( defined $test ) {
    usage();
    print qq|\n--> Try using the "--test" option (or "-t" for short).\n\n|;
    exit 1;
}



my %devices_by_friendly_name;   # friendly name => kernel device name (sdX, dm-N)
my %friendly_name_by_index;     # padded sort key => friendly name
my %dmsetup_info;               # "major:minor" => multipath map name
my $file;
my $cmd;

#
# Pass 1: learn the name of every multipath map, keyed by "major:minor".
#
$cmd = 'dmsetup ls --target multipath';
open(INPUT,"$cmd|") or die("Couldn't run $cmd for input");
while(<INPUT>) {
    #
    #   [root@x36n09 ~]# dmsetup ls --target multipath
    #   dcs3700b_lun17  (253, 23)
    #   dcs3700a_lun28  (253, 14)
    #   dcs3700a_lun12  (253, 5)
    #   dcs3700b_lun15  (253, 22)
    #   dcs3700a_lun9   (253, 6)
    #   dcs3700a_lun26  (253, 12)
    #   dcs3700a_lun11  (253, 3)
    #   dcs3700b_lun29  (253, 28)
    #   mpatha  (253, 30)
    #   dcs3700b_lun13  (253, 21)
    #   dcs3700a_lun7   (253, 2)
    #   dcs3700a_lun24  (253, 13)
    #   dcs3700b_lun8   (253, 18)
    #   dcs3700b_lun27  (253, 29)
    #   dcs3700a_lun23  (253, 11)
    #   dcs3700b_lun6   (253, 19)
    #   dcs3700b_lun25  (253, 26)
    #   dcs3700b_lun10  (253, 20)
    #   dcs3700a_lun19  (253, 9)
    #   dcs3700a_lun4   (253, 1)
    #   dcs3700a_lun21  (253, 10)
    #   dcs3700b_lun5   (253, 16)
    #   dcs3700a_lun2   (253, 4)
    #   dcs3700b_lun3   (253, 17)
    #   dcs3700b_lun22  (253, 25)
    #   dcs3700a_lun16  (253, 8)
    #   dcs3700a_lun0   (253, 0)
    #   dcs3700b_lun18  (253, 24)
    #   dcs3700b_lun1   (253, 15)
    #   dcs3700b_lun20  (253, 27)
    #   dcs3700a_lun14  (253, 7)
    #
    chomp;

    if(m/^(\S+)\s+\((\d+),\s+(\d+)\)/) {

        my $device =
            $1;
        my $major = $2;
        my $minor = $3;

        $dmsetup_info{"$major:$minor"} = $device;
    }
}
close(INPUT);


#
# Pass 2: walk every block device the kernel knows about, and give each
# one a friendly name and a sort key.
#
$file = '/proc/partitions';
open(FILE,"<$file") or die("Couldn't open $file for reading");
while(<FILE>) {

    chomp;

    #
    #   [root@x36n09 ~]# cat /proc/partitions  | head
    #   major minor  #blocks  name
    #
    #      8        0   70311936 sda
    #      8       16 15623913472 sdb
    #      8       32 15623913472 sdc
    #
    if(m/(\d+)\s+(\d+)\s+\d+\s+(\S+)/) {
        my $major  = $1;
        my $minor  = $2;
        my $device = $3;

        my $friendly_name;

        if($device =~ m/^dm-/) {

            #
            # If we have no major/minor, then don't even mess with it.
            # (That is, skip dm devices that dmsetup did not list as
            # multipath maps.)
            #
            next unless(defined $dmsetup_info{"$major:$minor"});

            $friendly_name = $dmsetup_info{"$major:$minor"};

        } else {

            $friendly_name = $device;
        }

        $devices_by_friendly_name{$friendly_name} = $device;


        #
        # Take a device name, like dcs3700a_lun2 which sorts after
        # dcs3700a_lun10, and before dcs3700b_lun1.
        #
        #   dcs3700b_lun1   => dcs3700_lun1000001
        #   dcs3700a_lun2   => dcs3700_lun1000002
        #   dcs3700a_lun10  => dcs3700_lun1000010
        #
        # If it's just an /dev/sda3 device, then handle it also.
        #
        my $index = $friendly_name;
        $index =~ s/[ab]_lun/_lun/;
        if( $index =~ m/(\d+)$/ ) {
            my $new_digits = $1 + 1000000;
            $index =~ s/(\d+)$/$new_digits/;
        }

        #
        # Now we pre-pad the index value, so that sdb comes before sdaa when
        # sorted later.
#
        $index = sprintf('%1000s', $index);

        $friendly_name_by_index{$index} = $friendly_name;
    }
}
close(FILE);


my $max_name_length = 13; # length of the 'friendly_name' header
foreach my $friendly_name (keys %devices_by_friendly_name) {
    my $friendly_name_length = length($friendly_name);
    if($friendly_name_length > $max_name_length) {
        $max_name_length = $friendly_name_length;
    }
}
$max_name_length += 1; # one space of padding after the longest name


#
# Report columns:  [ header, field width, attribute below /sys/block/DEVICE ]
#
# The header line and the value lines are both produced from these widths,
# so they always line up.  The scheduler column comes last and is handled
# separately, because its width depends on the value itself.
#
my @columns = (
    [ 'max_sectors_kb', 15, 'queue/max_sectors_kb' ],
    [ 'read_ahead_kb',  15, 'queue/read_ahead_kb'  ],
    [ 'queue_depth',    12, 'device/queue_depth'   ],
    [ 'nr_requests',    12, 'queue/nr_requests'    ],
    [ 'rq_affinity',    12, 'queue/rq_affinity'    ],
    [ 'add_random',     12, 'queue/add_random'     ],
    [ 'timeout',         8, 'device/timeout'       ],
);

my $header1 = 'friendly_name';
my $header2 = '';
foreach my $column (@columns) {
    my ($title, $width) = @$column;
    $header2 .= sprintf('%' . $width . 's', $title);
}
$header2 .= '  scheduler';
printf("%-${max_name_length}s%s\n", $header1, $header2);

# Underline: every non-blank header character becomes a dash.
$header1 =~ s/\S/-/g;
$header2 =~ s/\S/-/g;
printf("%-${max_name_length}s%s\n", $header1, $header2);


foreach my $index_key (sort keys %friendly_name_by_index) {

    my $friendly_name = $friendly_name_by_index{$index_key};

    my $device = $devices_by_friendly_name{$friendly_name};

    printf("%-${max_name_length}s", $friendly_name);

    foreach my $column (@columns) {
        my (undef, $width, $attribute) = @$column;
        printf('%' . $width . 's', read_sysfs_value("/sys/block/$device/$attribute"));
    }

    #
    # The scheduler value has no fixed width; it is printed after a two
    # space pad.
    #
    printf("  %s", read_sysfs_value("/sys/block/$device/queue/scheduler"));

    print "\n";
}


########################################################################
#
#   BEGIN Subroutines
#

# Usage:
#   my $value = read_sysfs_value($file);
# Description:
#   Returns 'N/A' when $file does not exist.  Otherwise returns the last
#   line of $file with its newline removed ('' for an empty file), and
#   dies if the file exists but can't be opened.
sub read_sysfs_value {

    my $file = shift;

    return 'N/A' unless( -e $file );

    my $value = '';
    open(my $fh, '<', $file) or die("Couldn't read $file");
    while(<$fh>) {
        chomp;
        $value = $_;
    }
    close($fh);

    return $value;
}

# Print the program name and version banner.  Always returns 1.
sub version {
    print qq(\n);
    print qq($progname $version_string\n);
    print qq(\n);
    print qq( Part of the "gpfs_goodies" package\n);
    print qq(\n);

    return 1;
}

# Print the version banner followed by the help text.  Always returns 1,
# so callers can write "usage() and exit(1)".
sub usage {
    version();
    print << "EOF";

This program will summarize key block device settings that impact the
performance when accessing your disk subsystems.  It makes no changes to
your system, and is safe to run on live production systems.

Usage:  $progname [OPTION...]

    All options can be abbreviated to minimum uniqueness.

    --help

        Show this help output.

    --version

        Show the program name and version.

    --test

        Perform the test.  This should be considered a safe action.


    Support:

        This software is provided as-is, with no express or implied
        support.  However, the author would love to receive your
        patches.  Please contact Brian E. Finley <bfinley\@lenovo.com>
        with patches and/or suggestions.


EOF
    return 1;
}

# Returns 1 when the real user id ($<) is 0, undef otherwise.
sub user_is_root {

    if($< == 0) {
        return 1;
    }
    return undef;
}


#
#   END Subroutines
#
########################################################################

--------------------------------------------------------------------------------
/sbin/tune_block_device_settings:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w

#
#   "tune_block_device_settings"
#
#   How it works:
#
#       This command should be run from one of your GPFS cluster
#       connected nodes after creating your GPFS file systems.  It will
#       detect a number of things in your GPFS cluster environment, and
#       make calculations based on best practices to create optimized
#       block device tuning settings in the form of udev rules.
#
#       Some of the data it uses to make its calculations include:
#
#       - Storage controller models
#       - Storage controller settings, such as cache mirroring
#         enabled/disabled
#       - Number of storage servers connected to each controller
#       - GPFS file system block sizes
#
#       It will create a single udev rule file per file system
#       (/etc/udev/rules.d/99-gpfs-FSNAME.rules), with one rule per
#       device and give you the option of a) deploying the rules to your
#       storage servers for you, and b) hot-activating the settings via
#       udev.
#

#
# 2013.11.10 Brian Elliott Finley
#   - created
# 2013.11.18 Brian Elliott Finley
#   - add initial default tuning settings
# 2014.01.03 Brian Elliott Finley
#   - minor refinements after testing on Ray Paden's test cluster
# 2014.08.06 Brian Elliott Finley
#   - handle tiered file systems, and other file systems with multiple
#     pools gracefully
#   - detect GSS file systems and exit with no action taken
#   - Operate in "--test" mode if "--deploy" not specified.  Suggested
#     by Francis Dang.
#

use strict;
use Getopt::Long;
use File::Basename;

my $progname = basename($0);
my $version_number = '20.8.7';
my $disks_per_array_default = 8;
my $udev_rules_file_base = "/etc/udev/rules.d/99-gpfs_goodies";

our $ERROR = 0;

GetOptions(
    "help"                      => \my $help,
    "version"                   => \my $version,
    "filesystem|fsname|fs=s"    => \my $fs,
    "deploy"                    => \my $deploy,
    "disks-per-array=s"         => \my $disks_per_array,
    "test|test-run"             => \my $test_run,
    "verbose|v"                 => \my $debug,
    "out-file=s"                => \my $out_file,
) or usage() and exit(1);

if ( defined $help ) {
    usage();
    exit 0;
}
elsif ( defined $version ) {
    version();
    exit 0;
}

# NOTE(review): the next two usage errors exit with status 0, unlike the
# "run as root" and "GPFS not installed" checks below -- confirm that this
# is intended before relying on the exit status in wrapper scripts.
if( defined $deploy and defined $test_run ) {
    usage();
    print qq(\n--> Try either "--deploy" or "--test"\n\n);
    exit 0;
}

if( ! defined $fs ) {
    usage();
    print qq(\n--> Try "--filesystem FSNAME"\n\n);
    exit 0;
}

if( ! defined $deploy and ! defined $test_run ) {
    $test_run = 1;
    print qq(Running in "--test" mode only -- no changes will be made to your system.\n);
}

if( ! defined $out_file ) {
    $out_file = get_tmp_file();
}

if ( !
     user_is_root() ) {
    usage();
    print qq(\n--> Please run as root\n\n);
    exit 1;
}

unless ( -e "/usr/lpp/mmfs/bin/mmlsnsd" and -e "/usr/lpp/mmfs/bin/mmlsfs" ) {
    usage();
    print qq(\n--> GPFS doesn't appear to be installed on this machine\n\n);
    exit 1;
}

my %servers;
my $block_size;
my %server_by_disk_path;
my $block_allocation_type;
my %disk_path_by_disk_name;
my %disk_pools;
my %disk_pool_by_disk_name;
my %disks_per_array_by_pool;


check_for_fs();
get_disk_pool_info();
process_disks_per_array_option();
get_mmlsnsd_info();
detect_gss_servers();
get_mmlsfs_info();
create_udev_rules();
report_results();

exit 0;



########################################################################
#
#   BEGIN Subroutines
#

# Usage:
#   process_disks_per_array_option();
# Description:
#   Fill in %disks_per_array_by_pool.  Every pool in %disk_pools starts
#   with $disks_per_array_default.  If --disks-per-array was given it
#   overrides that, and must be either
#
#       N                       the same count for every pool, or
#       N:POOL[,N:POOL...]      a count for each named pool.
#
#   Returns 1 on success.  Prints the usage text plus an error message and
#   exits with status 1 when the option value is malformed, or when no
#   pool ended up with a setting.
sub process_disks_per_array_option {

    my $valid = 'no';

    # Pre-assign the default for each pool
    foreach my $pool (keys %disk_pools) {
        $disks_per_array_by_pool{$pool} = $disks_per_array_default;
        $valid = 'yes';
    }

    if( defined $disks_per_array ) {

        #
        # Override the default with custom setting
        #
        if( $disks_per_array =~ m/^\d+$/ ) {

            foreach my $pool (keys %disk_pools) {
                $disks_per_array_by_pool{$pool} = $disks_per_array;
                $valid = 'yes';
            }

        } elsif( $disks_per_array =~ m/:/ ) {

            foreach my $argument (split(/,/, $disks_per_array)) {

                my ($count, $pool) = split(/:/, $argument);

                #
                # Break out of the loop and jump to the error message unless
                # $count is a number and a pool name follows the colon.
                #
                unless( defined $count and $count =~ m/^\d+$/
                        and defined $pool and length $pool ) {
                    $valid = 'no';
                    last;
                }

                $disks_per_array_by_pool{$pool} = $count;
                $valid = 'yes';
            }

        } else {

            #
            # Neither "N" nor "N:POOL".  Reject it, rather than silently
            # falling back to the defaults assigned above.
            #
            $valid = 'no';
        }
    }


    if($debug) {
        foreach my $pool (sort keys %disks_per_array_by_pool) {
            print "\$disks_per_array_by_pool{$pool}: $disks_per_array_by_pool{$pool}\n";
        }
    }


    if($valid eq 'yes') {
        return 1;
    } else {
        # The option may be unset here (no pools were found), so don't
        # interpolate an undefined value into the message.
        my $given = defined $disks_per_array ? $disks_per_array : '';
        usage();
        print qq(\n--> "$given" is an invalid value for --disks-per-array.\n\n);
        exit 1;
    }
}


# Verify that "mmlsfs $fs" succeeds.  Prints a progress line on success;
# prints a warning and exits with status 1 on failure.
sub check_for_fs {

    # Use the same absolute path that was verified at start-up and that
    # get_mmlsfs_info() uses, instead of depending on root's PATH.
    my $cmd = "/usr/lpp/mmfs/bin/mmlsfs $fs >/dev/null 2>&1";
    if( !system($cmd) ) {
        # success
        print qq(Examining file system "$fs"...\n);
    } else {
        # fail
        print "\n";
        print qq(WARNING: I can't seem to find file system "$fs". Please make sure to:\n);
        print "\n";
        print " - check the spelling of the file system name\n";
        print qq( - run this command from a node in the GPFS cluster that owns "$fs"\n);
        print "\n";

        exit 1;
    }
}


# For every server marked 'suspect' in %servers, ask rpm over ssh whether
# the gpfs.gss.firmware package is installed.  If any server has it, print
# a warning that lists those servers and exit with status 1.
sub detect_gss_servers {

    print "> detect_gss_servers()\n" if(defined $debug);

    my @gss_servers;

    foreach my $server (sort keys %servers) {

        next unless($servers{$server} eq 'suspect');

        my $cmd = qq(ssh $server rpm -q gpfs.gss.firmware >/dev/null 2>&1);
        print ">> Command: $cmd\n" if(defined $debug);
        if( !system($cmd) ) {
            push @gss_servers, $server;
        }
    }

    if(@gss_servers) {

        print qq(\n);
        print qq(WARNING:\n);
        print qq(\n);
        print qq( The following servers for file system "$fs" appear to be GSS servers:\n);
        print qq(\n);

        foreach my $server (@gss_servers) {
            print " $server\n";
        }

        print qq(\n);
        print qq( This tool is not necessary for GSS file systems, so I'm exiting\n);
        print
qq( now without making any changes.\n);
        print qq(\n);

        exit 1;
    }
}


# Usage:
#   get_mmlsnsd_info();
# Description:
#   Parse "mmlsnsd -f $fs -m" and record, for every "server node" line:
#
#     - %disk_path_by_disk_name and %server_by_disk_path, and
#       $servers{SERVER} = 'nsd', when the device path starts with /dev/
#     - $servers{SERVER} = 'suspect' otherwise
#
#   Always returns 1.
sub get_mmlsnsd_info {

    print "> get_mmlsnsd_info()\n" if(defined $debug);
    my $cmd = "/usr/lpp/mmfs/bin/mmlsnsd -f $fs -m 2>&1";
    print ">> Command: $cmd\n" if(defined $debug);
    open(INPUT,"$cmd|") or die("Couldn't run $cmd for input.");
    while(<INPUT>) {
        #
        # Sample input:
        #
        #   [root@x36n10 bef]# mmlsnsd -m
        #
        #    Disk name    NSD volume ID      Device         Node name                Remarks
        #   ---------------------------------------------------------------------------------------
        #    nsd_0        AC100009527FEBFE   /dev/mapper/dcs3860a_lun0 x36n09                   server node
        #    nsd_0        AC100009527FEBFE   /dev/mapper/dcs3860a_lun0 x36n10                   server node
        #    nsd_0        AC100009527FEBFE   /dev/mapper/dcs3860a_lun0 x36n11
        #    nsd_0        AC100009527FEBFE   /dev/mapper/dcs3860a_lun0 x36n12
        #    nsd_1        AC10000A527FEC03   /dev/mapper/dcs3860b_lun1 x36n09
        #    nsd_1        AC10000A527FEC03   /dev/mapper/dcs3860b_lun1 x36n10                   server node
        #    nsd_1        AC10000A527FEC03   /dev/mapper/dcs3860b_lun1 x36n11                   server node
        #    nsd_1        AC10000A527FEC03   /dev/mapper/dcs3860b_lun1 x36n12
        #    nsd_10       AC10000A527FEC30   /dev/mapper/dcs3860a_lun11 x36n09
        #    nsd_10       AC10000A527FEC30   /dev/mapper/dcs3860a_lun11 x36n10                   server node
        #    nsd_10       AC10000A527FEC30   /dev/mapper/dcs3860a_lun11 x36n11                   server node
        #    nsd_10       AC10000A527FEC30   /dev/mapper/dcs3860a_lun11 x36n12
        #    nsd_11       AC10000B527FEC34   /dev/mapper/dcs3860b_lun10 x36n09
        #    nsd_11       AC10000B527FEC34   /dev/mapper/dcs3860b_lun10 x36n10
        #    nsd_11       AC10000B527FEC34   /dev/mapper/dcs3860b_lun10 x36n11                   server node
        #    nsd_11       AC10000B527FEC34   /dev/mapper/dcs3860b_lun10 x36n12                   server node
        #    nsd_12       AC10000B527FEC39   /dev/mapper/dcs3860a_lun12 x36n09
        #    nsd_12       AC10000B527FEC39   /dev/mapper/dcs3860a_lun12 x36n10
        #   [snip]
        #
        #   [root@flashnsd1 bef]# mmlsnsd -m
        #    Disk name    NSD volume ID      Device         Node name                Remarks
        #   ---------------------------------------------------------------------------------------
        #    flashctl1ab_lun0 0A46011F53BD6EC5   /dev/mapper/flashctl1ab_lun0 flashnsd1-10g            server node
        #    flashctl1ab_lun0 0A46011F53BD6EC5   /dev/mapper/flashctl1ab_lun0 flashnsd2-10g            server node
        #    flashctl1ab_lun1 0A46011F53BD6ECA   /dev/mapper/flashctl1ab_lun1 flashnsd1-10g            server node
        #    flashctl1ab_lun1 0A46011F53BD6ECA   /dev/mapper/flashctl1ab_lun1 flashnsd2-10g            server node
        #    flashctl1ab_lun2 0A46012053BD6ECF   /dev/mapper/flashctl1ab_lun2 flashnsd1-10g            server node
        #    flashctl1ab_lun2 0A46012053BD6ECF   /dev/mapper/flashctl1ab_lun2 flashnsd2-10g            server node
        #    flashctl1ab_lun3 0A46012053BD6ED4   /dev/mapper/flashctl1ab_lun3 flashnsd1-10g            server node
        #    flashctl1ab_lun3 0A46012053BD6ED4   /dev/mapper/flashctl1ab_lun3 flashnsd2-10g            server node
        #    gss1_Data_16M_2p_1 0A460501539A6825   gss1_Data_16M_2p_1 gss1-10g.cluster         server node
        #    gss1_Data_16M_2p_1 0A460501539A6825   gss1_Data_16M_2p_1 gss2-10g.cluster         server node
        #    gss1_Data_16M_2p_2 0A460501539A6838   gss1_Data_16M_2p_2 gss1-10g.cluster         server node
        #    gss1_Data_16M_2p_2 0A460501539A6838   gss1_Data_16M_2p_2 gss2-10g.cluster         server node
        #    gss1_Data_16M_2p_3 0A460501539A684B   gss1_Data_16M_2p_3 gss1-10g.cluster         server node
        #    gss1_Data_16M_2p_3 0A460501539A684B   gss1_Data_16M_2p_3 gss2-10g.cluster         server node
        #    gss1_Data_4M_2p_1 0A460501539A68BC   gss1_Data_4M_2p_1 gss1-10g.cluster         server node
        #    gss1_Data_4M_2p_1 0A460501539A68BC   gss1_Data_4M_2p_1 gss2-10g.cluster         server node
        #    gss1_Data_4M_2p_2 0A460501539A68C6   gss1_Data_4M_2p_2 gss1-10g.cluster         server node
        #    gss1_Data_4M_2p_2 0A460501539A68C6   gss1_Data_4M_2p_2 gss2-10g.cluster         server node
        #    gss1_Data_4M_2p_3 0A460501539A68D0   gss1_Data_4M_2p_3 gss1-10g.cluster         server node
        #    gss1_Data_4M_2p_3 0A460501539A68D0   gss1_Data_4M_2p_3 gss2-10g.cluster         server node
        #   [snip]
        #
        print ">> $_" if(defined $debug);
        # Only lines that end in "server node" are of interest.
        if(m/^\s+(\S+)\s+\S+\s+(\S+)\s+(\S+)\s.*server node$/) {

            my $disk_name = $1;
            my $disk_path = $2;
            my $server    = $3;

            #
            # Note that the GSS NSD devices don't start with /dev/, and therefore
            # will not be processed.  We will silently and harmlessly skip over
            # such disk devices, mark those servers as suspect, and test them
            # later to see if they are in fact GSS nodes. -BEF-
            #
            if( $disk_path =~ m#^/dev/# ) {
                $disk_path_by_disk_name{$disk_name} = $disk_path;
                $server_by_disk_path{$disk_path} = $server;
                $servers{$server} = 'nsd';
            } else {
                $servers{$server} = 'suspect';
            }
        }
    }
    close(INPUT);

    return 1;
}


# Usage:
#   get_mmlsfs_info();
# Description:
#   Parse "mmlsfs $fs" and set $block_size from the "-B" line, and
#   $block_allocation_type from the "-j" line.  Always returns 1.
sub get_mmlsfs_info {

    print "> get_mmlsfs_info()\n" if(defined $debug);
    my $cmd = "/usr/lpp/mmfs/bin/mmlsfs $fs 2>&1";
    print ">> Command: $cmd\n" if(defined $debug);
    open(INPUT,"$cmd|") or die("Can't run $cmd for input");
    while(<INPUT>) {
        #
        # Sample input:
        #
        #   [root@x36n01 bef]# ./tune_block_device_settings
        #   flag                value                    description
        #   ------------------- ------------------------ -----------------------------------
        #    -f                 32768                    Minimum fragment size in bytes
        #    -i                 512                      Inode size in bytes
        #    -I                 32768                    Indirect block size in bytes
        #    -m                 1                        Default number of metadata replicas
        #    -M                 2                        Maximum number of metadata replicas
        #    -r                 1                        Default number of data replicas
        #    -R                 2                        Maximum number of data replicas
        #    -j                 scatter                  Block allocation type
        #    -D                 nfs4                     File locking semantics in effect
        #    -k                 all                      ACL semantics in effect
        #    -n                 32                       Estimated number of nodes that will mount file system
        #    -B                 1048576                  Block size
        #    -Q                 none                     Quotas enforced
        #                       none                     Default quotas enabled
        #    --filesetdf        No                       Fileset df enabled?
        #    -V                 13.23 (3.5.0.7)          File system version
        #    --create-time      Sun Nov 10 17:20:44 2013 File system creation time
        #    -u                 Yes                      Support for large LUNs?
        #    -z                 No                       Is DMAPI enabled?
        #    -L                 4194304                  Logfile size
        #    -E                 Yes                      Exact mtime mount option
        #    -S                 No                       Suppress atime mount option
        #    -K                 whenpossible             Strict replica allocation option
        #    --fastea           Yes                      Fast external attributes enabled?
        #    --inode-limit      134217728                Maximum number of inodes
        #    -P                 system                   Disk storage pools in file system
        #    -d                 nsd_0;nsd_1;nsd_2;nsd_3;nsd_4;nsd_5;nsd_6;nsd_7;nsd_8;nsd_9;nsd_10;nsd_11;nsd_12;nsd_13;nsd_14;nsd_15;nsd_16;nsd_17;nsd_18;nsd_19;nsd_20;nsd_21;nsd_22;nsd_23;nsd_24;
        #    -d                 nsd_25;nsd_26;nsd_27;nsd_28;nsd_29  Disks in file system
        #    --perfileset-quota no                       Per-fileset quota enforcement
        #    -A                 yes                      Automatic mount option
        #    -o                 none                     Additional mount options
        #    -T                 /fs_1m                   Default mount point
        #    --mount-priority   0                        Mount priority
        #
        print ">> $_" if(defined $debug);

        if(m/^\s+-B\s+(\d+)\s+/) {
            #  -B                 1048576                  Block size
            $block_size = $1;

        } elsif(m/^\s+-j\s+(scatter)\s+/) {
            #  -j                 scatter                  Block allocation type
            #
            # NOTE(review): only "scatter" is captured.  For any other
            # allocation type $block_allocation_type stays undefined --
            # confirm that calculate_read_ahead_kb() expects that.
            $block_allocation_type = $1;
        }
    }
    close(INPUT);

    return 1;
}


sub create_udev_rules {

    print qq(Creating fresh udev rules for file system "$fs"...\n);

    my $file = $out_file;
    open(FILE,">$file") or die("Couldn't open $file for writing");

    #
    # Create LUN specific settings
    #
    foreach my $disk_name (keys %disk_path_by_disk_name) {

        my $disk_path = $disk_path_by_disk_name{$disk_name};
        my $server    = $server_by_disk_path{$disk_path};

        my $max_sectors_kb  = calculate_max_sectors_kb($block_size);
        my $read_ahead_kb   = calculate_read_ahead_kb($block_allocation_type, $max_sectors_kb);
        my $nr_requests     =
calculate_nr_requests($disk_name); 425 | my $queue_depth = calculate_queue_depth($disk_name); 426 | my $elevator = 'noop'; 427 | 428 | my $scsi_id = get_scsi_id( $server, $disk_path ); 429 | if(defined $scsi_id) { 430 | 431 | print FILE qq(#\n); 432 | print FILE qq(# NSD: $disk_name PATH: $disk_path\n); 433 | print FILE qq(#\n); 434 | print FILE qq(# Individual Devices\n); 435 | print FILE q(SUBSYSTEM=="block", SUBSYSTEMS=="scsi", PROGRAM=="/lib/udev/scsi_id -g -u -d /dev/%k", RESULT==") . $scsi_id . q(", RUN+="/bin/sh -c '); 436 | print FILE q(/bin/echo ) . $max_sectors_kb . q( > /sys/block/%k/queue/max_sectors_kb; ); 437 | print FILE q(/bin/echo ) . $read_ahead_kb . q( > /sys/block/%k/queue/read_ahead_kb; ); 438 | print FILE q(/bin/echo ) . $nr_requests . q( > /sys/block/%k/queue/nr_requests; ); 439 | print FILE q(/bin/echo ) . $queue_depth . q( > /sys/block/%k/device/queue_depth; ); 440 | print FILE q(/bin/echo ) . $elevator . q( > /sys/block/%k/queue/scheduler; ); 441 | print FILE qq('"\n); 442 | print FILE qq(#\n); 443 | print FILE qq(# Multipathed device\n); 444 | print FILE q(SUBSYSTEM=="block", KERNEL=="dm-*", PROGRAM=="/lib/udev/scsi_id -g -u -d /dev/%k", RESULT==") . $scsi_id . q(", RUN+="/bin/sh -c '); 445 | print FILE q(/bin/echo ) . $max_sectors_kb . q( > /sys/block/%k/queue/max_sectors_kb; ); 446 | print FILE q(/bin/echo ) . $read_ahead_kb . q( > /sys/block/%k/queue/read_ahead_kb; ); 447 | print FILE q(/bin/echo ) . $nr_requests . q( > /sys/block/%k/queue/nr_requests; ); 448 | print FILE q(/bin/echo ) . $queue_depth . q( > /sys/block/%k/device/queue_depth; ); 449 | print FILE q(/bin/echo ) . $elevator . 
q( > /sys/block/%k/queue/scheduler; ); 450 | print FILE qq('"\n\n); 451 | 452 | } else { 453 | print "$disk_name -- Couldn't retrieve the scsi_id from $server:$disk_path.\n"; 454 | $ERROR++; 455 | } 456 | } 457 | 458 | # 459 | # Augment with device specific "always appropriate" settings 460 | # 461 | print FILE << 'EOF'; 462 | # --------------------------------------------------------------------------- 463 | # The following is excerpted from "Implementing IBM FlashSystem 840" Redbook. 464 | # --------------------------------------------------------------------------- 465 | # 466 | # Linux tuning 467 | # 468 | # The Linux kernel buffer file system writes data before it sends the data to the storage system. 469 | # With the FlashSystem 840, better performance can be achieved when the data is not buffered 470 | # but directly sent to the FlashSystem 840. When setting the scheduling policy to no operation 471 | # (NOOP), the fewest CPU instructions possible are used for each I/O. Setting the scheduler to 472 | # NOOP gives the best write performance on Linux systems. You can use the following setting 473 | # in most Linux distributions as a boot parameter: elevator=noop. 474 | # 475 | # Current Linux devices are managed by the device manager Udev. You can define how Udev 476 | # will manage devices by adding rules to the /etc/udev/rules.d directory. Example 5-24 477 | # shows a rule for the FlashSystem 840. 
478 | # 479 | # Example 5-24 Linux device rules 480 | # 481 | # [root@flashnsd1 ~]# cat 99-IBM-FlashSystem.rules 482 | # 483 | ACTION=="add|change", SUBSYSTEM=="block", ATTRS{device/model}=="FlashSystem", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0", ATTR{device/timeout}="5" 484 | ACTION=="add|change", KERNEL=="dm-*", PROGRAM="/bin/bash -c 'cat /sys/block/$name/slaves/*/device/model | grep FlashSystem'", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0" 485 | # 486 | ACTION=="add|change", SUBSYSTEM=="block", ATTRS{device/model}=="FlashSystem-9840", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0", ATTR{device/timeout}="10" 487 | ACTION=="add|change", KERNEL=="dm-*", PROGRAM="/bin/bash -c 'cat /sys/block/$name/slaves/*/device/model | grep FlashSystem-9840'", ATTR{queue/scheduler}="noop", ATTR{queue/rq_affinity}="2", ATTR{queue/add_random}="0" 488 | EOF 489 | 490 | close(FILE); 491 | 492 | return 1; 493 | } 494 | 495 | sub report_results { 496 | 497 | if($ERROR == 0) { 498 | 499 | print "Your new rules can be found here:\n\n $out_file\n\n"; 500 | 501 | if(defined $deploy) { 502 | distribute_rule($out_file); 503 | udev_reread(); 504 | } else { 505 | print "\n"; 506 | print "Not deploying. 
You might want to try --deploy.\n"; 507 | print "\n"; 508 | print "See $progname --help for details.\n"; 509 | print "\n"; 510 | } 511 | 512 | } else { 513 | print "\n"; 514 | print " ERROR: Please make sure that all devices, and ideally all\n"; 515 | print " NSD servers are fully operational, then try again.\n"; 516 | print "\n"; 517 | print " Try 'mmlsnsd -m' to verify.\n"; 518 | print "\n"; 519 | exit 1; 520 | } 521 | 522 | return 1; 523 | } 524 | 525 | 526 | sub get_tmp_file { 527 | 528 | my $cmd = "mktemp /tmp/$progname.99-gpfs-${fs}.rules.XXX"; 529 | 530 | my $file; 531 | 532 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input"); 533 | while(<INPUT>) { 534 | if( m#^(/tmp/$progname.*)# ) { 535 | $file = $1; 536 | } 537 | } 538 | close(INPUT); 539 | 540 | print ">> get_tmp_file => $file\n" if(defined $debug); 541 | return $file; 542 | } 543 | 544 | sub distribute_rule { 545 | 546 | my $file = shift; 547 | 548 | my $udev_rules_file = $udev_rules_file_base . "-${fs}.rules"; 549 | 550 | print "Deploying as $udev_rules_file to:\n"; 551 | foreach my $server (sort keys %servers) { 552 | print " $server\n"; 553 | my $cmd = "scp -q $file $server:$udev_rules_file"; 554 | print ">> Command: $cmd\n" if(defined $debug); 555 | !system($cmd) or die("FAILED: $cmd\n"); 556 | } 557 | print "\n"; 558 | print "Done!
Your file system has been tuned and is ready for action.\n"; 559 | print qq(Perhaps try "test_block_device_settings" on each of your NSD servers\n); 560 | print "\n"; 561 | 562 | return 1; 563 | } 564 | 565 | sub udev_reread { 566 | 567 | # 568 | # RHEL6 and friends, Ubuntu 12.10 and later: 569 | # udevadm trigger --verbose --subsystem-match=block 570 | # which udevadm >/dev/null 2>&1 && udevadm trigger --verbose --subsystem-match=block 571 | # 572 | # RHEL5 and friends: 573 | # udevcontrol reload_rules 574 | # 575 | foreach my $server (sort keys %servers) { 576 | my $cmd = qq(ssh $server 'which udevadm >/dev/null 2>&1 && udevadm trigger --subsystem-match=block || udevcontrol reload_rules'); 577 | print ">> Command: $cmd\n" if(defined $debug); 578 | !system($cmd) or die("FAILED: $cmd\n"); 579 | } 580 | 581 | return 1; 582 | } 583 | 584 | sub get_scsi_id { 585 | 586 | my $server = shift; 587 | my $disk_path = shift; 588 | my $scsi_id; 589 | 590 | print "> get_scsi_id()\n" if(defined $debug); 591 | my $cmd = "ssh $server /lib/udev/scsi_id -g -u -d $disk_path"; 592 | print ">> Command: $cmd\n" if(defined $debug); 593 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input."); 594 | while(<INPUT>) { 595 | # 596 | # Example Input -- this is what we expect the response to normally 597 | # look like, but as long as it's a unique string, we don't really 598 | # care.
599 | # 600 | # DCS3700 style: 601 | # 602 | # 360080e50002934b80000403b5283dbe6 603 | # 360080e50002937b800003f6b5283dbf5 604 | # 360080e50002937b800003f8f5283dc3f 605 | # 606 | # FlashSystems 820 style: 607 | # 608 | # 20020c24001146a2c 609 | # 20020c24000146a2c 610 | # 20020c24002146a2c 611 | # 612 | if( m/^(\S+)/ ) { 613 | $scsi_id = $1; 614 | last; 615 | } 616 | } 617 | close(INPUT); 618 | 619 | if(defined $scsi_id) { 620 | return $scsi_id; 621 | } else { 622 | return undef; 623 | } 624 | } 625 | 626 | 627 | sub calculate_read_ahead_kb { 628 | 629 | my $block_allocation_type = shift; 630 | my $max_sectors_kb = shift; 631 | 632 | # 633 | # General best practice: 634 | # 635 | # If block allocation type is 'scatter', then set to zero. If 636 | # it's 'cluster', then set to /* XXX ask Ray for method to determine this */ 637 | # 638 | # -Tuning input from: 639 | # Sven Oehme 640 | # Ray Paden 641 | # 642 | if( "$block_allocation_type" eq 'scatter' ) { 643 | return 0; 644 | } else { 645 | # Hmm. Must be type 'cluster' 646 | #XXX Ray -- what formula should we use here? Using $max_sectors_kb for now... -BEF- 647 | return $max_sectors_kb; 648 | } 649 | } 650 | 651 | 652 | sub calculate_max_sectors_kb { 653 | 654 | my $block_size = shift; 655 | 656 | # 657 | # GPFS block size / 4 or 512, whichever is greater. 658 | # 659 | # max_sectors_kb should always be set as high as you can, 660 | # independent of the block size. However, empirical testing 661 | # indicates lower performance, and in rare cases SCSI underruns, 662 | # if this is set too high with certain hardware. 663 | # 664 | # Therefore, we start with the conservative setting below, that 665 | # should still be an improvement over the out of the box setup. 666 | # 667 | # -Tuning input from: 668 | # Sven Oehme 669 | # Ray Paden 670 | # 671 | # Note: GPFS block size is in bytes, so we must divide by 1024 to 672 | # get kilobytes. Then we divide by 4 to get our max_sectors_kb 673 | # value. 
Thanks go to Christian Caruthers for his input on this 674 | # -BEF-. 675 | # 676 | my $kb = $block_size / 1024 / 4; 677 | 678 | if( $kb < 512 ) { 679 | $kb = 512; 680 | } 681 | 682 | return $kb; 683 | } 684 | 685 | 686 | sub get_disk_pool_info { 687 | 688 | print "> get_disk_pool_info()\n" if(defined $debug); 689 | my $cmd = "/usr/lpp/mmfs/bin/mmlsdisk $fs 2>&1"; 690 | print ">> Command: $cmd\n" if(defined $debug); 691 | open(INPUT,"$cmd|") or die("Couldn't run $cmd for input."); 692 | while(<INPUT>) { 693 | # 694 | # [root@flashnsd1 ~]# mmlsdisk tiered 695 | # disk driver sector failure holds holds storage 696 | # name type size group metadata data status availability pool 697 | # ------------ -------- ------ ----------- -------- ----- ------------- ------------ ------------ 698 | # flashctl1ab_lun0 nsd 512 -1 Yes Yes ready up system 699 | # flashctl1ab_lun1 nsd 512 -1 Yes Yes ready up system 700 | # flashctl1ab_lun2 nsd 512 -1 Yes Yes ready up system 701 | # flashctl1ab_lun3 nsd 512 -1 Yes Yes ready up system 702 | # sur_dcs3700a_lun0 nsd 512 121 No Yes ready up Tier2 703 | # sur_dcs3700b_lun1 nsd 512 121 No Yes ready up Tier2 704 | # sur_dcs3700a_lun2 nsd 512 121 No Yes ready up Tier2 705 | # sur_dcs3700b_lun3 nsd 512 121 No Yes ready up Tier2 706 | # sur_dcs3700a_lun4 nsd 512 121 No Yes ready up Tier2 707 | # sur_dcs3700b_lun5 nsd 512 121 No Yes ready up Tier2 708 | # sur_dcs3700a_lun6 nsd 512 121 No Yes ready up Tier2 709 | # sur_dcs3700b_lun7 nsd 512 121 No Yes ready up Tier2 710 | # sur_dcs3700a_lun8 nsd 512 121 No Yes ready up Tier2 711 | # sur_dcs3700b_lun9 nsd 512 121 No Yes ready up Tier2 712 | # sur_dcs3700a_lun10 nsd 512 121 No Yes ready up Tier2 713 | # sur_dcs3700b_lun11 nsd 512 121 No Yes ready up Tier2 714 | # sur_dcs3700a_lun12 nsd 512 121 No Yes ready up Tier2 715 | # sur_dcs3700b_lun13 nsd 512 121 No Yes ready up Tier2 716 | # sur_dcs3700a_lun14 nsd 512 121 No Yes ready up Tier2 717 | # sur_dcs3700b_lun15 nsd 512 121 No Yes ready up Tier2 718 | #
sur_dcs3700a_lun16 nsd 512 121 No Yes ready up Tier2 719 | # sur_dcs3700b_lun17 nsd 512 121 No Yes ready up Tier2 720 | # sur_dcs3700a_lun18 nsd 512 121 No Yes ready up Tier2 721 | # sur_dcs3700b_lun19 nsd 512 121 No Yes ready up Tier2 722 | # sur_dcs3700a_lun20 nsd 512 121 No Yes ready up Tier2 723 | # sur_dcs3700b_lun21 nsd 512 121 No Yes ready up Tier2 724 | # sur_dcs3700a_lun22 nsd 512 121 No Yes ready up Tier2 725 | # sur_dcs3700b_lun23 nsd 512 121 No Yes ready up Tier2 726 | # sur_dcs3700a_lun24 nsd 512 121 No Yes ready up Tier2 727 | # sur_dcs3700b_lun25 nsd 512 121 No Yes ready up Tier2 728 | # sur_dcs3700a_lun26 nsd 512 121 No Yes ready up Tier2 729 | # sur_dcs3700b_lun27 nsd 512 121 No Yes ready up Tier2 730 | # sur_dcs3700a_lun28 nsd 512 121 No Yes ready up Tier2 731 | # sur_dcs3700b_lun29 nsd 512 121 No Yes ready up Tier2 732 | # sur_dcs3700a_lun30 nsd 512 121 No Yes ready up Tier2 733 | # sur_dcs3700b_lun31 nsd 512 121 No Yes ready up Tier2 734 | # sur_dcs3700a_lun32 nsd 512 121 No Yes ready up Tier2 735 | # sur_dcs3700b_lun33 nsd 512 121 No Yes ready up Tier2 736 | # sur_dcs3700a_lun34 nsd 512 121 No Yes ready up Tier2 737 | # sur_dcs3700b_lun35 nsd 512 121 No Yes ready up Tier2 738 | 739 | print ">> $_" if(defined $debug); 740 | 741 | next if(m/^(disk|name|----+)\s/); 742 | # 743 | # disk driver sector failure holds holds storage 744 | # name type size group metadata data status availability pool 745 | # ------------ -------- ------ ----------- -------- ----- ------------- ------------ ------------ 746 | # 747 | 748 | if(m/^(\S+)\s+.*\s+(\S+)/) { 749 | # ^^^ ^^^ 750 | # | | 751 | # | ----< pool 752 | # ----< disk_name 753 | 754 | my $disk_name = $1; 755 | my $pool = $2; 756 | 757 | $disk_pool_by_disk_name{$disk_name} = $pool; 758 | $disk_pools{$pool} = 1; 759 | } 760 | } 761 | 762 | return 1; 763 | } 764 | 765 | 766 | sub calculate_nr_requests { 767 | 768 | my $disk_name = shift; 769 | 770 | # 771 | # nr_requests can be higher than queue_depth 
without much negative 772 | # impact as the device queue determines how much is actually queued 773 | # on the device. 774 | # 775 | # queue_depth is what can really hurt drive performance if it's set 776 | # too high. 777 | # 778 | # So, 32 per physical disk is a good starting point for nr_requests 779 | # and a queue depth of 4 per physical disk. 780 | # 781 | # -Tuning input from: 782 | # Sven Oehme 783 | # 784 | 785 | my $pool = $disk_pool_by_disk_name{$disk_name}; 786 | my $nr_requests = $disks_per_array_by_pool{$pool} * 32; 787 | 788 | return $nr_requests; 789 | } 790 | 791 | 792 | sub calculate_queue_depth { 793 | 794 | my $disk_name = shift; 795 | 796 | # 797 | # Let's take an 8+2p array (8 data disks, 2 parity disks, AKA RAID6) 798 | # as an example: 799 | # 800 | # The most accurate value for _writes_ would be based on 8 disks. 801 | # I/O on the other 2 disks is "generated parity" by the array, and 802 | # need not be considered by the OS. 803 | # 804 | # The most accurate value for _reads_ would be based on 10 disks. 805 | # Reads may pull data from across all 10 disks. 806 | # 807 | # Therefore, we recommend the conservative approach for this tool, 808 | # which would be 8. 809 | # 810 | # -Tuning input from: 811 | # Sven Oehme 812 | # 813 | 814 | my $pool = $disk_pool_by_disk_name{$disk_name}; 815 | my $queue_depth = $disks_per_array_by_pool{$pool} * 4; 816 | 817 | return $queue_depth; 818 | } 819 | 820 | 821 | sub version { 822 | print qq(\n); 823 | print qq($progname v$version_number\n); 824 | print qq(\n); 825 | print qq( Part of the "gpfs_goodies" package\n); 826 | print qq(\n); 827 | 828 | return 1; 829 | } 830 | 831 | sub usage { 832 | 833 | my $udev_rules_file = $udev_rules_file_base . "-FSNAME.rules"; 834 | 835 | version(); 836 | print << "EOF"; 837 | Usage: $progname --filesystem FSNAME [OPTION...] 838 | 839 | $progname should be considered BETA code at this point. 840 | 841 | All options can be abbreviated to minimum uniqueness. 
842 | 843 | This program will examine your environment including GPFS, storage 844 | servers, disk subsystems, and LUNs, to calculate best practice block 845 | device tuning settings. It will create one udev rules file per file 846 | system with the new tuning settings, using one entry for each LUN, 847 | and optionally deploy it to each participating storage server. 848 | 849 | This tool is intended to be run with GPFS 'active' on all the NSD 850 | servers serving your specified file system. 851 | 852 | Note that it will skip over and ignore any GSS file systems. GSS block 853 | device settings are tuned directly by the GSS software stack. 854 | 855 | --help 856 | 857 | --version 858 | 859 | --filesystem FSNAME 860 | 861 | Where FSNAME is the name of the file system whose block devices 862 | should be tuned. 863 | 864 | 865 | --disks-per-array (NN|NN:POOLNAME,[...]) 866 | 867 | Where NN is the number of disks in each array. 868 | 869 | Often, each LUN presented to the OS represents one RAID array in 870 | the storage subsystem. Here are some examples by array type: 871 | 872 | Value for NN Array Type 873 | ------------ -------------------------------------------- 874 | 8 8+2p RAID6 (8 data + 2 parity disks) 875 | 8 8+3p Reed Solomon (8 data + 3 parity disks) 876 | 4 4+1p RAID5 (4 data + 1 parity disk) 877 | 1 1+1m RAID1 (1 data + 1 mirrored disk) 878 | 879 | Hint #1: If all of your disks are in the same kind of array 880 | (e.g.: RAID6 with 8 data disks + 2 parity disks), then you can 881 | simply use the "NN" format, even if you have multiple file 882 | systems and multiple pools. 883 | 884 | Hint #2: If you don't know what all this "pool" stuff is about, 885 | then you probably only have one pool (the "system" pool). Try 886 | "mmlspool FSNAME" to have a look if you're curious. 
887 | 888 | NN - If only "NN" is specified, then it is assumed that NN 889 | represents the number of disks per array across all file systems 890 | and living on all pools served by "--servers" (or you only have 891 | one pool per file system). 892 | 893 | NN:POOLNAME - The number of disks (NN) per array across all 894 | arrays in pool POOLNAME that are part of file system FSNAME. 895 | 896 | Examples: 897 | 898 | --disks-per-array 8 899 | --disks-per-array 4:system,8:tier2 900 | 901 | Default: $disks_per_array_default 902 | 903 | 904 | --test 905 | 906 | Create the rules, but don't deploy them. This is the default 907 | action if --deploy is not specified. 908 | 909 | 910 | --deploy 911 | 912 | Deploy and activate the resultant udev rules file to each 913 | participating storage server. Participating storage servers are 914 | identified by their role as an NSD server for any of the LUNs in 915 | active file systems. Execute the command "mmlsnsd" for a list 916 | of these servers. 917 | 918 | The name of the udev rules file on the target NSD servers will 919 | be: $udev_rules_file 920 | 921 | 922 | --out-file FILE 923 | 924 | The name of your resultant udev rules file. This file can be 925 | given any name you like. 926 | 927 | If you also use the --deploy option, this file will still be 928 | deployed to your storage servers with the name of: 929 | 930 | $udev_rules_file 931 | 932 | Example: --out-file /tmp/my_shiny_new_udev_rules_file.conf 933 | 934 | Default: I'll choose one for you and tell you what I've named 935 | it. 936 | 937 | 938 | Support: 939 | 940 | This software is provided as-is, with no express or implied 941 | support. However, the author would love to receive your 942 | patches. Please contact Brian E. Finley 943 | with patches and/or suggestions. 
944 | 945 | EOF 946 | return 1; 947 | } 948 | 949 | sub user_is_root { 950 | 951 | if($< == 0) { 952 | return 1; 953 | } 954 | return undef; 955 | } 956 | 957 | 958 | # 959 | # END Subroutines 960 | # 961 | ######################################################################## 962 | 963 | # 964 | # :set tw=0 ts=4 ai et 965 | # 966 | -------------------------------------------------------------------------------- /var/mmfs/etc/nsddevices: -------------------------------------------------------------------------------- 1 | #!/bin/ksh 2 | 3 | # 4 | # 2012.10.27 Brian Finley . 5 | # - Modified from the stock example provided with the GPFS distribution 6 | # 7 | 8 | # 9 | # nsddevices file 10 | # 11 | # Used to tell GPFS to use our fresh multipath devices instead of the 12 | # /dev/dm-* devices. 13 | # 14 | # Key benefits: 15 | # 16 | # - Allows you to match your NSD names directly to your multipath 17 | # device names, which is quite handy for troubleshooting a running 18 | # filesystem. 19 | # 20 | # - Dramatically simplifies the "mmcrnsd" process. 21 | # 22 | # Use of this file can eliminate much confusion at mmcrnsd time. 23 | # With /dev/dm-* devices, and multiple NSD servers that can both see 24 | # the same LUNs, each NSD server may end up with a different 25 | # /dev/dm-* name for the same LUN (Ie.: lun1 is /dev/dm-7 on server1 26 | # and /dev/dm-12 on server2). 27 | # 28 | # As the mmcrnsd command performs it's LUN initialization from the 29 | # primary NSD server indicated, and one wants to balance shared LUNs 30 | # across connected NSD servers, this can cause significant confusion 31 | # at best. At worst, it can lead to human error resulting in the 32 | # initialization of an NSD server's local disk (Ie.: /dev/dm-7 is 33 | # actually /dev/sda on server2). It can also lead to the mistaken 34 | # belief that LUNs are balanced across NSD servers, when in fact, 35 | # they're not. 
;-) 36 | # 37 | # Use of this file is not a requirement, but provides significant 38 | # benefit by addressing these issues. 39 | # 40 | 41 | 42 | ############################################################################## 43 | # 44 | # When properly installed, this script is invoked by the 45 | # /usr/lpp/mmfs/bin/mmdevdiscover script. 46 | # 47 | # INSTALLATION GUIDELINES FOR THIS SCRIPT: 48 | # 49 | # a) Use /usr/lpp/mmfs/samples/nsddevices.sample as a configuration guideline 50 | # b) Copy this script to /var/mmfs/etc/nsddevices on each NSD server 51 | # c) Make it executable: "chmod +x /var/mmfs/etc/nsddevices" 52 | # 53 | ############################################################################## 54 | 55 | CONTROLLER_REGEX='[ab]_lun[0-9]+' 56 | for dev in $( /bin/ls /dev/mapper | egrep $CONTROLLER_REGEX ) 57 | do 58 | # 59 | # dmm vs. generic is used by GPFS to prioritize internal order of 60 | # searching through available disks, then later GPFS discards other 61 | # disk device names that it finds that match as the same NSD device 62 | # by a different path. For this reason, dmm vs. generic is an 63 | # important distinction if you are not explicitly producing the 64 | # entire and exclusive set of disks that GPFS should use, as output 65 | # from this script (nsddevices) _and_ exiting this script with a 66 | # "return 0". -Brian Finley 67 | # 68 | #echo mapper/$dev dmm 69 | echo mapper/$dev generic 70 | done 71 | 72 | # Bypass the GPFS disk discovery (/usr/lpp/mmfs/bin/mmdevdiscover), 73 | return 0 74 | 75 | # If you wanted to continue with the GPFS disk discovery steps, you 76 | # could uncomment this next line (and comment out the one above). But 77 | # you probably don't want to do that... 78 | # 79 | #return 1 80 | 81 | --------------------------------------------------------------------------------