├── README.asciidoc
├── bin
    └── vimregextutor
├── plugin
    └── vimregextutor.vim
├── tutor
    ├── regextutor
    ├── regextutor.utf-8
    └── regextutor.vim
└── vimgor


/README.asciidoc:
--------------------------------------------------------------------------------
 1 | == VimRegexTutor
 2 | 
 3 | __A "hands on" regular expression tutorial for users of the Vim editor.__
 4 | 
 5 | TIP: If you like VimRegexTutor and want to share the W00t!, I'm grateful for
 6 | https://www.gittip.com/bairuidahu/[tips] or
 7 | http://of-vim-and-vigor.blogspot.com/[beverages].
 8 | 
 9 | [horizontal]
10 | **Project Page** :: https://github.com/dahu/VimRegexTutor
11 | **Maintainer**   :: Barry Arthur <barry.arthur@gmail.com>
12 | **Status**       :: Beta, feedback welcome
13 | **Adapted From** :: http://www.codeproject.com/Articles/9099/The-30-Minute-Regex-Tutorial
14 | 
15 | Most new users can get through it in less than one hour. The result is that you
16 | can use simple regular expressions using the Vim editor.
17 | 
18 | === `:VimRegexTutor`
19 | 
20 | The `:VimRegexTutor` command will open a new tab containing an editable copy of
21 | the tutorial. Have at it!
22 | 
23 | === Old School
24 | 
25 | `regextutor` is a file that contains the tutorial lessons. You can simply
26 | execute `vim regextutor` and then follow the instructions in the lessons.  The
27 | lessons tell you to modify the file, so *DON'T DO THIS ON YOUR ORIGINAL COPY*.
28 | 
29 | On Unix you can also use the `vimregextutor` program.  It will make a scratch
30 | copy of the tutor first. Currently, this program needs to be manually installed
31 | into a directory in your PATH. Assuming you used pathogen to install
32 | VimRegexTutor, you could do something like:
33 | 
34 |     cd
35 |     mkdir -p bin
36 |     ln -s $HOME/.vim/bundle/VimRegexTutor/bin/vimregextutor $HOME/bin/vimregextutor
37 | 
38 | **NOTE:** This is a once-off setup and won't need to be repeated after
39 | upgrading VimRegexTutor.
40 | 


--------------------------------------------------------------------------------
/bin/vimregextutor:
--------------------------------------------------------------------------------
 1 | #! /bin/sh
 2 | 
 3 | # Start Vim on a copy of the regextutor file.
 4 | 
 5 | # Usage: vimregextutor [-g] [xx]
 6 | # Where optional argument -g starts vimregextutor in gvim (GUI) instead of vim.
 7 | # and xx is a language code like "es" or "nl".
 8 | # When an argument is given, it tries loading that tutor.
 9 | # When this fails or no argument was given, it tries using 'v:lang'
10 | # When that also fails, it uses the English version.
11 | 
12 | # Vim could be called "vim" or "vi".  Also check for "vimN", for people who
13 | # have Vim installed with its version number.
14 | # We anticipate up to a future Vim 8 version :-).
15 | seq="vim vim8 vim75 vim74 vim73 vim72 vim71 vim70 vim7 vim6 vi"
16 | if test "$1" = "-g"; then
17 |   # Try to use the GUI version of Vim if possible, it will fall back
18 |   # on Vim if Gvim is not installed.
19 |   seq="gvim gvim8 gvim75 gvim74 gvim73 gvim72 gvim71 gvim70 gvim7 gvim6 $seq"
20 |   shift
21 | fi
22 | 
23 | xx=$1
24 | export xx
25 | 
26 | # We need a temp file for the copy.  First try using a standard command.
27 | tmp="${TMPDIR-/tmp}"
28 | REGEXTUTORCOPY=`mktemp "$tmp/regextutorXXXXXX" || tempfile -p regextutor || echo none`
29 | 
30 | # If the standard commands failed then create a directory to put the copy in.
31 | # That is a secure way to make a temp file (umask 077 keeps it private).
32 | if test "$REGEXTUTORCOPY" = none; then
33 | 	tmpdir="$tmp/vimregextutor$$"
34 | 	OLD_UMASK=`umask`
35 | 	umask 077
36 | 	getout=no
37 | 	mkdir "$tmpdir" || getout=yes
38 | 	umask "$OLD_UMASK"
39 | 	if test $getout = yes; then
40 | 		echo "Could not create directory for regextutor copy, exiting."
41 | 		exit 1
42 | 	fi
43 | 	REGEXTUTORCOPY="$tmpdir/tutorcopy"
44 | 	touch "$REGEXTUTORCOPY"
45 | 	TODELETE="$tmpdir"
46 | else
47 | 	TODELETE="$REGEXTUTORCOPY"
48 | fi
49 | 
50 | export REGEXTUTORCOPY
51 | 
52 | # Remove the copy of the tutor on exit (SIGKILL, 9, cannot be trapped).
53 | trap 'rm -rf "$TODELETE"' 0 1 2 3 11 13 15
54 | 
55 | for i in $seq; do
56 | 	testvim=`which "$i" 2>/dev/null`
57 | 	if test -f "$testvim"; then
58 | 		VIM=$i
59 | 		break
60 | 	fi
61 | done
62 | 
63 | # When no Vim version was found fall back to "vim", you'll get an error message
64 | # below.
65 | if test -z "$VIM"; then
66 | 	VIM=vim
67 | fi
68 | 
69 | # Use Vim to copy the tutor, it knows the value of $VIMRUNTIME
70 | # The script regextutor.vim tells Vim which file to copy
71 | "$VIM" -f -c 'ru tutor/regextutor.vim'
72 | 
73 | # Start vim on the scratch copy (quoted so paths with spaces survive).
74 | "$VIM" -f "$REGEXTUTORCOPY"
75 | 


--------------------------------------------------------------------------------
/plugin/vimregextutor.vim:
--------------------------------------------------------------------------------
 1 | " Vim global plugin for learning and practicing Vim style regular expressions
 2 | " Maintainer:	Barry Arthur <barry.arthur@gmail.com>
 3 | " Version:	0.1
 4 | " License:	Vim License (see :help license)
 5 | " Location:	plugin/vimregextutor.vim
 6 | " Website:	https://github.com/dahu/vimregextutor
 7 | "
 8 | " See vimregextutor.txt for help.  This can be accessed by doing:
 9 | "
10 | " :helptags ~/.vim/doc
11 | " :help vimregextutor
12 | 
13 | " Vimscript Setup: {{{1
14 | " Save 'cpo' and reset it to the Vim default to allow line continuation.
15 | let s:save_cpo = &cpo
16 | set cpo&vim
17 | 
18 | "if exists("g:loaded_vimregextutor")
19 | "      \ || v:version < 700
20 | "      \ || v:version == 703 && !has('patch338')
21 | "      \ || &compatible
22 | "  let &cpo = s:save_cpo
23 | "  finish
24 | "endif
25 | let g:loaded_vimregextutor = 1
26 | 
27 | let s:script_file = expand('<sfile>:p:h:h')
28 | 
29 | " Public Interface: open an editable scratch copy of the tutor in a new tab {{{1
30 | function! VimRegexTutor()
31 |   tabnew
32 |   setlocal buftype=nofile
33 |   setlocal bufhidden=hide
34 |   setlocal noswapfile
35 |   call setline(1, readfile(s:script_file . '/tutor/regextutor'))
36 |   1
37 | endfunction
38 | 
39 | " Commands: :RegexTutor is a short alias for :VimRegexTutor {{{1
40 | command! -nargs=0 -bar VimRegexTutor call VimRegexTutor()
41 | command! -nargs=0 -bar RegexTutor    call VimRegexTutor()
42 | 
43 | " Teardown: {{{1
44 | " Reset &cpo back to the user's setting.
45 | let &cpo = s:save_cpo
46 | 
47 | " Template From: https://github.com/dahu/Area-41/
48 | " vim: set sw=2 sts=2 et fdm=marker:
49 | 


--------------------------------------------------------------------------------
/tutor/regextutor:
--------------------------------------------------------------------------------
   1 | ===============================================================================
   2 | = W e l c o m e   t o   t h e   V I M   R e g e x   T u t o r  -  Version 0.2 =
   3 | ===============================================================================
   4 | 
   5 |   Vim is a very powerful editor with a very powerful search and replace
   6 |   system based on Regular Expressions. This tutor is designed to describe
   7 |   enough of those features that you will be able to more powerfully use
   8 |   Vim as an all-purpose editor.
   9 | 
  10 |   The approximate time required to complete the tutor is 30 minutes,
  11 |   depending upon how much time is spent with experimentation.
  12 | 
  13 |   ATTENTION:~
  14 |   The commands in the lessons will modify the text.  Make a copy of this
  15 |   file to practise on (if you started "vimregextutor" this is already a
  16 |   copy).
  17 | 
  18 |   It is important to remember that this tutor is set up to teach by use.
  19 |   That means that you need to execute the commands to learn them
  20 |   properly.  If you only read the text, you will forget the commands!
  21 | 
  22 |   If you haven't already completed the vimtutor, it is highly recommended
  23 |   that you do so first before attempting this tutorial.
  24 | 
  25 |   Occasional references to the Vim documentation are made throughout this
  26 |   tutorial. Such entries look like this: |'ignorecase'|. To open those
  27 |   entries, type (in normal mode - so press <ESCAPE> to leave insert mode
  28 |   first)  :help  followed by the exact entry. In this case, you would
  29 |   type:  :help 'ignorecase'
  30 | 
  31 |   What the Heck is a Regular Expression Anyway?~
  32 | 
  33 |   I'm sure you are familiar with the use of "wildcard" characters for
  34 |   pattern matching. For example, if you want to find all the text files
  35 |   in a directory, you search for "*.txt", knowing that the asterisk is
  36 |   interpreted as a wildcard that can match any sequence of characters.
  37 |   Regular expressions are just an elaborate extension of this capability.
  38 | 
  39 |   When manipulating text, it is frequently necessary to locate strings
  40 |   that match complex patterns. Regular expressions were invented to
  41 |   describe such patterns. Thus, a regular expression is just a shorthand
  42 |   code for a pattern. For example, the pattern  \w\+  is a concise way to
  43 |   say "match any non-null strings of alphanumeric characters". Vim
  44 |   provides a rich and powerful regular expression vocabulary with which
  45 |   you can readily and efficiently search and replace text.
  46 | 
  47 |   A good way to learn the arcane syntax of regular expressions is by
  48 |   starting with examples and then experimenting with your own creations.
  49 |   This tutorial introduces the basics of regular expressions, giving many
  50 |   common examples. The additional Vim plugin, regexcoach, can be used to
  51 |   try out the examples and to experiment with your own regular
  52 |   expressions.
  53 | 
  54 |   NOTE: All of the exercises in this tutorial will use plain (non-|magic|-al)
  55 |         regular expressions. Frequently a very-magic  (using the |\v| option)
  56 |         equivalent is shown alongside the original. The purpose of magic and
  57 |         the \v option is explained in Lesson 2.3.
  58 | 
  59 |   Let's get started!
  60 | 
  61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  62 |   Lesson 1.1: Searching for Elvis~
  63 | 
  64 |   Suppose you spend all your free time scanning documents looking for evidence
  65 |   that Elvis is still alive. You could search with the following regular
  66 |   expression:
  67 | 
  68 |   =========================================================================
  69 |   *1* : Find  elvis >
  70 | 
  71 |     /elvis
  72 | < =========================================================================
  73 | 
  74 |   This is a perfectly valid regular expression that searches for an exact
  75 |   sequence of characters. In Vim, you can set the |'ignorecase'| option
  76 |   to ignore the case of characters, so this expression will match
  77 |   "Elvis", "ELVIS", or "eLvIs". Unfortunately, it will also match the
  78 |   last five letters of the word "pelvis".
  79 | 
  80 |   Your Turn~
  81 | 
  82 |   1. Make sure you have search highlighting enabled:  :set hlsearch
  83 | 
  84 |   2. Go to the line below marked --->
  85 | 
  86 |   3. type  /elvis  <ENTER>
  87 | 
  88 |   4. Confirm that the letters "elvis" are highlighted
  89 | 
  90 | --->  a) Rare are the sightings of elves, as are those of elvis himself.
  91 |       b) The pelvis of elvis was seldom still.
  92 | 
  93 |   NOTE: You can use  n  to move to the next match and  :nohl  to clear
  94 |         the search highlight.
  95 | 
  96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  97 |   Lesson 1.2: Limiting the search to a whole word~
  98 | 
  99 |   Did you notice how the word "pelvis" is partially matched with the
 100 |   search  /elvis  ? We can improve the expression as follows:
 101 | 
 102 |   =========================================================================
 103 |   *2* : Find  elvis  as a whole word >
 104 | 
 105 |     /\<elvis\>
 106 | < =========================================================================
 107 | 
 108 |   Now things are getting a little more interesting. The  \<  is a special code
 109 |   that means, "match the position at the beginning of any word". Likewise,  \>
 110 |   means "match the position at the end of any word". This expression will only
 111 |   match complete words spelled "elvis" with any combination of lower case or
 112 |   capital letters (if |'ignorecase'| is enabled).
 113 | 
 114 |   Your Turn~
 115 | 
 116 |   1. Go to the line below marked --->
 117 | 
 118 |   2. type  /\<elvis\>  <ENTER>
 119 | 
 120 |   3. Confirm that only the word "elvis" (and not "pelvis") is highlighted
 121 | 
 122 | --->  a) Rare are the sightings of elves, as are those of elvis himself.
 123 |       b) The pelvis of elvis was seldom still.
 124 | 
 125 |   Did you notice how the word "pelvis" is now NOT matched with this
 126 |   enhanced regex?
 127 | 
 128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 129 |   Lesson 1.3: Searching for everything between two words~
 130 | 
 131 |   Suppose you want to find all lines in which the word "elvis" is followed by
 132 |   the word "alive." The period or dot  .  is a special code that matches any
 133 |   character other than a newline. The asterisk  *  means repeat the previous
 134 |   term as many times as necessary to guarantee a match. Thus,  .*  means
 135 |   "match any number of characters other than newline". It is now a simple
 136 |   matter to build an expression that means 'search for the word "elvis"
 137 |   followed anywhere thereafter on the same line by the word "alive"'.
 138 | 
 139 |   =========================================================================
 140 |   *3* : Find  elvis  followed by anything and then followed by  alive >
 141 | 
 142 |     /\<elvis\>.*\<alive\>
 143 | < =========================================================================
 144 | 
 145 |   With just a few special characters we are beginning to build powerful
 146 |   regular expressions and they are already becoming hard for us humans to
 147 |   read.
 148 | 
 149 |   Your Turn~
 150 | 
 151 |   1. Go to the line below marked --->
 152 | 
 153 |   2. type  /\<elvis\>.*\<alive\>  <ENTER>
 154 | 
 155 |   3. Confirm that everything between "elvis" and "alive" is highlighted
 156 | 
 157 | --->  Rare are the sightings of elvis, more so those of him being alive.
 158 | 
 159 | 
 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 161 |   Lesson 2.1: Determining the Validity of Phone Numbers~
 162 | 
 163 |   Suppose you're editing a file which contains seven-digit phone numbers
 164 |   and you want to verify that the phone numbers are in the correct format,
 165 |   "xxx-xxxx", where each "x" is a digit. The following expression will
 166 |   search through text looking for such a string:
 167 | 
 168 |   =========================================================================
 169 |   *4* : Find  seven-digit phone number >
 170 | 
 171 |     /\<\d\d\d-\d\d\d\d
 172 | < =========================================================================
 173 | 
 174 |   Your Turn~
 175 | 
 176 |   1. Go to the line below marked --->
 177 | 
 178 |   2. type  /\<\d\d\d-\d\d\d\d  <ENTER>
 179 | 
 180 |   3. Confirm that the seven-digit phone number is highlighted
 181 | 
 182 | --->  123-4567
 183 | 
 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 185 |   Lesson 2.2: Specifying repetition more concisely~
 186 | 
 187 |   Each  \d  means "match any single digit". The "-" has no special
 188 |   meaning and is interpreted literally, matching a hyphen. To avoid the
 189 |   annoying repetition, we can use a shorthand notation that means the
 190 |   same thing:
 191 | 
 192 |   =========================================================================
 193 |   *5.1* : Find  seven-digit phone number  (more concisely) >
 194 | 
 195 |     /\<\d\{3\}-\d\{4\}
 196 | < =========================================================================
 197 | 
 198 |   Your Turn~
 199 | 
 200 |   1. Go to the line below marked --->
 201 | 
 202 |   2. type  /\<\d\{3\}-\d\{4\}  <ENTER>
 203 | 
 204 |   3. Confirm that the seven-digit phone number is highlighted
 205 | 
 206 | --->  123-4567
 207 | 
 208 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 209 |   Lesson 2.3: It's Magic~
 210 | 
 211 |   The  \{3\}  following the  \d  means "repeat the preceding character
 212 |   three times". The  {  and  }  in that expression need to be escaped
 213 |   with  \  which can quickly become annoying for large expressions. Vim
 214 |   uses the  \m   regex pattern to enable |'magic'| mode, which helps to
 215 |   reduce the amount of  \  escaping needed in some patterns.
 216 |   Additionally, Vim also provides the  \v  pattern to enable "very magic"
 217 |   mode, even further reducing the need to escape certain elements.
 218 |   Using magic, the previous pattern becomes:
 219 | 
 220 |   =========================================================================
 221 |   *5.2* : Find  seven-digit phone number  (using magic for better
 222 |        readability) >
 223 | 
 224 |     /\m\<\d\{3}-\d\{4}
 225 | < =========================================================================
 226 | 
 227 |   OR, with very magic:
 228 | 
 229 |   =========================================================================
 230 |   *5.3* : Find  seven-digit phone number  (using very magic for even
 231 |        better readability) >
 232 | 
 233 |      /\v<\d{3}-\d{4}
 234 | < =========================================================================
 235 | 
 236 |   Your Turn~
 237 | 
 238 |   1. Go to the line below marked --->
 239 | 
 240 |   2. type  /\v<\d{3}-\d{4}  <ENTER>
 241 | 
 242 |   3. Confirm that the seven-digit phone number is highlighted
 243 | 
 244 | --->  123-4567
 245 | 
 246 | 
 247 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 248 |   Lesson 3.1: Special Characters:  *~
 249 | 
 250 |   You should get to know a few characters with special meaning. You already
 251 |   met  \<  .  *  and  \d . To match any whitespace characters, like spaces and
 252 |   tabs, use  \s  . Similarly,  \w   matches any alphanumeric character, and \n
 253 |   matches newlines. The special form   \_s   to match spaces, tabs AND
 254 |   newlines is discussed in Lesson 3.11.
 255 | 
 256 |   Let's try a few more examples:
 257 | 
 258 |   =========================================================================
 259 |   *6* : Find  words that start with the letter a >
 260 | 
 261 |     /\<a\w*\>
 262 | < =========================================================================
 263 | 
 264 |   Your Turn~
 265 | 
 266 |   1. Go to the line below marked --->
 267 | 
 268 |   2. type  /\<a\w*\>  <ENTER>
 269 | 
 270 |   3. Confirm that all of the words starting with "a" are highlighted
 271 | 
 272 | --->  an apple a day keeps the aardvarks away
 273 | 
 274 |   This works by searching for the beginning of a word  \<  , then the letter
 275 |   "a", then any number of repetitions of alphanumeric characters  \w*  , then
 276 |   the end of a word  \> .
 277 | 
 278 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 279 |   Lesson 3.2: Special Characters:  \+~
 280 | 
 281 |   =========================================================================
 282 |   *7* : Find  repeated strings of digits >
 283 | 
 284 |     /\d\+
 285 | < =========================================================================
 286 | 
 287 |   Here, the  \+  is similar to  *  , except it requires at least one
 288 |   repetition.
 289 | 
 290 |   Your Turn~
 291 | 
 292 |   1. Go to the line below marked --->
 293 | 
 294 |   2. type  /\d\+  <ENTER>
 295 | 
 296 |   3. Confirm that all of the digits and only the digits are highlighted
 297 | 
 298 | --->  apple 1234 5678 900,000 1.23 13:45 %^@#
 299 | 
 300 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 301 |   Lesson 3.3: Six Letter Words~
 302 | 
 303 |   =========================================================================
 304 |   *8* : Find  six letter words >
 305 | 
 306 |     /\<\w\{6\}\>
 307 | 
 308 | <   OR, using very-magic: >
 309 | 
 310 |     /\v<\w{6}>
 311 | < =========================================================================
 312 | 
 313 |   Your Turn~
 314 | 
 315 |   1. Go to the line below marked --->
 316 | 
 317 |   2. Type  /\<\w\{6\}\>   <ENTER>
 318 | 
 319 |   3. Confirm that the word "attend" is highlighted
 320 | 
 321 | --->  Happy times and fine edits attend on thee.
 322 | 
 323 |   Start experimenting by inventing your own expressions. See
 324 |   |pattern-overview|, |ordinary-atom|, and |character-classes| for Vim's
 325 |   special regular expression characters.
 326 | 
 327 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 328 |   Lesson 3.4: In the beginning~
 329 | 
 330 |   The special characters  ^  and  $  are used when looking for something that
 331 |   must start at the beginning of the line and end at the end of the line,
 332 |   respectively.
 333 |   This is especially useful for matching exact text. For example, to find a
 334 |   line containing only a seven-digit phone number, you might use:
 335 | 
 336 |   =========================================================================
 337 |   *9* : Find  a seven-digit phone number on a line by itself >
 338 | 
 339 |     /^\d\{3\}-\d\{4\}$
 340 | 
 341 | <   OR >
 342 | 
 343 |     /^\v\d{3}-\d{4}$
 344 | < =========================================================================
 345 | 
 346 |   This is the same as example [|5|], but anchored to the whole line,
 347 |   with nothing else before or after the matched text. This start of line
 348 |   character must be placed at the start of the regex pattern, otherwise it
 349 |   will represent a literal "^". The special atom  \_^  matches the start of
 350 |   a line at any position in the regex.
 351 | 
 352 |   Your Turn~
 353 | 
 354 |   1. Go to the line below marked --->
 355 | 
 356 |   2. Type  /^\d\{3\}-\d\{4\}$   <ENTER>
 357 | 
 358 |   3. Confirm that the line with only a seven-digit phone number is highlighted
 359 | 
 360 | --->
 361 | My phone number is 123-4567, or
 362 | 123-4568 after hours.
 363 | 123-4567
 364 | 
 365 |   Did you notice that even though the first two lines contained a seven-digit
 366 |   phone number, they were not matched because they contained other characters?
 367 | 
 368 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 369 |   Lesson 3.5: Escaped characters~
 370 | 
 371 |   A problem occurs if you actually want to match one of the special
 372 |   characters, like  ^  or  $  . Use the backslash to remove the special
 373 |   meaning. Thus,  \^  ,  \.  , and  \\  match the literal characters "^",
 374 |   ".", and "\" respectively.
 375 | 
 376 |   Your Turn~
 377 | 
 378 |   1. Go to the line below marked --->
 379 | 
 380 |   2. Type  /\^_\^  <ENTER>
 381 | 
 382 |   3. Confirm that the smiley "^_^" is highlighted
 383 | 
 384 | --->  ^_^   $_$   @_%   *_#   !_!   ()_+   <>_{}
 385 | 
 386 |   Can you match the other smilies? Which of those other punctuation symbols
 387 |   need escaping and which do not?
 388 | 
 389 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 390 |   Lesson 3.6: Repetitions~
 391 | 
 392 |   You've seen that  \{3\}  and  *  can be used to indicate repetition of a
 393 |   single character. Later, you'll see how the same syntax can be used to
 394 |   repeat entire subexpressions. There are several other ways to specify a
 395 |   repetition, as shown in |pattern-overview|.
 396 | 
 397 |   NOTE: You no longer need explicit instructions on how to experiment with
 398 |         the patterns in this tutorial. All future search lessons will assume
 399 |         that you use the patterns provided to experiment with and learn from.
 400 |         Test data is available inside each exercise block showing expected
 401 |         matchable text. Where appropriate, false matches and missed text are
 402 |         also provided to emphasise the current learning point.
 403 | 
 404 |   =========================================================================
 405 |   *10* : Find  all five and six letter words >
 406 | 
 407 |      /\<\w\{5,6\}\>
 408 | 
 409 | <    OR >
 410 | 
 411 |      /\v<\w{5,6}>
 412 | <
 413 |      I am a Bear of Very Little Brain, and long words Bother Me.
 414 |   =========================================================================
 415 | 
 416 |   =========================================================================
 417 |   *11* : Find  ten digit phone numbers >
 418 | 
 419 |      /\<\d\{3\}\s\d\{3\}-\d\{4\}
 420 | 
 421 | <    OR >
 422 | 
 423 |      /\v<\d{3}\s\d{3}-\d{4}
 424 | <
 425 |      For a good edit, call 846 968-7615
 426 |   =========================================================================
 427 | 
 428 |   =========================================================================
 429 |   *12* : Find  Social Security Number >
 430 | 
 431 |      /\d\{3\}-\d\{2\}-\d\{4\}
 432 | 
 433 | <    OR >
 434 | 
 435 |      /\v\d{3}-\d{2}-\d{4}
 436 | <
 437 |      111-21-1211
 438 |   =========================================================================
 439 | 
 440 |   =========================================================================
 441 |   *13.1* : Find  The first word in the line >
 442 | 
 443 |      /^\w\+
 444 | <
 445 | Fools to the left of me,
 446 |   jokers to the right,
 447 |   here I am stuck in the middle with Vim.
 448 |   =========================================================================
 449 |   =========================================================================
 450 |   *13.2* : Find  The first word in the line, ignoring leading whitespace >
 451 | 
 452 |      /^\s*\w\+
 453 | <
 454 | Fools to the left of me,
 455 |   jokers to the right,
 456 |   here I am stuck in the middle with Vim.
 457 |   =========================================================================
 458 |   =========================================================================
 459 |   *13.3* : Find  The first word in the line, ignoring AND skipping leading
 460 |                whitespace >
 461 | 
 462 |      /^\s*\zs\w\+
 463 | <
 464 | Fools to the left of me,
 465 |   jokers to the right,
 466 |   here I am stuck in the middle with Vim.
 467 | 
 468 |   NOTE: The special operator   \zs   is discussed in Lesson 5.2
 469 |   =========================================================================
 470 | 
 471 |   Exercise~
 472 | 
 473 |   Use the   $   anchor to match the last 'line' word on this line
 474 | 
 475 |     ANSWER {{{~
 476 |     /line$
 477 |     }}}
 478 | 
 479 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 480 |   Lesson 3.7: Character Classes~
 481 | 
 482 |   It is simple to find alphanumerics, digits, and whitespace, but what if we
 483 |   want to find anything from some other set of characters? This is easily done
 484 |   by listing the desired characters within square brackets. Thus, "[aeiou]"
 485 |   matches any vowel and "[.?!]" matches the punctuation at the end of a
 486 |   sentence. In this example, notice that the "." and "?" lose their special
 487 |   meanings within square brackets and are interpreted literally. We can also
 488 |   specify a range of characters, so "[a-z0-9]" means, "match any lowercase
 489 |   letter of the alphabet, or any digit".
 490 | 
 491 |   Let's try a more complicated expression that searches for telephone numbers.
 492 | 
 493 |   =========================================================================
 494 |   *14* : Find  A ten digit phone number >
 495 | 
 496 |      /(\?\d\{3\}[) ]\s\?\d\{3\}[- ]\d\{4\}
 497 | 
 498 | <    OR >
 499 | 
 500 |      /\v\(?\d{3}[) ]\s?\d{3}[- ]\d{4}
 501 | <
 502 |      (800) 325-3535
 503 |      650 555 1212
 504 |      650) 555-1212     (NOTE: Example of a false positive)
 505 |      Just dial (800)
 506 |      325-3535 now!     (NOTE: Example of a false negative)
 507 |   =========================================================================
 508 | 
 509 |   This expression will find phone numbers in several formats, like "(800)
 510 |   325-3535" or "650 555 1212". The  (\?  searches for zero or one left
 511 |   parenthesis,  [) ]  searches for a right parenthesis or a space. The  \s\?
 512 |   searches for zero or one whitespace characters. Unfortunately, it will also
 513 |   find cases like "650) 555-1212" in which the parenthesis is not balanced.
 514 |   Below, you'll see how to use alternatives to eliminate this problem. Another
 515 |   problem you might have noticed is that, by default in Vim, searches do not
 516 |   span across multiple lines. This is evident in the first phone number above:
 517 |   (800) 325-3535, which matches on this line, but not above because it's
 518 |   split over two lines. Solutions to this problem will also be shown below.
 519 | 
 520 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 521 |   Lesson 3.8: Negation~
 522 | 
 523 |   Sometimes we need to search for a character that is NOT a member of an
 524 |   easily defined class of characters. The following table shows how this can
 525 |   be specified:
 526 | 
 527 |     \W         Match any character that is NOT alphanumeric
 528 | 
 529 |     \S         Match any character that is NOT whitespace
 530 | 
 531 |     \D         Match any character that is NOT a digit
 532 | 
 533 |     [^x]       Match any character that is NOT x
 534 | 
 535 |     [^aeiou]   Match any character that is NOT one of the characters aeiou
 536 | 
 537 |   =========================================================================
 538 |   *15* : Find  All strings that do not contain whitespace characters >
 539 | 
 540 |      /\S\+
 541 | <
 542 |      Blessed are those who in the name of Vim edit righteously.
 543 |   =========================================================================
 544 | 
 545 |   Later, we'll see how to use "lookahead" (|/\@=| , |/\@!|) and "lookbehind"
 546 |   (|/\@<=| , |/\@<!|) to search for the absence of more complex patterns.
 547 | 
 548 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 549 |   Lesson 3.9: Alternatives~
 550 | 
 551 |   To select between several alternatives, allowing a match if either one is
 552 |   satisfied, use the pipe  |  symbol to separate the alternatives. For
 553 |   example, Zip Codes come in two flavors, one with 5 digits, the other with 9
 554 |   digits and a hyphen. We can find either with this expression:
 555 | 
 556 |   =========================================================================
 557 |   *16* : Find  Nine and five digit Zip Codes >
 558 | 
 559 |      /\<\d\{5\}-\d\{4\}\>\|\<\d\{5\}\>
 560 | 
 561 | <    OR >
 562 | 
 563 |      /\v<\d{5}-\d{4}>|<\d{5}>
 564 | <
 565 |      12345-1234
 566 |      12345
 567 |      123-456           (NOTE: non-zip codes don't match)
 568 |   =========================================================================
 569 | 
 570 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 571 |   Lesson 3.10: Order of Alternatives is Important~
 572 | 
 573 |   When using alternatives, the order is important since the matching algorithm
 574 |   will attempt to match the leftmost alternative first. If the order is
 575 |   reversed in this example, the expression will only find the 5 digit Zip
 576 |   Codes and fail to find the 9 digit ones.
 577 | 
 578 |   =========================================================================
 579 |   Try it: >
 580 |      /\v<\d{5}>|<\d{5}-\d{4}>
 581 | <
 582 |           12345        (NOTE: This still matches...
 583 |           12345-1234          ...but this does not)
 584 |   =========================================================================
 585 | 
 586 |   We can use alternatives to improve the expression for ten digit phone
 587 |   numbers, allowing the area code to appear either delimited by whitespace or
 588 |   parentheses:
 589 | 
 590 |   =========================================================================
 591 |   *17.1* : Find  Ten digit phone numbers, a better way >
 592 | 
 593 |      /\((\d\{3\})\|\d\{3\}\)\s\?\d\{3\}[- ]\d\{4\}
 594 | 
 595 | <    OR >
 596 | 
 597 |      /\v(\(\d{3}\)|\d{3})\s?\d{3}[- ]\d{4}
 598 | <
 599 |      (800) 325-3535
 600 |      650 555 1212
 601 |      650) 555-1212     (NOTE: Badly formatted numbers no longer match)
 602 |      Just dial (800)
 603 |      325-3535 now!     (NOTE: Numbers split over a line still fail to match)
 604 |   =========================================================================
 605 | 
 606 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 607 |   Lesson 3.11: Newlines are NOT Matched by Default~
 608 | 
 609 |   That fixes the problem of accidentally matching badly formatted phone
 610 |   numbers, but not the problem of phone numbers split over a line. Let's fix
 611 |   that problem now. Vim limits regex matches to a single line by default. Put
 612 |   another way, Vim -excludes- newlines in matches by default. To override
 613 |   this, we need to use the special   |/\_|   modifiers which -include-
 614 |   newlines in the match. As an example, use   |/\_s|   to capture whitespace
 615 |   (like   \s   ) including newlines. Let's use   \_s   to fix our phone number
 616 |   problem:
 617 | 
 618 |   =========================================================================
 619 |   *17.2* : Find  Ten digit phone numbers, an even better way (with newlines) >
 620 | 
 621 |      /\((\d\{3\})\|\d\{3\}\)\_s*\d\{3\}\_s*-\?\_s*\d\{4\}
 622 | 
 623 | <    OR >
 624 | 
 625 |      /\v(\(\d{3}\)|\d{3})\_s*\d{3}\_[- ]?\d{4}
 626 | <
 627 |      (800) 325-3535
 628 |      650 555 1212
 629 |      650) 555-1212     (NOTE: Badly formatted numbers still don't match)
 630 |      Just dial (800)
 631 |      325-3535 now!     (NOTE: Numbers split over a line now match correctly)
 632 |   =========================================================================
 633 | 
 634 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 635 |   Lesson 4.1: Introducing Grouping~
 636 | 
 637 |   Parentheses may be used to delimit a subexpression to allow repetition or
 638 |   other special treatment. For example:
 639 | 
 640 |   =========================================================================
 641 |   *18* : Find  A simple IP address >
 642 | 
 643 |      /\(\d\{1,3\}\.\)\{3\}\d\{1,3\}
 644 | 
 645 | <    OR >
 646 | 
 647 |      /\v(\d{1,3}\.){3}\d{1,3}
 648 | <
 649 |      192.168.1.1
 650 |      127.0.0.1
 651 |      3.142             (NOTE: non-IP addresses are not matched)
 652 |      1.2.3
 653 |      999.999.999.999   (NOTE: Invalid IP addresses are falsely matched)
 654 |   =========================================================================
 655 | 
 656 |   The first part of the expression searches for a one to three digit number
 657 |   followed by a literal period  .  . This is enclosed in parentheses and
 658 |   repeated three times using the  \{3\}  quantifier, followed by the same
 659 |   expression without the trailing period.
 660 | 
 661 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 662 |   Lesson 4.2: Groups within Groups~
 663 | 
 664 |   Unfortunately, this example allows IP addresses with arbitrary one, two, or
 665 |   three digit numbers separated by periods even though a valid IP address
 666 |   cannot have numbers larger than 255. It would be nice to arithmetically
 667 |   compare a captured number N to enforce N<256, but this is not possible with
 668 |   regular expressions alone. The next example tests various alternatives based
 669 |   on the starting digits to guarantee the limited range of numbers by pattern
 670 |   matching. This shows that an expression can become cumbersome even when
 671 |   looking for a pattern that is simple to describe.
 672 | 
 673 |   =========================================================================
 674 |   *19* : Find  IP addresses (more accurately) >
 675 | 
 676 |      /\(\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)\.\)\{3\}\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)
 677 | <
 678 |      OR >
 679 | 
 680 |      /\v((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)
 681 | <
 682 |      192.168.1.1
 683 |      127.0.0.1
 684 |      3.142             (NOTE: non-IP addresses are not matched)
 685 |      1.2.3
 686 |      999.999.999.999   (NOTE: Invalid IP addresses are not matched)
 687 |   =========================================================================
 688 | 
 689 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 690 |   Lesson 4.3: Backreferences~
 691 | 
 692 |   When subexpressions are grouped with parentheses, the text that matches the
 693 |   subexpression is available further along in the regular expression itself.
 694 |   Groups are numbered sequentially as encountered in reading from left to
 695 |   right, starting with 1.
 696 | 
 697 |   A "backreference" is used to search for a recurrence of previously matched
 698 |   text that has been captured by a group. For example,  \1  means, "match the
 699 |   text that was captured by group 1". Here is an example:
 700 | 
 701 |   =========================================================================
 702 |   *20* : Find  repeated words >
 703 | 
 704 |      /\<\(\w\+\)\>\s*\1\>
 705 | 
 706 |      OR
 707 | 
 708 |      /\v<(\w+)>\s*\1>
 709 | <
 710 |      It wasn't that that was impossible.
 711 |   =========================================================================
 712 | 
 713 |   This works by capturing a string of at least one alphanumeric character
 714 |   within group 1  \(\w\+\)  , but only if it begins and ends a word. It then
 715 |   looks for any amount of whitespace  \s*  followed by a repetition of the
 716 |   captured text  \1  ending at the end of a word.
 717 | 
 718 | 
 719 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 720 |   Lesson 5.1: Captures~
 721 | 
 722 |   In the following examples, the Vim regex snippet is given first followed by
 723 |   the equivalent Perl Compatible Regular Expression (PCRE) for those familiar
 724 |   with that style of regular expressions. If you're not familiar with PCRE,
 725 |   don't worry as you will not require a knowledge of it to complete this
 726 |   tutorial or use Vim.
 727 | 
 728 |   NOTE: Some of Vim's ways of handling certain regex features differ from
 729 |         PCRE. Some people complain about this and wonder why Vim didn't just
 730 |         use the already existing PCRE way. The reason is that Vim started
 731 |         getting some of these things at the same time Perl did, or even
 732 |         beforehand. So, it's not that Vim decided to flout history and Go Its
 733 |         Own Way just to be difficult.  Respect that Vim -can- do these things
 734 |         and has been able to do so for a long time now; and suck it up and
 735 |         learn Vim's way. :-)
 736 | 
 737 |   1. Match exp and capture it in an automatically numbered group:
 738 | 
 739 |   Vim:  \(exp\)
 740 |   PCRE: (exp)
 741 | 
 742 |   2. Match exp, but do not capture it:
 743 | 
 744 |   Vim:  \%(exp\)
 745 |   PCRE: (?:exp)
 746 | 
 747 |   The  \%(exp\)  form does not alter the matching behavior, it just doesn't
 748 |   capture it in a group like the  \(exp\)  form.
 749 | 
 750 |   The next four are so-called lookahead or lookbehind assertions. They look for
 751 |   things that go before or after the current match without including them in the
 752 |   match. It is important to understand that these expressions match a position
 753 |   like  ^  or  \<  and never match any text. For this reason, they are known as
 754 |   "zero-width assertions".
 755 | 
 756 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 757 |   Lesson 5.2: Positive lookahead~
 758 | 
 759 |   Matches with zero width if \(exp\) matches at the current position
 760 | 
 761 |   Vim:  \(exp\)\@=
 762 |   PCRE: (?=exp)
 763 | 
 764 |   \(exp\)\@=  is the "zero-width positive lookahead assertion". It matches a
 765 |   position in the text that precedes a given suffix expression, but doesn't
 766 |   include the suffix in the match:
 767 | 
 768 |   =========================================================================
 769 |   *22* : Find  the beginning of words ending with "ing" >
 770 | 
 771 |      /\<\w\+\(ing\>\)\@=
 772 | 
 773 | <    OR >
 774 | 
 775 |      /\v<\w+(ing>)@=
 776 | <
 777 |      Beguiling
 778 |      Alluringly
 779 | 
 780 |      NOTE: Vim provides two other very powerful regex operators:  |/\zs|  and
 781 |            |/\ze|  which can be used in many similar situations as the
 782 |            lookaround operators. The pattern above can also be expressed as: >
 783 | 
 784 |      /\<\w\+\zeing\>
 785 | <
 786 |      Pleasing
 787 |      Obligingly
 788 |   =========================================================================
 789 | 
 790 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 791 |   Lesson 5.3: Positive lookbehind~
 792 | 
 793 |   Matches with zero width if \(exp\) matches just before what follows:
 794 | 
 795 |   Vim:  \(exp\)\@<=
 796 |   PCRE: (?<=exp)
 797 | 
 798 |   \(exp\)\@<=  is the "zero-width positive lookbehind assertion". It matches
 799 |   the position following a prefix, but doesn't include the prefix in the
 800 |   match:
 801 | 
 802 |   =========================================================================
 803 |   *23* : Find  The end of words starting with "re" >
 804 | 
 805 |      /\(\<re\)\@<=\w\+\>
 806 | 
 807 | <   OR (using the |/\zs| atom): >
 808 | 
 809 |     /\<re\zs\w\+\>
 810 | <
 811 |      Remember, regular repetition results in ready reflexes.
 812 |   =========================================================================
 813 | 
 814 |   Here is an example that could be used repeatedly to insert commas into
 815 |   numbers in groups of three digits:
 816 | 
 817 |   =========================================================================
 818 |   *24* : Find  Three digits at the end of a word, preceded by a digit >
 819 | 
 820 |      /\d\@<=\d\{3\}\>
 821 | 
 822 | <    OR >
 823 | 
 824 |      /\v\d@<=\d{3}>
 825 | <
 826 |      123456789
 827 |      123456,789
 828 |      123,456,789       (NOTE: This number correctly doesn't match)
 829 |   =========================================================================
 830 | 
 831 |   Here is an example that looks for both a prefix and a suffix:
 832 | 
 833 |   =========================================================================
 834 |   *25* : Find  Alphanumeric strings bounded by whitespace >
 835 | 
 836 |      /\s\@<=\w\+\s\@=
 837 | <
 838 |      It's right here.
 839 |   =========================================================================
 840 | 
 841 |   Quiz~
 842 | 
 843 |   Use the pattern in example |24| to add commas to the following number:
 844 | 
 845 | ---> 12345678901
 846 | 
 847 |      ANSWER {{{~
 848 |      Use the following substitution command when your cursor is on the
 849 |      line above marked ---> :   :s/\v\d@<=\d{3}>/,&/
 850 |      You will need to execute it three times to add all the necessary commas.
 851 |      Tip: You can use the   &   key in normal mode to re-execute the last
 852 |      substitution.
 853 |      }}}
 854 | 
 855 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 856 |   Lesson 5.4: Negative lookahead~
 857 | 
 858 |   Matches with zero width if \(exp\) does NOT match at the current position
 859 | 
 860 |   Vim:  \(exp\)\@!
 861 |   PCRE: (?!exp)
 862 | 
 863 |   Earlier, we saw how to search for a character that is not a specific
 864 |   character or the member of a character class. What if we simply want to
 865 |   verify that a character is not present, but don't want to match anything?
 866 |   For example, what if we are searching for words in which the letter "q" is
 867 |   not followed by the letter "u"? We could try:
 868 | 
 869 |   =========================================================================
 870 |   *26* : Find  Words with "q" followed by NOT "u" >
 871 | 
 872 |      /\<\w*q[^u]\w*\>
 873 | <
 874 |      Iqaluit is the capital of Nunavut and Canada's coolest arctic city.
 875 |      QWERTY put the "q" in Compaq but not Iraq.
 876 |      Inqorrectly spelled!
 877 |   =========================================================================
 878 | 
 879 |   Run the example and you will see that it fails when "q" is the last letter
 880 |   of a word, as in "Compaq". This is because  [^u]  always matches a character.
 881 |   If "q" is the last character of the word, it will match the whitespace
 882 |   character that follows, so in the example the expression ends up matching
 883 |   two whole words. Negative lookaround solves this problem because it matches
 884 |   a position and does not consume any text. As with positive lookaround, it
 885 |   can also be used to match the position of an arbitrarily complex
 886 |   subexpression, rather than just a single character. We can now do a better
 887 |   job:
 888 | 
 889 |   =========================================================================
 890 |   *27* : Find  words with "q" not followed by "u" >
 891 | 
 892 |      /\<\w*qu\@!\w*\>
 893 | <
 894 |      Iqaluit is the capital of Nunavut and Canada's coolest arctic city.
 895 |      QWERTY put the "q" in Compaq but not Iraq.
 896 |      Inqorrectly spelled!
 897 |   =========================================================================
 898 | 
 899 |   We used the "zero-width negative lookahead assertion",  \(exp\)\@!  , which
 900 |   succeeds only if the suffix "exp" is not present. Here is another example:
 901 | 
 902 |   =========================================================================
 903 |   *28* : Find  Three digits not followed by another digit >
 904 | 
 905 |      /\d\{3\}\d\@!
 906 | <
 907 |      123
 908 |      123A
 909 |      123 456
 910 |      1234              (NOTE: Matches the -last- three digits, perhaps
 911 |      123456                   surprisingly! How would you force a match
 912 |                               of three digits only?
 913 |                                 ANSWER {{{~
 914 |                                 /\<\d\{3\}\d\@!
 915 |                                 }}}
 916 |                        )
 917 |   =========================================================================
 918 | 
 919 | 
 920 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 921 |   Lesson 5.5: Negative lookbehind~
 922 | 
 923 |   Matches with zero width if \(exp\) does NOT match just before what follows:
 924 | 
 925 |   Vim:  \(exp\)\@<!
 926 |   PCRE: (?<!exp)
 927 | 
 928 |   Similarly, we can use  \(exp\)\@<!  , the "zero-width negative lookbehind
 929 |   assertion", to search for a position in the text at which the prefix "exp"
 930 |   is not present:
 931 | 
 932 |   =========================================================================
 933 |   *29* : Find  Strings of 7 alphanumerics not preceded by a letter or space >
 934 | 
 935 |      /[a-z ]\@<!\w\{7\}
 936 | 
 937 |      OR, if you are not ignoring case (:help 'ignorecase')
 938 | 
 939 |      /[a-zA-Z ]\@<!\w\{7\}
 940 | 
 941 |      OR
 942 | 
 943 |      /\c[a-z ]\@<!\w\{7\}
 944 | <
 945 |      :Vimmers of Penzance: I am the very model of a modern modal editor.
 946 |   =========================================================================
 947 | 
 948 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 949 |   Lesson 5.6: Mixed Lookaround~
 950 | 
 951 |   Here is one more example using lookaround:
 952 | 
 953 |   NOTE: For this type of backreferencing, you will need to use Vim's old
 954 |   regex engine. Read   :help 'regexpengine'   for an explanation. To enable
 955 |   the old regex engine, do:   :set regexpengine=1
 956 | 
 957 |   =========================================================================
 958 |   *30* : Find  Text between HTML tags >
 959 | 
 960 |      /\%(<\1>\)\@<=.*\%(<\/\(\w\+\)>\)\@=
 961 | 
 962 |      OR
 963 | 
 964 |      /\v%(\<\1\>)@<=.*%(\<\/(\w+)\>)@=
 965 | 
 966 |      OR
 967 | 
 968 |      /<\(\w\+\).\{-}>\zs.*\ze<\/\1>
 969 | <
 970 |      <level>Can I play, daddy?</level>
 971 |   =========================================================================
 972 | 
 973 |   This searches for the corresponding closing HTML tag using positive
 974 |   lookbehind and the opening original tag using positive lookahead, thus
 975 |   capturing the intervening text but excluding both tags.
 976 | 
 977 |   NOTE: If you read that last sentence and felt there was something backwards
 978 |         about it... you're right. The part of the pattern after  \@<=  and
 979 |         \@<!  are checked for a match first, thus things like  \1  don't work
 980 |         to reference  \(\)  inside the preceding atom. It does work the other
 981 |         way around as illustrated in the pattern above.
 982 | 
 983 |         Bram was surprised that this pattern actually works at all and
 984 |         suggested that it is probably an indication of bugs in the backreferencing
 985 |         regexp engine. He also kindly provided the much simpler (and more
 986 |         efficient) regexp using the much preferred \zs and \ze atoms:
 987 | 
 988 |             /<\(\w\+\).\{-}>\zs.*\ze<\/\1>
 989 | 
 990 |         In practice, the \zs and \ze atoms are almost always the better
 991 |         choice.
 992 | 
 993 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 994 |   Lesson 6.1: Greedy and Lazy~
 995 | 
 996 |   When a regular expression has a quantifier that can accept a range of
 997 |   repetitions, like  .*  , the normal behavior is to match as many characters
 998 |   as possible. Consider the following regular expression:
 999 | 
1000 |   =========================================================================
1001 |   *32* : Find  The longest string starting with a and ending with b >
1002 | 
1003 |      /a.*b
1004 | <
1005 |      aabab
1006 |   =========================================================================
1007 | 
1008 |   If this is used to search the string "aabab", it will match the entire
1009 |   string "aabab". This is called "greedy" matching. Sometimes, we prefer
1010 |   "lazy" matching in which a match using the minimum number of repetitions is
1011 |   found. The quantifiers we've been playing with so far can all be turned into
1012 |   "lazy" quantifiers by replacing the quantifier with a  \{-}  form. See
1013 |   |/\{-| for the specific details. Thus  \{-}  means "match any number of
1014 |   repetitions, but use the smallest number of repetitions that still leads to
1015 |   a successful match". Now let's try the lazy version of example |32|:
1016 | 
1017 |   NOTE: The  \{-}  form is equivalent to PCRE: *?
1018 | 
1019 |   =========================================================================
1020 |   *33* : Find  The shortest string starting with a and ending with b >
1021 | 
1022 |      /a.\{-}b
1023 | <
1024 |      aabab
1025 | 
1026 |      NOTE: If you press  n  after running this search, you will notice that it
1027 |            first matched "aab" and then "ab".
1028 |   =========================================================================
1029 | 
1030 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1031 |   Lesson 6.2: Vim's Lazy Operators:~
1032 | 
1033 |   NOTE: See the manual sections |/\{| and |non-greedy| for more details.
1034 | 
1035 |   Repeat any number of times, but as few as possible:
1036 |   Vim:  \{-}
1037 |   PCRE: *?
1038 | 
1039 |   Repeat one or more times, but as few as possible:
1040 |   Vim:  \{-1,}
1041 |   PCRE: +?
1042 | 
1043 |   Repeat zero or one time, but as few as possible:
1044 |   Vim:  \{-,1}
1045 |   PCRE: ??
1046 | 
1047 |   Repeat at least n, but no more than m times, but as few as possible:
1048 |   Vim:  \{-n,m}
1049 |   PCRE: {n,m}?
1050 | 
1051 |   Repeat at least n times, but as few as possible:
1052 |   Vim:  \{-n,}
1053 |   PCRE: {n,}?
1054 | 
1055 | 
1056 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1057 |   Exercise 1.1: Inserting thousands separators into numbers.~
1058 | 
1059 |   Visually select the following 10 lines of numbers and write a regular
1060 |   expression that inserts a thousands separator (such as ,) to make them read
1061 |   correctly as shown in the set below.
1062 | 
1063 |      1
1064 |      12
1065 |      123
1066 |      1234
1067 |      12345
1068 |      123456
1069 |      1234567
1070 |      12345678
1071 |      123456789
1072 |      1234567890
1073 | 
1074 |      -=-=-
1075 | 
1076 |      1
1077 |      12
1078 |      123
1079 |      1,234
1080 |      12,345
1081 |      123,456
1082 |      1,234,567
1083 |      12,345,678
1084 |      123,456,789
1085 |      1,234,567,890
1086 | <
1087 | 
1088 |   SOLUTION >
1089 |      :'<,'>s/\d\@<=\(\(\d\{3}\)\+\d\@!\)\@=/,/g
1090 | <
1091 | 
1092 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1093 |   Appendix 1: VimRegEx - a Vim Regex Visualisation Tool~
1094 | 
1095 |   The VimRegEx plugin is a tool which attempts to graphically show you the
1096 |   various portions of your regular expression and the parts of a sample text
1097 |   it matches. Using a tool like this can quickly help you to craft complicated
1098 |   regular expressions with more ease and confidence.
1099 | 
1100 |   Install VimRegEx from:
1101 |     * http://www.vim.org/scripts/script.php?script_id=1091     OR
1102 |     * use the Vim Addon Manager:
1103 |         http://www.vim.org/scripts/script.php?script_id=2905
1104 | 
1105 | 
1106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1107 |   This concludes the Vim Regex Tutor.  It was intended to give a brief
1108 |   overview of regular expressions in the Vim editor - just enough to allow you
1109 |   to start exploring the wonderful and powerful world of regexes on your own.
1110 |   It is far from complete as Vim has many many more regex features.  To learn
1111 |   more about regexes in Vim, see  |pattern.txt|
1112 | 
1113 |   If you want to read a book, I suggest the latest edition of:
1114 | 
1115 |     Mastering Regular Expressions, by Jeffrey Friedl.
1116 | 
1117 |   This tutorial is a rework by Barry Arthur of the original .Net version by
1118 |   Jim Hollenhorst at:
1119 | 
1120 |     http://www.codeproject.com/KB/dotnet/regextutorial.aspx
1121 | 
1122 |   The layout is based on the original  vimtutor  by Michael C. Pierce and
1123 |   Robert K. Ware.
1124 | 
1125 |   Thanks to the following people for providing feedback, fixing bugs and
1126 |   offering suggestions to improve VimRegexTutor: Ben Fritz, Israel Chauca.
1127 | 
1128 |   Licensed under the same terms as Vim itself.
1129 | 
1130 |   Send mistakes and suggestions to barry.arthur@gmail.com or register an Issue
1131 |   at https://github.com/dahu/VimRegexTutor
1132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1133 | 
1134 | # vim:tw=78:ts=8:ft=help:norl:noro:hlsearch:ignorecase fdm=marker
1135 | 


--------------------------------------------------------------------------------
/tutor/regextutor.utf-8:
--------------------------------------------------------------------------------
   1 | ===============================================================================
   2 | = W e l c o m e   t o   t h e   V I M   R e g e x   T u t o r  -  Version 0.2 =
   3 | ===============================================================================
   4 | 
   5 |   Vim is a very powerful editor with a very powerful search and replace
   6 |   system based on Regular Expressions. This tutor is designed to describe
   7 |   enough of those features that you will be able to more powerfully use
   8 |   Vim as an all-purpose editor.
   9 | 
  10 |   The approximate time required to complete the tutor is 30 minutes,
  11 |   depending upon how much time is spent with experimentation.
  12 | 
  13 |   ATTENTION:~
  14 |   The commands in the lessons will modify the text.  Make a copy of this
  15 |   file to practise on (if you started "vimregextutor" this is already a
  16 |   copy).
  17 | 
  18 |   It is important to remember that this tutor is set up to teach by use.
  19 |   That means that you need to execute the commands to learn them
  20 |   properly.  If you only read the text, you will forget the commands!
  21 | 
  22 |   If you haven't already completed the vimtutor, it is highly recommended
  23 |   that you do so first before attempting this tutorial.
  24 | 
  25 |   Occasional references to the Vim documentation are made throughout this
  26 |   tutorial. Such entries look like this: |'ignorecase'|. To open those
  27 |   entries, type (in normal mode - so press <ESCAPE> to leave insert mode
  28 |   first)  :help  followed by the exact entry. In this case, you would
  29 |   type:  :help 'ignorecase'
  30 | 
  31 |   What the Heck is a Regular Expression Anyway?~
  32 | 
  33 |   I'm sure you are familiar with the use of "wildcard" characters for
  34 |   pattern matching. For example, if you want to find all the text files
  35 |   in a directory, you search for "*.txt", knowing that the asterisk is
  36 |   interpreted as a wildcard that can match any sequence of characters.
  37 |   Regular expressions are just an elaborate extension of this capability.
  38 | 
  39 |   When manipulating text, it is frequently necessary to locate strings
  40 |   that match complex patterns. Regular expressions were invented to
  41 |   describe such patterns. Thus, a regular expression is just a shorthand
  42 |   code for a pattern. For example, the pattern  \w\+  is a concise way to
  43 |   say "match any non-null strings of alphanumeric characters". Vim
  44 |   provides a rich and powerful regular expression vocabulary with which
  45 |   you can readily and efficiently search and replace text.
  46 | 
  47 |   A good way to learn the arcane syntax of regular expressions is by
  48 |   starting with examples and then experimenting with your own creations.
  49 |   This tutorial introduces the basics of regular expressions, giving many
  50 |   common examples. The additional Vim plugin, regexcoach, can be used to
  51 |   try out the examples and to experiment with your own regular
  52 |   expressions.
  53 | 
  54 |   NOTE: All of the exercises in this tutorial will use plain (non-|magic|-al)
  55 |         regular expressions. Frequently a very-magic  (using the |\v| option)
  56 |         equivalent is shown alongside the original. The purpose of magic and
  57 |         the \v option is explained in Lesson 2.3.
  58 | 
  59 |   Let's get started!
  60 | 
  61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  62 |   Lesson 1.1: Searching for Elvis~
  63 | 
  64 |   Suppose you spend all your free time scanning documents looking for evidence
  65 |   that Elvis is still alive. You could search with the following regular
  66 |   expression:
  67 | 
  68 |   =========================================================================
  69 |   *1* : Find  elvis >
  70 | 
  71 |     /elvis
  72 | < =========================================================================
  73 | 
  74 |   This is a perfectly valid regular expression that searches for an exact
  75 |   sequence of characters. In Vim, you can set the |'ignorecase'| option
  76 |   to ignore the case of characters, so this expression will match
  77 |   "Elvis", "ELVIS", or "eLvIs". Unfortunately, it will also match the
  78 |   last five letters of the word "pelvis".
  79 | 
  80 |   Your Turn~
  81 | 
  82 |   1. Make sure you have search highlighting enabled:  :set hlsearch
  83 | 
  84 |   2. Go to the line below marked --->
  85 | 
  86 |   3. type  /elvis  <ENTER>
  87 | 
  88 |   4. Confirm that the letters "elvis" are highlighted
  89 | 
  90 | --->  a) Rare are the sightings of elves, as are those of elvis himself.
  91 |       b) The pelvis of elvis was seldom still.
  92 | 
  93 |   NOTE: You can use  n  to move to the next match and  :nohl  to clear
  94 |         the search highlight.
  95 | 
  96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  97 |   Lesson 1.2: Limiting the search to a whole word~
  98 | 
  99 |   Did you notice how the word "pelvis" is partially matched with the
 100 |   search  /elvis  ? We can improve the expression as follows:
 101 | 
 102 |   =========================================================================
 103 |   *2* : Find  elvis  as a whole word >
 104 | 
 105 |     /\<elvis\>
 106 | < =========================================================================
 107 | 
 108 |   Now things are getting a little more interesting. The  \<  is a special code
 109 |   that means, "match the position at the beginning of any word". Likewise,  \>
 110 |   means "match the position at the end of any word". This expression will only
 111 |   match complete words spelled "elvis" with any combination of lower case or
 112 |   capital letters (if |'ignorecase'| is enabled).
 113 | 
 114 |   Your Turn~
 115 | 
 116 |   1. Go to the line below marked --->
 117 | 
 118 |   2. type  /\<elvis\>  <ENTER>
 119 | 
 120 |   3. Confirm that only the word "elvis" (and not "pelvis") is highlighted
 121 | 
 122 | --->  a) Rare are the sightings of elves, as are those of elvis himself.
 123 |       b) The pelvis of elvis was seldom still.
 124 | 
 125 |   Did you notice how the word "pelvis" is now NOT matched with this
 126 |   enhanced regex?
 127 | 
 128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 129 |   Lesson 1.3: Searching for everything between two words~
 130 | 
 131 |   Suppose you want to find all lines in which the word "elvis" is followed by
 132 |   the word "alive." The period or dot  .  is a special code that matches any
 133 |   character other than a newline. The asterisk  *  means repeat the previous
 134 |   term as many times as necessary to guarantee a match. Thus,  .*  means
 135 |   "match any number of characters other than newline". It is now a simple
 136 |   matter to build an expression that means 'search for the word "elvis"
 137 |   followed anywhere thereafter on the same line by the word "alive"'.
 138 | 
 139 |   =========================================================================
 140 |   *3* : Find  elvis  followed by anything and then followed by  alive >
 141 | 
 142 |     /\<elvis\>.*\<alive\>
 143 | < =========================================================================
 144 | 
 145 |   With just a few special characters we are beginning to build powerful
 146 |   regular expressions and they are already becoming hard for us humans to
 147 |   read.
 148 | 
 149 |   Your Turn~
 150 | 
 151 |   1. Go to the line below marked --->
 152 | 
 153 |   2. type  /\<elvis\>.*\<alive\>  <ENTER>
 154 | 
 155 |   3. Confirm that everything between "elvis" and "alive" is highlighted
 156 | 
 157 | --->  Rare are the sightings of elvis, more so those of him being alive.
 158 | 
 159 | 
 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 161 |   Lesson 2.1: Determining the Validity of Phone Numbers~
 162 | 
 163 |   Suppose you're editing a file which contains seven-digit phone numbers
 164 |   and you want to verify that the phone numbers are in the correct format,
 165 |   "xxx-xxxx", where each "x" is a digit. The following expression will
 166 |   search through text looking for such a string:
 167 | 
 168 |   =========================================================================
 169 |   *4* : Find  seven-digit phone number >
 170 | 
 171 |     /\<\d\d\d-\d\d\d\d
 172 | < =========================================================================
 173 | 
 174 |   Your Turn~
 175 | 
 176 |   1. Go to the line below marked --->
 177 | 
 178 |   2. type  /\<\d\d\d-\d\d\d\d  <ENTER>
 179 | 
 180 |   3. Confirm that the seven-digit phone number is highlighted
 181 | 
 182 | --->  123-4567
 183 | 
 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 185 |   Lesson 2.2: Specifying repetition more concisely~
 186 | 
 187 |   Each  \d  means "match any single digit". The "-" has no special
 188 |   meaning and is interpreted literally, matching a hyphen. To avoid the
 189 |   annoying repetition, we can use a shorthand notation that means the
 190 |   same thing:
 191 | 
 192 |   =========================================================================
 193 |   *5.1* : Find  seven-digit phone number  (more concisely) >
 194 | 
 195 |     /\<\d\{3\}-\d\{4\}
 196 | < =========================================================================
 197 | 
 198 |   Your Turn~
 199 | 
 200 |   1. Go to the line below marked --->
 201 | 
 202 |   2. type  /\<\d\{3\}-\d\{4\}  <ENTER>
 203 | 
 204 |   3. Confirm that the seven-digit phone number is highlighted
 205 | 
 206 | --->  123-4567
 207 | 
 208 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 209 |   Lesson 2.3: It's Magic~
 210 | 
 211 |   The  \{3\}  following the  \d  means "repeat the preceding character
 212 |   three times". The  {  and  }  in that expression need to be escaped
 213 |   with  \  which can quickly become annoying for large expressions. Vim
 214 |   uses the  \m   regex pattern to enable |'magic'| mode, which helps to
 215 |   reduce the amount of  \  escaping needed in some patterns.
 216 |   Additionally, Vim also provides the  \v  pattern to enable "very magic"
 217 |   mode, even further reducing the need to escape certain elements.
 218 |   Using magic, the previous pattern becomes:
 219 | 
 220 |   =========================================================================
 221 |   *5.2* : Find  seven-digit phone number  (using magic for better
 222 |        readability) >
 223 | 
 224 |     /\m\<\d\{3}-\d\{4}
 225 | < =========================================================================
 226 | 
 227 |   OR, with very magic:
 228 | 
 229 |   =========================================================================
 230 |   *5.3* : Find  seven-digit phone number  (using very magic for even
 231 |        better readability) >
 232 | 
 233 |      /\v<\d{3}-\d{4}
 234 | < =========================================================================
 235 | 
 236 |   Your Turn~
 237 | 
 238 |   1. Go to the line below marked --->
 239 | 
 240 |   2. type  /\v<\d{3}-\d{4}  <ENTER>
 241 | 
 242 |   3. Confirm that the seven-digit phone number is highlighted
 243 | 
 244 | --->  123-4567
 245 | 
 246 | 
 247 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 248 |   Lesson 3.1: Special Characters:  *~
 249 | 
 250 |   You should get to know a few characters with special meaning. You already
 251 |   met  \<  .  *  and  \d . To match any whitespace characters, like spaces and
 252 |   tabs, use  \s  . Similarly,  \w   matches any alphanumeric character, and \n
 253 |   matches newlines. The special form   \_s   to match spaces, tabs AND
 254 |   newlines is discussed in Lesson 3.11.
 255 | 
 256 |   Let's try a few more examples:
 257 | 
 258 |   =========================================================================
 259 |   *6* : Find  words that start with the letter a >
 260 | 
 261 |     /\<a\w*\>
 262 | < =========================================================================
 263 | 
 264 |   Your Turn~
 265 | 
 266 |   1. Go to the line below marked --->
 267 | 
 268 |   2. type  /\<a\w*\>  <ENTER>
 269 | 
 270 |   3. Confirm that all of the words starting with "a" are highlighted
 271 | 
 272 | --->  an apple a day keeps the aardvarks away
 273 | 
 274 |   This works by searching for the beginning of a word  \<  , then the letter
 275 |   "a", then any number of repetitions of alphanumeric characters  \w*  , then
 276 |   the end of a word  \> .
 277 | 
 278 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 279 |   Lesson 3.2: Special Characters:  \+~
 280 | 
 281 |   =========================================================================
 282 |   *7* : Find  repeated strings of digits >
 283 | 
 284 |     /\d\+
 285 | < =========================================================================
 286 | 
 287 |   Here, the  \+  is similar to  *  , except it requires at least one
 288 |   repetition.
 289 | 
 290 |   Your Turn~
 291 | 
 292 |   1. Go to the line below marked --->
 293 | 
 294 |   2. type  /\d\+  <ENTER>
 295 | 
 296 |   3. Confirm that all of the digits and only the digits are highlighted
 297 | 
 298 | --->  apple 1234 5678 900,000 1.23 13:45 %^@#
 299 | 
 300 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 301 |   Lesson 3.3: Six Letter Words~
 302 | 
 303 |   =========================================================================
 304 |   *8* : Find  six letter words >
 305 | 
 306 |     /\<\w\{6\}\>
 307 | 
 308 | <   OR, using very-magic: >
 309 | 
 310 |     /\v<\w{6}>
 311 | < =========================================================================
 312 | 
 313 |   Your Turn~
 314 | 
 315 |   1. Go to the line below marked --->
 316 | 
 317 |   2. Type  /\<\w\{6\}\>   <ENTER>
 318 | 
 319 |   3. Confirm that the word "attend" is highlighted
 320 | 
 321 | --->  Happy times and fine edits attend on thee.
 322 | 
 323 |   Start experimenting by inventing your own expressions. See
 324 |   |pattern-overview|, |ordinary-atom|, and |character-classes| for Vim's
 325 |   special regular expression characters.
 326 | 
 327 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 328 |   Lesson 3.4: In the beginning~
 329 | 
 330 |   The special characters  ^  and  $  are used when looking for something that
 331 |   must start at the beginning of the line and end at the end of the line,
 332 |   respectively.
 333 |   This is especially useful for matching exact text. For example, to find a
 334 |   line containing only a seven-digit phone number, you might use:
 335 | 
 336 |   =========================================================================
 337 |   *9* : Find  a seven-digit phone number on a line by itself >
 338 | 
 339 |     /^\d\{3\}-\d\{4\}$
 340 | 
 341 | <   OR >
 342 | 
 343 |     /^\v\d{3}-\d{4}$
 344 | < =========================================================================
 345 | 
 346 |   This is the same as example |5.1|, but anchored to the whole line,
 347 |   with nothing else before or after the matched text. This start of line
 348 |   character must be placed at the start of the regex pattern, otherwise it
 349 |   will represent a literal "^". The special character \_^  matches the
 350 |   start of a line at any position in the regex.
 351 | 
 352 |   Your Turn~
 353 | 
 354 |   1. Go to the line below marked --->
 355 | 
 356 |   2. Type  /^\d\{3\}-\d\{4\}$   <ENTER>
 357 | 
 358 |   3. Confirm that the line with only a seven-digit phone number is highlighted
 359 | 
 360 | --->
 361 | My phone number is 123-4567, or
 362 | 123-4568 after hours.
 363 | 123-4567
 364 | 
 365 |   Did you notice that even though the first two lines contained a seven-digit
 366 |   phone number, they were not matched because they contained other characters?
 367 | 
 368 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 369 |   Lesson 3.5: Escaped characters~
 370 | 
 371 |   A problem occurs if you actually want to match one of the special
 372 |   characters, like  ^  or  $  . Use the backslash to remove the special
 373 |   meaning. Thus,  \^  ,  \.  , and  \\  match the literal characters "^",
 374 |   ".", and "\" respectively.
 375 | 
 376 |   Your Turn~
 377 | 
 378 |   1. Go to the line below marked --->
 379 | 
 380 |   2. Type  /\^_\^  <ENTER>
 381 | 
 382 |   3. Confirm that the smiley "^_^" is highlighted
 383 | 
 384 | --->  ^_^   $_$   @_%   *_#   !_!   ()_+   <>_{}
 385 | 
 386 |   Can you match the other smilies? Which of those other punctuation symbols
 387 |   need escaping and which do not?
 388 | 
 389 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 390 |   Lesson 3.6: Repetitions~
 391 | 
 392 |   You've seen that  \{3\}  and  *  can be used to indicate repetition of a
 393 |   single character. Later, you'll see how the same syntax can be used to
 394 |   repeat entire subexpressions. There are several other ways to specify a
 395 |   repetition, as shown in |pattern-overview|.
 396 | 
 397 |   NOTE: You no longer need explicit instructions on how to experiment with
 398 |         the patterns in this tutorial. All future search lessons will assume
 399 |         that you use the patterns provided to experiment with and learn from.
 400 |         Test data is available inside each exercise block showing expected
 401 |         matchable text. Where appropriate, false matches and missed text are
 402 |         also provided to emphasise the current learning point.
 403 | 
 404 |   =========================================================================
 405 |   *10* : Find  all five and six letter words >
 406 | 
 407 |      /\<\w\{5,6\}\>
 408 | 
 409 | <    OR >
 410 | 
 411 |      /\v<\w{5,6}>
 412 | <
 413 |      I am a Bear of Very Little Brain, and long words Bother Me.
 414 |   =========================================================================
 415 | 
 416 |   =========================================================================
 417 |   *11* : Find  ten digit phone numbers >
 418 | 
 419 |      /\<\d\{3\}\s\d\{3\}-\d\{4\}
 420 | 
 421 | <    OR >
 422 | 
 423 |      /\v<\d{3}\s\d{3}-\d{4}
 424 | <
 425 |      For a good edit, call 846 968-7615
 426 |   =========================================================================
 427 | 
 428 |   =========================================================================
 429 |   *12* : Find  Social Security Number >
 430 | 
 431 |      /\d\{3\}-\d\{2\}-\d\{4\}
 432 | 
 433 | <    OR >
 434 | 
 435 |      /\v\d{3}-\d{2}-\d{4}
 436 | <
 437 |      111-21-1211
 438 |   =========================================================================
 439 | 
 440 |   =========================================================================
 441 |   *13.1* : Find  The first word in the line >
 442 | 
 443 |      /^\w\+
 444 | <
 445 | Fools to the left of me,
 446 |   jokers to the right,
 447 |   here I am stuck in the middle with Vim.
 448 |   =========================================================================
 449 |   =========================================================================
 450 |   *13.2* : Find  The first word in the line, ignoring leading whitespace >
 451 | 
 452 |      /^\s*\w\+
 453 | <
 454 | Fools to the left of me,
 455 |   jokers to the right,
 456 |   here I am stuck in the middle with Vim.
 457 |   =========================================================================
 458 |   =========================================================================
 459 |   *13.3* : Find  The first word in the line, ignoring AND skipping leading
 460 |                whitespace >
 461 | 
 462 |      /^\s*\zs\w\+
 463 | <
 464 | Fools to the left of me,
 465 |   jokers to the right,
 466 |   here I am stuck in the middle with Vim.
 467 | 
 468 |   NOTE: The special operator   \zs   is discussed in Lesson 5.2
 469 |   =========================================================================
 470 | 
 471 |   Exercise~
 472 | 
 473 |   Use the   $   anchor to match the last 'line' word on this line
 474 | 
 475 |     ANSWER {{{~
 476 |     /line$
 477 |     }}}
 478 | 
 479 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 480 |   Lesson 3.7: Character Classes~
 481 | 
 482 |   It is simple to find alphanumerics, digits, and whitespace, but what if we
 483 |   want to find anything from some other set of characters? This is easily done
 484 |   by listing the desired characters within square brackets. Thus, "[aeiou]"
 485 |   matches any vowel and "[.?!]" matches the punctuation at the end of a
 486 |   sentence. In this example, notice that the "." and "?" lose their special
 487 |   meanings within square brackets and are interpreted literally. We can also
 488 |   specify a range of characters, so "[a-z0-9]" means, "match any lowercase
 489 |   letter of the alphabet, or any digit".
 490 | 
 491 |   Let's try a more complicated expression that searches for telephone numbers.
 492 | 
 493 |   =========================================================================
 494 |   *14* : Find  A ten digit phone number >
 495 | 
 496 |      /(\?\d\{3\}[) ]\s\?\d\{3\}[- ]\d\{4\}
 497 | 
 498 | <    OR >
 499 | 
 500 |      /\v\(?\d{3}[) ]\s?\d{3}[- ]\d{4}
 501 | <
 502 |      (800) 325-3535
 503 |      650 555 1212
 504 |      650) 555-1212     (NOTE: Example of a false positive)
 505 |      Just dial (800)
 506 |      325-3535 now!     (NOTE: Example of a false negative)
 507 |   =========================================================================
 508 | 
 509 |   This expression will find phone numbers in several formats, like "(800)
 510 |   325-3535" or "650 555 1212". The  (\?  searches for zero or one left
 511 |   parentheses,  [) ]  searches for a right parenthesis or a space. The  \s\?
 512 |   searches for zero or one whitespace characters. Unfortunately, it will also
 513 |   find cases like "650) 555-1212" in which the parenthesis is not balanced.
 514 |   Below, you'll see how to use alternatives to eliminate this problem. Another
 515 |   problem you might have noticed is that, by default in Vim, searches do not
 516 |   span across multiple lines. This is evident in the last phone number above:
 517 |   (800) 325-3535, which matches on this line, but not above because it's
 518 |   split over two lines. Solutions to this problem will also be shown below.
 519 | 
 520 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 521 |   Lesson 3.8: Negation~
 522 | 
 523 |   Sometimes we need to search for a character that is NOT a member of an
 524 |   easily defined class of characters. The following table shows how this can
 525 |   be specified:
 526 | 
 527 |     \W         Match any character that is NOT alphanumeric
 528 | 
 529 |     \S         Match any character that is NOT whitespace
 530 | 
 531 |     \D         Match any character that is NOT a digit
 532 | 
 533 |     [^x]       Match any character that is NOT x
 534 | 
 535 |     [^aeiou]   Match any character that is NOT one of the characters aeiou
 536 | 
 537 |   =========================================================================
 538 |   *15* : Find  All strings that do not contain whitespace characters >
 539 | 
 540 |      /\S\+
 541 | <
 542 |      Blessed are those who in the name of Vim edit righteously.
 543 |   =========================================================================
 544 | 
 545 |   Later, we'll see how to use "lookahead" (|/\@=| , |/\@!|) and "lookbehind"
 546 |   (|/\@<=| , |/\@<!|) to search for the absence of more complex patterns.
 547 | 
 548 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 549 |   Lesson 3.9: Alternatives~
 550 | 
 551 |   To select between several alternatives, allowing a match if either one is
 552 |   satisfied, separate them with  \|  (or a bare  |  in very magic mode). For
 553 |   example, Zip Codes come in two flavors, one with 5 digits, the other with 9
 554 |   digits and a hyphen. We can find either with this expression:
 555 | 
 556 |   =========================================================================
 557 |   *16* : Find  Nine and five digit Zip Codes >
 558 | 
 559 |      /\<\d\{5\}-\d\{4\}\>\|\<\d\{5\}\>
 560 | 
 561 | <    OR >
 562 | 
 563 |      /\v<\d{5}-\d{4}>|<\d{5}>
 564 | <
 565 |      12345-1234
 566 |      12345
 567 |      123-456           (NOTE: non-zip codes don't match)
 568 |   =========================================================================
 569 | 
 570 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 571 |   Lesson 3.10: Order of Alternatives is Important~
 572 | 
 573 |   When using alternatives, the order is important since the matching algorithm
 574 |   will attempt to match the leftmost alternative first. If the order is
 575 |   reversed in this example, the expression will only find the 5 digit Zip
 576 |   Codes and fail to find the 9 digit ones.
 577 | 
 578 |   =========================================================================
 579 |   Try it: >
 580 |      /\v<\d{5}>|<\d{5}-\d{4}>
 581 | <
 582 |           12345        (NOTE: This still matches...
 583 |           12345-1234          ...but this does not)
 584 |   =========================================================================
 585 | 
 586 |   We can use alternatives to improve the expression for ten digit phone
 587 |   numbers, allowing the area code to appear either delimited by whitespace or
 588 |   parenthesis:
 589 | 
 590 |   =========================================================================
 591 |   *17.1* : Find  Ten digit phone numbers, a better way >
 592 | 
 593 |      /\((\d\{3\})\|\d\{3\}\)\s\?\d\{3\}[- ]\d\{4\}
 594 | 
 595 | <    OR >
 596 | 
 597 |      /\v(\(\d{3}\)|\d{3})\s?\d{3}[- ]\d{4}
 598 | <
 599 |      (800) 325-3535
 600 |      650 555 1212
 601 |      650) 555-1212     (NOTE: Badly formatted numbers no longer match)
 602 |      Just dial (800)
 603 |      325-3535 now!     (NOTE: Numbers split over a line still fail to match)
 604 |   =========================================================================
 605 | 
 606 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 607 |   Lesson 3.11: Newlines are NOT Matched by Default~
 608 | 
 609 |   That fixes the problem of accidentally matching badly formatted phone
 610 |   numbers, but not the problem of phone numbers split over a line. Let's fix
 611 |   that problem now. Vim limits regex matches to a single line by default. Put
 612 |   another way, Vim -excludes- newlines in matches by default. To override
 613 |   this, we need to use the special   |/\_|   modifiers which -include-
 614 |   newlines in the match. As an example, use   |/\_s|   to capture whitespace
 615 |   (like   \s   ) including newlines. Let's use   \_s   to fix our phone number
 616 |   problem:
 617 | 
 618 |   =========================================================================
 619 |   *17.2* : Find  Ten digit phone numbers, an even better way (with newlines) >
 620 | 
 621 |      /\((\d\{3\})\|\d\{3\}\)\_s*\d\{3\}\_s*-\?\_s*\d\{4\}
 622 | 
 623 | <    OR >
 624 | 
 625 |      /\v(\(\d{3}\)|\d{3})\_s*\d{3}\_[- ]?\d{4}
 626 | <
 627 |      (800) 325-3535
 628 |      650 555 1212
 629 |      650) 555-1212     (NOTE: Badly formatted numbers still don't match)
 630 |      Just dial (800)
 631 |      325-3535 now!     (NOTE: Numbers split over a line now match correctly)
 632 |   =========================================================================
 633 | 
 634 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 635 |   Lesson 4.1: Introducing Grouping~
 636 | 
 637 |   Parentheses may be used to delimit a subexpression to allow repetition or
 638 |   other special treatment. For example:
 639 | 
 640 |   =========================================================================
 641 |   *18* : Find  A simple IP address >
 642 | 
 643 |      /\(\d\{1,3\}\.\)\{3\}\d\{1,3\}
 644 | 
 645 | <    OR >
 646 | 
 647 |      /\v(\d{1,3}\.){3}\d{1,3}
 648 | <
 649 |      192.168.1.1
 650 |      127.0.0.1
 651 |      3.142             (NOTE: non-IP addresses are not matched)
 652 |      1.2.3
 653 |      999.999.999.999   (NOTE: Invalid IP addresses are falsely matched)
 654 |   =========================================================================
 655 | 
 656 |   The first part of the expression searches for a one to three digit number
 657 |   followed by a literal period  .  . This is enclosed in parentheses and
 658 |   repeated three times using the  \{3\}  quantifier, followed by the same
 659 |   expression without the trailing period.
 660 | 
 661 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 662 |   Lesson 4.2: Groups within Groups~
 663 | 
 664 |   Unfortunately, this example allows IP addresses with arbitrary one, two, or
 665 |   three digit numbers separated by periods even though a valid IP address
 666 |   cannot have numbers larger than 255. It would be nice to arithmetically
 667 |   compare a captured number N to enforce N<256, but this is not possible with
 668 |   regular expressions alone. The next example tests various alternatives based
 669 |   on the starting digits to guarantee the limited range of numbers by pattern
 670 |   matching. This shows that an expression can become cumbersome even when
 671 |   looking for a pattern that is simple to describe.
 672 | 
 673 |   =========================================================================
 674 |   *19* : Find  IP addresses (more accurately) >
 675 | 
 676 |      /\(\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)\.\)\{3\}\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)
 677 | <
 678 |      OR >
 679 | 
 680 |      /\v((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)
 681 | <
 682 |      192.168.1.1
 683 |      127.0.0.1
 684 |      3.142             (NOTE: non-IP addresses are not matched)
 685 |      1.2.3
 686 |      999.999.999.999   (NOTE: Invalid IP addresses are not matched)
 687 |   =========================================================================
 688 | 
 689 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 690 |   Lesson 4.3: Backreferences~
 691 | 
 692 |   When subexpressions are grouped with parentheses, the text that matches the
 693 |   subexpression is available further along in the regular expression itself.
 694 |   Groups are numbered sequentially as encountered in reading from left to
 695 |   right, starting with 1.
 696 | 
 697 |   A "backreference" is used to search for a recurrence of previously matched
 698 |   text that has been captured by a group. For example,  \1  means, "match the
 699 |   text that was captured by group 1". Here is an example:
 700 | 
 701 |   =========================================================================
 702 |   *20* : Find  repeated words >
 703 | 
 704 |      /\<\(\w\+\)\>\s*\1\>
 705 | 
 706 | <    OR >
 707 | 
 708 |      /\v<(\w+)>\s*\1>
 709 | <
 710 |      It wasn't that that was impossible.
 711 |   =========================================================================
 712 | 
 713 |   This works by capturing a string of at least one alphanumeric character
 714 |   within group 1  \(\w\+\)  , but only if it begins and ends a word. It then
 715 |   looks for any amount of whitespace  \s*  followed by a repetition of the
 716 |   captured text  \1  ending at the end of a word.
 717 | 
 718 | 
 719 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 720 |   Lesson 5.1: Captures~
 721 | 
 722 |   In the following examples, the Vim regex snippet is given first followed by
 723 |   the equivalent Perl Compatible Regular Expression (PCRE) for those familiar
 724 |   with that style of regular expressions. If you're not familiar with PCRE,
 725 |   don't worry as you will not require a knowledge of it to complete this
 726 |   tutorial or use Vim.
 727 | 
 728 |   NOTE: Some of Vim's ways of handling certain regex features differ from
 729 |         PCRE. Some people complain about this and wonder why Vim didn't just
 730 |         use the already existing PCRE way. The reason is that Vim started
 731 |         getting some of these things at the same time Perl did, or even
 732 |         beforehand. So, it's not that Vim decided to flout history and Go Its
 733 |         Own Way just to be difficult.  Respect that Vim -can- do these things
 734 |         and has been able to do so for a long time now; and suck it up and
 735 |         learn Vim's way. :-)
 736 | 
 737 |   1. Match exp and capture it in an automatically numbered group:
 738 | 
 739 |   Vim:  \(exp\)
 740 |   PCRE: (exp)
 741 | 
 742 |   2. Match exp, but do not capture it:
 743 | 
 744 |   Vim:  \%(exp\)
 745 |   PCRE: (?:exp)
 746 | 
 747 |   The  \%(exp\)  form does not alter the matching behavior, it just doesn't
 748 |   capture it in a group like the  \(exp\)  form.
 749 | 
 750 |   The next four are so-called lookahead or lookbehind assertions. They look for
 751 |   things that go before or after the current match without including them in the
 752 |   match. It is important to understand that these expressions match a position
 753 |   like  ^  or  \<  and never match any text. For this reason, they are known as
 754 |   "zero-width assertions".
 755 | 
 756 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 757 |   Lesson 5.2: Positive lookahead~
 758 | 
 759 |   Matches with zero width if \(exp\) matches at the current position
 760 | 
 761 |   Vim:  \(exp\)\@=
 762 |   PCRE: (?=exp)
 763 | 
 764 |   \(exp\)\@=  is the "zero-width positive lookahead assertion". It matches a
 765 |   position in the text that precedes a given suffix expression, but doesn't
 766 |   include the suffix in the match:
 767 | 
 768 |   =========================================================================
 769 |   *22* : Find  the beginning of words ending with "ing" >
 770 | 
 771 |      /\<\w\+\(ing\>\)\@=
 772 | 
 773 | <    OR >
 774 | 
 775 |      /\v<\w+(ing>)@=
 776 | <
 777 |      Beguiling
 778 |      Alluringly
 779 | 
 780 |      NOTE: Vim provides two other very powerful regex operators:  |/\zs|  and
 781 |            |/\ze|  which can be used in many similar situations as the
 782 |            lookaround operators. The pattern above can also be expressed as: >
 783 | 
 784 |      /\<\w\+\zeing\>
 785 | <
 786 |      Pleasing
 787 |      Obligingly
 788 |   =========================================================================
 789 | 
 790 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 791 |   Lesson 5.3: Positive lookbehind~
 792 | 
 793 |   Matches with zero width if \(exp\) matches just before what follows:
 794 | 
 795 |   Vim:  \(exp\)\@<=
 796 |   PCRE: (?<=exp)
 797 | 
 798 |   \(exp\)\@<=  is the "zero-width positive lookbehind assertion". It matches
 799 |   the position following a prefix, but doesn't include the prefix in the
 800 |   match:
 801 | 
 802 |   =========================================================================
 803 |   *23* : Find  The end of words starting with "re" >
 804 | 
 805 |      /\(\<re\)\@<=\w\+\>
 806 | 
 807 | <   OR (using the |/\zs| atom:) >
 808 | 
 809 |     /\<re\zs\w\+\>
 810 | <
 811 |      Remember, regular repetition results in ready reflexes.
 812 |   =========================================================================
 813 | 
 814 |   Here is an example that could be used repeatedly to insert commas into
 815 |   numbers in groups of three digits:
 816 | 
 817 |   =========================================================================
 818 |   *24* : Find  Three digits at the end of a word, preceded by a digit >
 819 | 
 820 |      /\d\@<=\d\{3\}\>
 821 | 
 822 | <    OR >
 823 | 
 824 |      /\v\d@<=\d{3}>
 825 | <
 826 |      123456789
 827 |      123456,789
 828 |      123,456,789       (NOTE: This number correctly doesn't match)
 829 |   =========================================================================
 830 | 
 831 |   Here is an example that looks for both a prefix and a suffix:
 832 | 
 833 |   =========================================================================
 834 |   *25* : Find  Alphanumeric strings bounded by whitespace >
 835 | 
 836 |      /\s\@<=\w\+\s\@=
 837 | <
 838 |      It's right here.
 839 |   =========================================================================
 840 | 
 841 |   Quiz~
 842 | 
 843 |   Use the pattern in example |24| to add commas to the following number:
 844 | 
 845 | ---> 12345678901
 846 | 
 847 |      ANSWER {{{~
 848 |      Use the following substitution command when your cursor is on the
 849 |      line above marked ---> :   :s/\v\d@<=\d{3}>/,&/
 850 |      You will need to execute it three times to add all the necessary commas.
 851 |      Tip: You can use the   &   key in normal mode to re-execute the last
 852 |      substitution.
 853 |      }}}
 854 | 
 855 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 856 |   Lesson 5.4: Negative lookahead~
 857 | 
 858 |   Matches with zero width if \(exp\) does NOT match at the current position
 859 | 
 860 |   Vim:  \(exp\)\@!
 861 |   PCRE: (?!exp)
 862 | 
 863 |   Earlier, we saw how to search for a character that is not a specific
 864 |   character or the member of a character class. What if we simply want to
 865 |   verify that a character is not present, but don't want to match anything?
 866 |   For example, what if we are searching for words in which the letter "q" is
 867 |   not followed by the letter "u"? We could try:
 868 | 
 869 |   =========================================================================
 870 |   *26* : Find  Words with "q" followed by NOT "u" >
 871 | 
 872 |      /\<\w*q[^u]\w*\>
 873 | <
 874 |      Iqaluit is the capital of Nunavut and Canada's coolest arctic city.
 875 |      QWERTY put the "q" in Compaq but not Iraq.
 876 |      Inqorrectly spelled!
 877 |   =========================================================================
 878 | 
 879 |   Run the example and you will see that it fails when "q" is the last letter
 880 |   of a word, as in "Compaq". This is because  [^u]  always matches a character.
 881 |   If "q" is the last character of the word, it will match the whitespace
 882 |   character that follows, so in the example the expression ends up matching
 883 |   two whole words. Negative lookaround solves this problem because it matches
 884 |   a position and does not consume any text. As with positive lookaround, it
 885 |   can also be used to match the position of an arbitrarily complex
 886 |   subexpression, rather than just a single character. We can now do a better
 887 |   job:
 888 | 
 889 |   =========================================================================
 890 |   *27* : Find  words with "q" not followed by "u" >
 891 | 
 892 |      /\<\w*qu\@!\w*\>
 893 | <
 894 |      Iqaluit is the capital of Nunavut and Canada's coolest arctic city.
 895 |      QWERTY put the "q" in Compaq but not Iraq.
 896 |      Inqorrectly spelled!
 897 |   =========================================================================
 898 | 
 899 |   We used the "zero-width negative lookahead assertion",  \(exp\)\@!  , which
 900 |   succeeds only if the suffix "exp" is not present. Here is another example:
 901 | 
 902 |   =========================================================================
 903 |   *28* : Find  Three digits not followed by another digit >
 904 | 
 905 |      /\d\{3\}\d\@!
 906 | <
 907 |      123
 908 |      123A
 909 |      123 456
 910 |      1234              (NOTE: Matches the -last- three digits, perhaps
 911 |      123456                   surprisingly! How would you force a match
 912 |                               of three digits only?
 913 |                                 ANSWER {{{~
 914 |                                 /\<\d\{3\}\d\@!
 915 |                                 }}}
 916 |                        )
 917 |   =========================================================================
 918 | 
 919 | 
 920 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 921 |   Lesson 5.5: Negative lookbehind~
 922 | 
 923 |   Matches with zero width if \(exp\) does NOT match just before what follows:
 924 | 
 925 |   Vim:  \(exp\)\@<!
 926 |   PCRE: (?<!exp)
 927 | 
 928 |   Similarly, we can use  \(exp\)\@<!  , the "zero-width negative lookbehind
 929 |   assertion", to search for a position in the text at which the prefix "exp"
 930 |   is not present:
 931 | 
 932 |   =========================================================================
 933 |   *29* : Find  Strings of 7 alphanumerics not preceded by a letter or space >
 934 | 
 935 |      /[a-z ]\@<!\w\{7\}
 936 | 
 937 |      OR, if you are not ignoring case (:help 'ignorecase')
 938 | 
 939 |      /[a-zA-Z ]\@<!\w\{7\}
 940 | 
 941 |      OR
 942 | 
 943 |      /\c[a-z ]\@<!\w\{7\}
 944 | <
 945 |      :Vimmers of Penzance: I am the very model of a modern modal editor.
 946 |   =========================================================================
 947 | 
 948 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 949 |   Lesson 5.6: Mixed Lookaround~
 950 | 
 951 |   Here is one more example using lookaround:
 952 | 
 953 |   NOTE: For this type of backreferencing, you will need to use Vim's old
 954 |   regex engine. Read   :help 'regexpengine'   for an explanation. To enable
 955 |   the old regex engine, do:   :set regexpengine=1
 956 | 
 957 |   =========================================================================
 958 |   *30* : Find  Text between HTML tags >
 959 | 
 960 |      /\%(<\1>\)\@<=.*\%(<\/\(\w\+\)>\)\@=
 961 | 
 962 |      OR
 963 | 
 964 |      /\v%(\<\1\>)@<=.*%(\<\/(\w+)\>)@=
 965 | 
 966 |      OR
 967 | 
 968 |      /<\(\w\+\).\{-}>\zs.*\ze<\/\1>
 969 | <
 970 |      <level>Can I play, daddy?</level>
 971 |   =========================================================================
 972 | 
 973 |   This searches for the corresponding closing HTML tag using positive
 974 |   lookbehind and the opening original tag using positive lookahead, thus
 975 |   capturing the intervening text but excluding both tags.
 976 | 
 977 |   NOTE: If you read that last sentence and felt there was something backwards
 978 |         about it... you're right. The part of the pattern after  \@<=  and
 979 |         \@<!  is checked for a match first, thus things like  \1  don't work
 980 |         to reference  \(\)  inside the preceding atom. It does work the other
 981 |         way around as illustrated in the pattern above.
 982 | 
 983 |         Bram was surprised that this pattern actually works at all and
 984 |         suggested that it is probably an indication of bugs in the backreferencing
 985 |         regexp engine. He also kindly provided the much simpler (and more
 986 |         efficient) regexp using the much preferred \zs and \ze atoms:
 987 | 
 988 |             /<\(\w\+\).\{-}>\zs.*\ze<\/\1>
 989 | 
 990 |         In practice, the \zs and \ze atoms are almost always the better
 991 |         choice.
 992 | 
 993 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 994 |   Lesson 6.1: Greedy and Lazy~
 995 | 
 996 |   When a regular expression has a quantifier that can accept a range of
 997 |   repetitions, like  .*  , the normal behavior is to match as many characters
 998 |   as possible. Consider the following regular expression:
 999 | 
1000 |   =========================================================================
1001 |   *32* : Find  The longest string starting with a and ending with b >
1002 | 
1003 |      /a.*b
1004 | <
1005 |      aabab
1006 |   =========================================================================
1007 | 
1008 |   If this is used to search the string "aabab", it will match the entire
1009 |   string "aabab". This is called "greedy" matching. Sometimes, we prefer
1010 |   "lazy" matching in which a match using the minimum number of repetitions is
1011 |   found. The quantifiers we've been playing with so far can all be turned into
1012 |   "lazy" quantifiers by replacing the quantifier with a  \{-}  form. See
1013 |   |/\{-| for the specific details. Thus  \{-}  means "match any number of
1014 |   repetitions, but use the smallest number of repetitions that still leads to
1015 |   a successful match". Now let's try the lazy version of example |32|:
1016 | 
1017 |   NOTE: The  \{-}  form is equivalent to PCRE: *?
1018 | 
1019 |   =========================================================================
1020 |   *33* : Find  The shortest string starting with a and ending with b >
1021 | 
1022 |      /a.\{-}b
1023 | <
1024 |      aabab
1025 | 
1026 |      NOTE: If you press  n  after running this search, you will notice that it
1027 |            first matched "aab" and then "ab".
1028 |   =========================================================================
1029 | 
1030 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1031 |   Lesson 6.2: Vim's Lazy Operators:~
1032 | 
1033 |   NOTE: See the manual sections |/\{| and |non-greedy| for more details.
1034 | 
1035 |   Repeat any number of times, but as few as possible:
1036 |   Vim:  \{-}
1037 |   PCRE: *?
1038 | 
1039 |   Repeat one or more times, but as few as possible:
1040 |   Vim:  \{-1,}
1041 |   PCRE: +?
1042 | 
1043 |   Repeat zero or one time, but as few as possible:
1044 |   Vim:  \{-,1}
1045 |   PCRE: ??
1046 | 
1047 |   Repeat at least n, but no more than m times, but as few as possible:
1048 |   Vim:  \{-n,m}
1049 |   PCRE: {n,m}?
1050 | 
1051 |   Repeat at least n times, but as few as possible:
1052 |   Vim:  \{-n,}
1053 |   PCRE: {n,}?
1054 | 
1055 | 
1056 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1057 |   Exercise 1.1: Inserting thousands separators into numbers.~
1058 | 
1059 |   Visually select the following 10 lines of numbers and write a regular
1060 |   expression that inserts a thousands separator (such as ,) to make them read
1061 |   correctly as shown in the set below.
1062 | 
1063 |      1
1064 |      12
1065 |      123
1066 |      1234
1067 |      12345
1068 |      123456
1069 |      1234567
1070 |      12345678
1071 |      123456789
1072 |      1234567890
1073 | 
1074 |      -=-=-
1075 | 
1076 |      1
1077 |      12
1078 |      123
1079 |      1,234
1080 |      12,345
1081 |      123,456
1082 |      1,234,567
1083 |      12,345,678
1084 |      123,456,789
1085 |      1,234,567,890
1086 | <
1087 | 
1088 |   SOLUTION >
1089 |      :'<,'>s/\d\@<=\(\(\d\{3}\)\+\d\@!\)\@=/,/g
1090 | <
1091 | 
1092 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1093 |   Appendix 1: VimRegEx - a Vim Regex Visualisation Tool~
1094 | 
1095 |   The VimRegEx plugin is a tool which attempts to graphically show you the
1096 |   various portions of your regular expression and the parts of a sample text
1097 |   it matches. Using a tool like this can quickly help you to craft complicated
1098 |   regular expressions with more ease and confidence.
1099 | 
1100 |   Install VimRegEx from:
1101 |     * http://www.vim.org/scripts/script.php?script_id=1091     OR
1102 |     * use the Vim Addon Manager:
1103 |         http://www.vim.org/scripts/script.php?script_id=2905
1104 | 
1105 | 
1106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1107 |   This concludes the Vim Regex Tutor.  It was intended to give a brief
1108 |   overview of regular expressions in the Vim editor - just enough to allow you
1109 |   to start exploring the wonderful and powerful world of regexes on your own.
1110 |   It is far from complete as Vim has many many more regex features.  To learn
1111 |   more about regexes in Vim, see  |pattern.txt|
1112 | 
1113 |   If you want to read a book, I suggest the latest edition of:
1114 | 
1115 |     Mastering Regular Expressions, by Jeffrey Friedl.
1116 | 
1117 |   This tutorial is a rework by Barry Arthur of the original .Net version by
1118 |   Jim Hollenhorst at:
1119 | 
1120 |     http://www.codeproject.com/KB/dotnet/regextutorial.aspx
1121 | 
1122 |   The layout is based on the original  vimtutor  by Michael C. Pierce and
1123 |   Robert K. Ware.
1124 | 
1125 |   Thanks to the following people for providing feedback, fixing bugs and
1126 |   offering suggestions to improve VimRegexTutor: Ben Fritz, Israel Chauca.
1127 | 
1128 |   Licensed under the same terms as Vim itself.
1129 | 
1130 |   Send mistakes and suggestions to barry.arthur@gmail.com or register an Issue
1131 |   at https://github.com/dahu/VimRegexTutor
1132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1133 | 
1134 | # vim:tw=78:ts=8:ft=help:norl:noro:hlsearch:ignorecase fdm=marker
1135 | 


--------------------------------------------------------------------------------
/tutor/regextutor.vim:
--------------------------------------------------------------------------------
  1 | " Vim regextutor support file
  2 | " Maintainer:  Barry Arthur <barry.arthur@gmail.com>
  3 | " Last Change: 2011 Jul 22
  4 | 
  5 | " NOTE: This was shamelessly stolen from Eduardo F. Amatria's
  6 | " (<eferna1@platea.pntic.mec.es>) original tutor.vim.
  7 | " None of the language variants exist for vimregextutor yet, but Eduardo's
  8 | " code was left here in the vain hope that one day it would be useful. :)
  9 | 
 10 | " This Vim script is used for detecting if a translation of the
 11 | " regextutor file exists, i.e., a regextutor.xx file, where xx is the
 12 | " language.  If the translation does not exist, or no extension is given, it
 13 | " defaults to the English version.
 14 | 
 15 | " It is invoked by the vimregextutor shell script.
 16 | 
 17 | " 1. Build the extension of the file, if any ($xx wins when longer than 1 char):
 18 | let s:ext = ""
 19 | if strlen($xx) > 1
 20 |   let s:ext = "." . $xx
 21 | else
 22 |   let s:lang = ""
 23 |   " Try v:lang, $LC_ALL, then $LANG; a usable value has two adjacent letters.
 24 |   " This ignores values such as "1043" and "C".
 25 |   if exists("v:lang") && v:lang =~ '\a\a'
 26 |     let s:lang = v:lang
 27 |   elseif $LC_ALL =~ '\a\a'
 28 |     let s:lang = $LC_ALL
 29 |   elseif $LANG =~ '\a\a'
 30 |     let s:lang = $LANG
 31 |   endif
 32 |   if s:lang != ""
 33 |     " Remove the first "@euro" (ignoring case); it may be at the end.
 34 |     let s:lang = substitute(s:lang, '\c@euro', '', '')
 35 |     " On MS-Windows it may be German_Germany.1252 or Polish_Poland.1250.  How
 36 |     " about other languages?  Anything else uses the first two bytes of s:lang.
 37 |     if s:lang =~ "German"
 38 |       let s:ext = ".de"
 39 |     elseif s:lang =~ "Polish"
 40 |       let s:ext = ".pl"
 41 |     elseif s:lang =~ "Slovak"
 42 |       let s:ext = ".sk"
 43 |     elseif s:lang =~ "Czech"
 44 |       let s:ext = ".cs"
 45 |     elseif s:lang =~ "Dutch"
 46 |       let s:ext = ".nl"
 47 |     else
 48 |       let s:ext = "." . strpart(s:lang, 0, 2)
 49 |     endif
 50 |   endif
 51 | endif
 52 | " Normalise s:ext, then refine it by 'encoding'.  Order matters: each step may rewrite s:ext.
 53 | " Somehow ".ge" (Germany) is sometimes used for ".de" (Deutsch).
 54 | if s:ext =~? '\.ge'
 55 |   let s:ext = ".de"
 56 | endif
 57 | " English is the default version, so it gets no extension.
 58 | if s:ext =~? '\.en'
 59 |   let s:ext = ""
 60 | endif
 61 | 
 62 | " The Japanese regextutor is available in two encodings, guess which one to use
 63 | " The "sjis" one is actually "cp932", it doesn't matter for this text.
 64 | if s:ext =~? '\.ja'
 65 |   if &enc =~ "euc"
 66 |     let s:ext = ".ja.euc"
 67 |   elseif &enc != "utf-8"
 68 |     let s:ext = ".ja.sjis"
 69 |   endif
 70 | endif
 71 | 
 72 | " The Korean regextutor is available in two encodings, guess which one to use
 73 | if s:ext =~? '\.ko'
 74 |   if &enc != "utf-8"
 75 |     let s:ext = ".ko.euc"
 76 |   endif
 77 | endif
 78 | 
 79 | " The Chinese regextutor is available in two encodings, guess which one to use
 80 | " This segment is from the above lines and modified by
 81 | " Mendel L Chan <beos@turbolinux.com.cn> for Chinese vim regextutorial
 82 | if s:ext =~? '\.zh'
 83 |   if &enc =~ 'big5\|cp950'
 84 |     let s:ext = ".zh.big5"
 85 |   elseif &enc != 'utf-8'
 86 |     let s:ext = ".zh.euc"
 87 |   endif
 88 | endif
 89 | 
 90 | " The Polish regextutor is available in two encodings, guess which one to use.
 91 | if s:ext =~? '\.pl'
 92 |   if &enc =~ 1250
 93 |     let s:ext = ".pl.cp1250"
 94 |   endif
 95 | endif
 96 | 
 97 | " The Turkish regextutor is available in two encodings, guess which one to use
 98 | if s:ext =~? '\.tr'
 99 |   if &enc == "iso-8859-9"
100 |     let s:ext = ".tr.iso9"
101 |   endif
102 | endif
103 | 
104 | " The Greek regextutor is available in three encodings, guess what to use.
105 | " We used ".gr" (Greece) instead of ".el" (Greek); accept both.
106 | if s:ext =~? '\.gr\|\.el'
107 |   if &enc == "iso-8859-7"
108 |     let s:ext = ".el"
109 |   elseif &enc == "utf-8"
110 |     let s:ext = ".el.utf-8"
111 |   elseif &enc =~ 737
112 |     let s:ext = ".el.cp737"
113 |   endif
114 | endif
115 | 
116 | " The Slovak regextutor is available in three encodings, guess which one to use
117 | if s:ext =~? '\.sk'
118 |   if &enc =~ 1250
119 |     let s:ext = ".sk.cp1250"
120 |   endif
121 | endif
122 | 
123 | " The Czech regextutor is available in three encodings, guess which one to use
124 | if s:ext =~? '\.cs'
125 |   if &enc =~ 1250
126 |     let s:ext = ".cs.cp1250"
127 |   endif
128 | endif
129 | 
130 | " The Russian regextutor is available in three encodings, guess which one to use.
131 | if s:ext =~? '\.ru'
132 |   if &enc =~ '1251'
133 |     let s:ext = '.ru.cp1251'
134 |   elseif &enc =~ 'koi8'
135 |     let s:ext = '.ru'
136 |   endif
137 | endif
138 | 
139 | " The Hungarian regextutor is available in three encodings, guess which one to use.
140 | if s:ext =~? '\.hu'
141 |   if &enc =~ 1250
142 |     let s:ext = ".hu.cp1250"
143 |   elseif &enc =~ 'iso-8859-2'
144 |     let s:ext = '.hu'
145 |   endif
146 | endif
147 | 
148 | " The Croatian regextutor is available in three encodings, guess which one to use.
149 | if s:ext =~? '\.hr'
150 |   if &enc =~ 1250
151 |     let s:ext = ".hr.cp1250"
152 |   elseif &enc =~ 'iso-8859-2'
153 |     let s:ext = '.hr'
154 |   endif
155 | endif
156 | 
157 | " Esperanto is only available in utf-8
158 | if s:ext =~? '\.eo'
159 |   let s:ext = ".eo.utf-8"
160 | endif
161 | " Vietnamese is only available in utf-8
162 | if s:ext =~? '\.vi'
163 |   let s:ext = ".vi.utf-8"
164 | endif
165 | 
166 | " If 'encoding' is utf-8 and s:ext does not already contain ".utf-8", append it.
167 | if &enc == 'utf-8' && s:ext !~ '\.utf-8'
168 |   let s:ext .= '.utf-8'
169 | endif
170 | 
171 | " 2. Build the name of the file (searched for along 'runtimepath'):
172 | let s:regextutorfile = "tutor/regextutor"
173 | let s:regextutorxx = findfile(s:regextutorfile . s:ext, &rtp)
174 | 
175 | " 3. Finding the file:
176 | if filereadable(s:regextutorxx)
177 |   " The variant matching s:ext was found and is readable; use it.
178 |   let $REGEXTUTOR = s:regextutorxx
179 | else
180 |   " findfile() yields "" when nothing is found, so report the name searched for.
181 |   let $REGEXTUTOR = findfile(s:regextutorfile, &rtp)
182 |   echo "The file " . s:regextutorfile . s:ext . " does not exist.\n"
183 |   echo "Copying English version: " . $REGEXTUTOR
184 |   4sleep
185 | endif
186 | 
187 | " 4. Making the copy ($REGEXTUTORCOPY is read from the environment) and exiting Vim:
188 | e $REGEXTUTOR
189 | wq! $REGEXTUTORCOPY
190 | 


--------------------------------------------------------------------------------
/vimgor:
--------------------------------------------------------------------------------
1 | pcre-tutor is <reply>Lea Verou's Demystifying Regular Expressions ( http://www.youtube.com/watch?v=EkluES9Rvak ) is a good introduction to PCRE.
2 | 
3 | regtutor is <reply>The command :VimRegexTutor provides a Vim style regular expression tutorial in the same manner as vimtutor ::: https://github.com/dahu/VimRegexTutor . Also, http://vimregex.com/ is a reasonable online tutorial. See pcre-tutor
4 | 


--------------------------------------------------------------------------------