├── README.asciidoc ├── bin └── vimregextutor ├── plugin └── vimregextutor.vim ├── tutor ├── regextutor ├── regextutor.utf-8 └── regextutor.vim └── vimgor /README.asciidoc: -------------------------------------------------------------------------------- 1 | == VimRegexTutor 2 | 3 | __A "hands on" regular expression tutorial for users of the Vim editor.__ 4 | 5 | TIP: If you like VimRegexTutor and want to share the W00t!, I'm grateful for 6 | https://www.gittip.com/bairuidahu/[tips] or 7 | http://of-vim-and-vigor.blogspot.com/[beverages]. 8 | 9 | [horizontal] 10 | **Project Page** :: https://github.com/dahu/VimRegexTutor 11 | **Maintainer** :: Barry Arthur 12 | **Status** :: Beta, feedback welcome 13 | **Adapted From** :: http://www.codeproject.com/Articles/9099/The-30-Minute-Regex-Tutorial 14 | 15 | Most new users can get through it in less than one hour. The result is that you 16 | can use simple regular expressions using the Vim editor. 17 | 18 | === `:VimRegexTutor` 19 | 20 | The `:VimRegexTutor` command will open a new tab containing an editable copy of 21 | the tutorial. Have at it! 22 | 23 | === Old School 24 | 25 | `regextutor` is a file that contains the tutorial lessons. You can simply 26 | execute `vim regextutor` and then follow the instructions in the lessons. The 27 | lessons tell you to modify the file, so *DON'T DO THIS ON YOUR ORIGINAL COPY*. 28 | 29 | On Unix you can also use the `vimregextutor` program. It will make a scratch 30 | copy of the tutor first. Currently, this program needs to be manually installed 31 | into a directory in your PATH. Assuming you used pathogen to install 32 | VimRegexTutor, you could do something like: 33 | 34 | cd 35 | mkdir -p bin 36 | ln -s $HOME/.vim/bundle/VimRegexTutor/bin/vimregextutor $HOME/bin/vimregextutor 37 | 38 | **NOTE:** This is a once-off setup and won't need to be repeated after 39 | upgrading VimRegexTutor.
40 |
--------------------------------------------------------------------------------
/bin/vimregextutor:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 |
3 | # Start Vim on a copy of the regextutor file.
4 |
5 | # Usage: vimregextutor [-g] [xx]
6 | # Where optional argument -g starts vimregextutor in gvim (GUI) instead of vim.
7 | # and xx is a language code like "es" or "nl".
8 | # When an argument is given, it tries loading that tutor.
9 | # When this fails or no argument was given, it tries using 'v:lang'
10 | # When that also fails, it uses the English version.
11 |
12 | # Vim could be called "vim" or "vi". Also check for "vimN", for people who
13 | # have Vim installed with its version number.
14 | # We anticipate up to a future Vim 8 version :-).
15 | seq="vim vim8 vim75 vim74 vim73 vim72 vim71 vim70 vim7 vim6 vi"
16 | if test "$1" = "-g"; then
17 |   # Try to use the GUI version of Vim if possible, it will fall back
18 |   # on Vim if Gvim is not installed.
19 |   seq="gvim gvim8 gvim75 gvim74 gvim73 gvim72 gvim71 gvim70 gvim7 gvim6 $seq"
20 |   shift
21 | fi
22 |
23 | # Export the requested language code to the child Vim processes.
24 | # NOTE(review): presumably read by tutor/regextutor.vim (not shown here).
25 | xx=$1
26 | export xx
27 |
28 | # We need a temp file for the copy. First try using a standard command.
29 | # The paths are quoted so a TMPDIR containing whitespace still works.
30 | tmp="${TMPDIR-/tmp}"
31 | REGEXTUTORCOPY=`mktemp "$tmp/regextutorXXXXXX" || tempfile -p regextutor || echo none`
32 |
33 | # If the standard commands failed then create a directory to put the copy in.
34 | # That is a secure way to make a temp file.
35 | if test "$REGEXTUTORCOPY" = none; then
36 |   tmpdir=$tmp/vimregextutor$$
37 |   OLD_UMASK=`umask`
38 |   # Create the directory with owner-only permissions, then restore the umask.
39 |   umask 077
40 |   getout=no
41 |   mkdir "$tmpdir" || getout=yes
42 |   umask $OLD_UMASK
43 |   if test $getout = yes; then
44 |     echo "Could not create directory for regextutor copy, exiting."
45 |     exit 1
46 |   fi
47 |   REGEXTUTORCOPY=$tmpdir/tutorcopy
48 |   touch "$REGEXTUTORCOPY"
49 |   TODELETE=$tmpdir
50 | else
51 |   TODELETE=$REGEXTUTORCOPY
52 | fi
53 |
54 | # Exported so the Vim processes started below can see where the copy lives.
55 | export REGEXTUTORCOPY
56 |
57 | # remove the copy of the tutor on exit
58 | # Signal 9 (SIGKILL) cannot be caught, so it is not listed. The single quotes
59 | # defer expansion until the trap runs and keep the path quoted for rm.
60 | trap 'rm -rf "$TODELETE"' 0 1 2 3 11 13 15
61 |
62 | # Pick the first candidate from $seq that resolves to an existing file.
63 | for i in $seq; do
64 |   testvim=`which $i 2>/dev/null`
65 |   if test -f "$testvim"; then
66 |     VIM=$i
67 |     break
68 |   fi
69 | done
70 |
71 | # When no Vim version was found fall back to "vim", you'll get an error message
72 | # below.
73 | if test -z "$VIM"; then
74 |   VIM=vim
75 | fi
76 |
77 | # Use Vim to copy the tutor, it knows the value of $VIMRUNTIME
78 | # The script regextutor.vim tells Vim which file to copy
79 | $VIM -f -c 'ru tutor/regextutor.vim'
80 |
81 | # Start vim
82 | $VIM -f "$REGEXTUTORCOPY"
83 |
--------------------------------------------------------------------------------
/plugin/vimregextutor.vim:
--------------------------------------------------------------------------------
1 | " Vim global plugin for learning and practicing Vim style regular expressions
2 | " Maintainer: Barry Arthur
3 | " Version: 0.1
4 | " License: Vim License (see :help license)
5 | " Location: plugin/vimregextutor.vim
6 | " Website: https://github.com/dahu/vimregextutor
7 | "
8 | " See vimregextutor.txt for help. This can be accessed by doing:
9 | "
10 | " :helptags ~/.vim/doc
11 | " :help vimregextutor
12 |
13 | " Vimscript Setup: {{{1
14 | " Allow use of line continuation.
15 | let s:save_cpo = &cpo
16 | set cpo&vim
17 |
18 | "if exists("g:loaded_vimregextutor")
19 | " \ || v:version < 700
20 | " \ || v:version == 703 && !has('patch338')
21 | " \ || &compatible
22 | " let &cpo = s:save_cpo
23 | " finish
24 | "endif
25 | let g:loaded_vimregextutor = 1
26 |
27 | " Plugin root directory: two levels up from this script (plugin/ -> root).
28 | " <sfile> must be expanded while the script is being sourced, so the value is
29 | " captured here and not inside the function.
30 | let s:script_file = expand('<sfile>:p:h:h')
31 |
32 | " Public Interface: {{{1
33 | " Open the tutorial in a new tab. The lessons are read from tutor/regextutor
34 | " under the plugin root into a buffer with buftype=nofile and no swap file,
35 | " so practising in it does not write to the tutorial file on disk.
36 | function! VimRegexTutor()
37 |   tabnew
38 |   setlocal buftype=nofile
39 |   setlocal bufhidden=hide
40 |   setlocal noswapfile
41 |   call setline(1, readfile(s:script_file . '/tutor/regextutor'))
42 |   " Put the cursor on the first line of the tutorial.
43 |   1
44 | endfunction
45 |
46 | " Commands: {{{1
47 | " Both commands open the tutorial. Function names are case-sensitive, so each
48 | " must call VimRegexTutor() exactly.
49 | command! -nargs=0 -bar VimRegexTutor call VimRegexTutor()
50 | command! -nargs=0 -bar RegexTutor call VimRegexTutor()
51 |
52 | " Teardown: {{{1
53 | " reset &cpo back to users setting
54 | let &cpo = s:save_cpo
55 |
56 | " Template From: https://github.com/dahu/Area-41/
57 | " vim: set sw=2 sts=2 et fdm=marker:
58 |
--------------------------------------------------------------------------------
/tutor/regextutor:
--------------------------------------------------------------------------------
1 | ===============================================================================
2 | = W e l c o m e t o t h e V I M R e g e x T u t o r - Version 0.2 =
3 | ===============================================================================
4 |
5 | Vim is a very powerful editor with a very powerful search and replace
6 | system based on Regular Expressions. This tutor is designed to describe
7 | enough of those features that you will be able to more powerfully use
8 | Vim as an all-purpose editor.
9 |
10 | The approximate time required to complete the tutor is 30 minutes,
11 | depending upon how much time is spent with experimentation.
12 |
13 | ATTENTION:~
14 | The commands in the lessons will modify the text. Make a copy of this
15 | file to practise on (if you started "vimregextutor" this is already a
16 | copy).
17 |
18 | It is important to remember that this tutor is set up to teach by use.
19 | That means that you need to execute the commands to learn them
20 | properly. If you only read the text, you will forget the commands!
21 |
22 | If you haven't already completed the vimtutor, it is highly recommended
23 | that you do so first before attempting this tutorial.
24 | 25 | Occasional references to the Vim documentation are made throughout this 26 | tutorial. Such entries look like this: |'ignorecase'|. To open those 27 | entries, type (in normal mode - so press <Esc> to leave insert mode 28 | first) :help followed by the exact entry. In this case, you would 29 | type: :help 'ignorecase' 30 | 31 | What the Heck is a Regular Expression Anyway?~ 32 | 33 | I'm sure you are familiar with the use of "wildcard" characters for 34 | pattern matching. For example, if you want to find all the text files 35 | in a directory, you search for "*.txt", knowing that the asterisk is 36 | interpreted as a wildcard that can match any sequence of characters. 37 | Regular expressions are just an elaborate extension of this capability. 38 | 39 | When manipulating text, it is frequently necessary to locate strings 40 | that match complex patterns. Regular expressions were invented to 41 | describe such patterns. Thus, a regular expression is just a shorthand 42 | code for a pattern. For example, the pattern \w\+ is a concise way to 43 | say "match any non-null strings of alphanumeric characters". Vim 44 | provides a rich and powerful regular expression vocabulary with which 45 | you can readily and efficiently search and replace text. 46 | 47 | A good way to learn the arcane syntax of regular expressions is by 48 | starting with examples and then experimenting with your own creations. 49 | This tutorial introduces the basics of regular expressions, giving many 50 | common examples. The additional Vim plugin, regexcoach, can be used to 51 | try out the examples and to experiment with your own regular 52 | expressions. 53 | 54 | NOTE: All of the exercises in this tutorial will use plain (non-|magic|-al) 55 | regular expressions. Frequently a very-magic (using the |\v| option) 56 | equivalent is shown alongside the original. The purpose of magic and 57 | the \v option is explained in Lesson 2.3. 58 | 59 | Let's get started!
60 | 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | Lesson 1.1: Searching for Elvis~ 63 | 64 | Suppose you spend all your free time scanning documents looking for evidence 65 | that Elvis is still alive. You could search with the following regular 66 | expression: 67 | 68 | ========================================================================= 69 | *1* : Find elvis > 70 | 71 | /elvis 72 | < ========================================================================= 73 | 74 | This is a perfectly valid regular expression that searches for an exact 75 | sequence of characters. In Vim, you can set the |'ignorecase'| option 76 | to ignore the case of characters, so this expression will match 77 | "Elvis", "ELVIS", or "eLvIs". Unfortunately, it will also match the 78 | last five letters of the word "pelvis". 79 | 80 | Your Turn~ 81 | 82 | 1. Make sure you have search highlighting enabled: :set hlsearch 83 | 84 | 2. Go to the line below marked ---> 85 | 86 | 3. type /elvis 87 | 88 | 4. Confirm that the letters "elvis" are highlighted 89 | 90 | ---> a) Rare are the sightings of elves, as are those of elvis himself. 91 | b) The pelvis of elvis was seldom still. 92 | 93 | NOTE: You can use n to move to the next match and :nohl to clear 94 | the search highlight. 95 | 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | Lesson 1.2: Limiting the search to a whole word~ 98 | 99 | Did you notice how the word "pelvis" is partially matched with the 100 | search /elvis ? We can improve the expression as follows: 101 | 102 | ========================================================================= 103 | *2* : Find elvis as a whole word > 104 | 105 | /\<elvis\> 106 | < ========================================================================= 107 | 108 | Now things are getting a little more interesting. The \< is a special code 109 | that means, "match the position at the beginning of any word".
Likewise, \> 110 | means "match the position at the end of any word". This expression will only 111 | match complete words spelled "elvis" with any combination of lower case or 112 | capital letters (if |'ignorecase'| is enabled). 113 | 114 | Your Turn~ 115 | 116 | 1. Go to the line below marked ---> 117 | 118 | 2. type /\<elvis\> 119 | 120 | 3. Confirm that only the word "elvis" (and not "pelvis") is highlighted 121 | 122 | ---> a) Rare are the sightings of elves, as are those of elvis himself. 123 | b) The pelvis of elvis was seldom still. 124 | 125 | Did you notice how the word "pelvis" is now NOT matched with this 126 | enhanced regex? 127 | 128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 129 | Lesson 1.3: Searching for everything between two words~ 130 | 131 | Suppose you want to find all lines in which the word "elvis" is followed by 132 | the word "alive." The period or dot . is a special code that matches any 133 | character other than a newline. The asterisk * means repeat the previous 134 | term as many times as necessary to guarantee a match. Thus, .* means 135 | "match any number of characters other than newline". It is now a simple 136 | matter to build an expression that means 'search for the word "elvis" 137 | followed anywhere thereafter on the same line by the word "alive"'. 138 | 139 | ========================================================================= 140 | *3* : Find elvis followed by anything and then followed by alive > 141 | 142 | /\<elvis\>.*\<alive\> 143 | < ========================================================================= 144 | 145 | With just a few special characters we are beginning to build powerful 146 | regular expressions and they are already becoming hard for us humans to 147 | read. 148 | 149 | Your Turn~ 150 | 151 | 1. Go to the line below marked ---> 152 | 153 | 2. type /\<elvis\>.*\<alive\> 154 | 155 | 3.
Confirm that everything between "elvis" and "alive" is highlighted 156 | 157 | ---> Rare are the sightings of elvis, more so those of him being alive. 158 | 159 | 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 161 | Lesson 2.1: Determining the Validity of Phone Numbers~ 162 | 163 | Suppose you're editing a file which contains seven-digit phone numbers 164 | and you want to verify that the phone numbers are in the correct format, 165 | "xxx-xxxx", where each "x" is a digit. The following expression will 166 | search through text looking for such a string: 167 | 168 | ========================================================================= 169 | *4* : Find seven-digit phone number > 170 | 171 | /\<\d\d\d-\d\d\d\d 172 | < ========================================================================= 173 | 174 | Your Turn~ 175 | 176 | 1. Go to the line below marked ---> 177 | 178 | 2. type /\<\d\d\d-\d\d\d\d 179 | 180 | 3. Confirm that the seven-digit phone number is highlighted 181 | 182 | ---> 123-4567 183 | 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185 | Lesson 2.2: Specifying repetition more concisely~ 186 | 187 | Each \d means "match any single digit". The "-" has no special 188 | meaning and is interpreted literally, matching a hyphen. To avoid the 189 | annoying repetition, we can use a shorthand notation that means the 190 | same thing: 191 | 192 | ========================================================================= 193 | *5.1* : Find seven-digit phone number (more concisely) > 194 | 195 | /\<\d\{3\}-\d\{4\} 196 | < ========================================================================= 197 | 198 | Your Turn~ 199 | 200 | 1. Go to the line below marked ---> 201 | 202 | 2. type /\<\d\{3\}-\d\{4\} 203 | 204 | 3. 
Confirm that the seven-digit phone number is highlighted 205 | 206 | ---> 123-4567 207 | 208 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 209 | Lesson 2.3: It's Magic~ 210 | 211 | The \{3\} following the \d means "repeat the preceding character 212 | three times". The { and } in that expression need to be escaped 213 | with \ which can quickly become annoying for large expressions. Vim 214 | uses the \m regex pattern to enable |'magic'| mode, which helps to 215 | reduce the amount of \ escaping needed in some patterns. 216 | Additionally, Vim also provides the \v pattern to enable "very magic" 217 | mode, even further reducing the need to escape certain elements. 218 | Using magic, the previous pattern becomes: 219 | 220 | ========================================================================= 221 | *5.2* : Find seven-digit phone number (using magic for better 222 | readability) > 223 | 224 | /\m\<\d\{3}-\d\{4} 225 | < ========================================================================= 226 | 227 | OR, with very magic: 228 | 229 | ========================================================================= 230 | *5.3* : Find seven-digit phone number (using very magic for even 231 | better readability) > 232 | 233 | /\v<\d{3}-\d{4} 234 | < ========================================================================= 235 | 236 | Your Turn~ 237 | 238 | 1. Go to the line below marked ---> 239 | 240 | 2. type /\v<\d{3}-\d{4} 241 | 242 | 3. Confirm that the seven-digit phone number is highlighted 243 | 244 | ---> 123-4567 245 | 246 | 247 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 248 | Lesson 3.1: Special Characters: *~ 249 | 250 | You should get to know a few characters with special meaning. You already 251 | met \< . * and \d . To match any whitespace characters, like spaces and 252 | tabs, use \s . Similarly, \w matches any alphanumeric character, and \n 253 | matches newlines. 
The special form \_s to match spaces, tabs AND 254 | newlines is discussed in Lesson 3.11. 255 | 256 | Let's try a few more examples: 257 | 258 | ========================================================================= 259 | *6* : Find words that start with the letter a > 260 | 261 | /\<a\w*\> 262 | < ========================================================================= 263 | 264 | Your Turn~ 265 | 266 | 1. Go to the line below marked ---> 267 | 268 | 2. type /\<a\w*\> 269 | 270 | 3. Confirm that all of the words starting with "a" are highlighted 271 | 272 | ---> an apple a day keeps the aardvarks away 273 | 274 | This works by searching for the beginning of a word \< , then the letter 275 | "a", then any number of repetitions of alphanumeric characters \w* , then 276 | the end of a word \> . 277 | 278 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 279 | Lesson 3.2: Special Characters: \+~ 280 | 281 | ========================================================================= 282 | *7* : Find repeated strings of digits > 283 | 284 | /\d\+ 285 | < ========================================================================= 286 | 287 | Here, the \+ is similar to * , except it requires at least one 288 | repetition. 289 | 290 | Your Turn~ 291 | 292 | 1. Go to the line below marked ---> 293 | 294 | 2. type /\d\+ 295 | 296 | 3. Confirm that all of the digits and only the digits are highlighted 297 | 298 | ---> apple 1234 5678 900,000 1.23 13:45 %^@# 299 | 300 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 301 | Lesson 3.3: Six Letter Words~ 302 | 303 | ========================================================================= 304 | *8* : Find six letter words > 305 | 306 | /\<\w\{6\}\> 307 | 308 | < OR, using very-magic: > 309 | 310 | /\v<\w{6}> 311 | < ========================================================================= 312 | 313 | Your Turn~ 314 | 315 | 1. Go to the line below marked ---> 316 | 317 | 2.
Type /\<\w\{6\}\> 318 | 319 | 3. Confirm that the word "attend" is highlighted 320 | 321 | ---> Happy times and fine edits attend on thee. 322 | 323 | Start experimenting by inventing your own expressions. See 324 | |pattern-overview|, |ordinary-atom|, and |character-classes| for Vim's 325 | special regular expression characters. 326 | 327 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 328 | Lesson 3.4: In the beginning~ 329 | 330 | The special characters ^ and $ are used when looking for something that 331 | must start at the beginning of the line and end at the end of the line, 332 | respectively. 333 | This is especially useful for matching exact text. For example, to find a 334 | line containing only a seven-digit phone number, you might use: 335 | 336 | ========================================================================= 337 | *9* : Find a seven-digit phone number on a line by itself > 338 | 339 | /^\d\{3\}-\d\{4\}$ 340 | 341 | < OR > 342 | 343 | /^\v\d{3}-\d{4}$ 344 | < ========================================================================= 345 | 346 | This is the same as example [|5|], but anchored to the whole line, 347 | with nothing else before or after the matched text. This start of line 348 | character must be placed at the start of the regex pattern, otherwise it 349 | will represent a literal "^". The special form \_^ matches the start of 350 | a line at any position in the regex. 351 | 352 | Your Turn~ 353 | 354 | 1. Go to the line below marked ---> 355 | 356 | 2. Type /^\d\{3\}-\d\{4\}$ 357 | 358 | 3. Confirm that the line with only a seven-digit phone number is highlighted 359 | 360 | ---> 361 | My phone number is 123-4567, or 362 | 123-4568 after hours. 363 | 123-4567 364 | 365 | Did you notice that even though the first two lines contained a seven-digit 366 | phone number, they were not matched because they contained other characters?
367 | 368 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 369 | Lesson 3.5: Escaped characters~ 370 | 371 | A problem occurs if you actually want to match one of the special 372 | characters, like ^ or $ . Use the backslash to remove the special 373 | meaning. Thus, \^ , \. , and \\ match the literal characters "^", 374 | ".", and "\" respectively. 375 | 376 | Your Turn~ 377 | 378 | 1. Go to the line below marked ---> 379 | 380 | 2. Type /\^_\^ 381 | 382 | 3. Confirm that the smiley "^_^" is highlighted 383 | 384 | ---> ^_^ $_$ @_% *_# !_! ()_+ <>_{} 385 | 386 | Can you match the other smilies? Which of those other punctuation symbols 387 | need escaping and which do not? 388 | 389 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 390 | Lesson 3.6: Repetitions~ 391 | 392 | You've seen that \{3\} and * can be used to indicate repetition of a 393 | single character. Later, you'll see how the same syntax can be used to 394 | repeat entire subexpressions. There are several other ways to specify a 395 | repetition, as shown in |pattern-overview|. 396 | 397 | NOTE: You no longer need explicit instructions on how to experiment with 398 | the patterns in this tutorial. All future search lessons will assume 399 | that you use the patterns provided to experiment with and learn from. 400 | Test data is available inside each exercise block showing expected 401 | matchable text. Where appropriate, false matches and missed text are 402 | also provided to emphasise the current learning point. 403 | 404 | ========================================================================= 405 | *10* : Find all five and six letter words > 406 | 407 | /\<\w\{5,6\}\> 408 | 409 | < OR > 410 | 411 | /\v<\w{5,6}> 412 | < 413 | I am a Bear of Very Little Brain, and long words Bother Me.
414 | ========================================================================= 415 | 416 | ========================================================================= 417 | *11* : Find ten digit phone numbers > 418 | 419 | /\<\d\{3\}\s\d\{3\}-\d\{4\} 420 | 421 | < OR > 422 | 423 | /\v<\d{3}\s\d{3}-\d{4} 424 | < 425 | For a good edit, call 846 968-7615 426 | ========================================================================= 427 | 428 | ========================================================================= 429 | *12* : Find Social Security Number > 430 | 431 | /\d\{3\}-\d\{2\}-\d\{4\} 432 | 433 | < OR > 434 | 435 | /\v\d{3}-\d{2}-\d{4} 436 | < 437 | 111-21-1211 438 | ========================================================================= 439 | 440 | ========================================================================= 441 | *13.1* : Find The first word in the line > 442 | 443 | /^\w\+ 444 | < 445 | Fools to the left of me, 446 | jokers to the right, 447 | here I am stuck in the middle with Vim. 448 | ========================================================================= 449 | ========================================================================= 450 | *13.2* : Find The first word in the line, ignoring leading whitespace > 451 | 452 | /^\s*\w\+ 453 | < 454 | Fools to the left of me, 455 | jokers to the right, 456 | here I am stuck in the middle with Vim. 457 | ========================================================================= 458 | ========================================================================= 459 | *13.3* : Find The first word in the line, ignoring AND skipping leading 460 | whitespace > 461 | 462 | /^\s*\zs\w\+ 463 | < 464 | Fools to the left of me, 465 | jokers to the right, 466 | here I am stuck in the middle with Vim.
467 | 468 | NOTE: The special operator \zs is discussed in Lesson 5.2 469 | ========================================================================= 470 | 471 | Exercise~ 472 | 473 | Use the $ anchor to match the last 'line' word on this line 474 | 475 | ANSWER {{{~ 476 | /line$ 477 | }}} 478 | 479 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 480 | Lesson 3.7: Character Classes~ 481 | 482 | It is simple to find alphanumerics, digits, and whitespace, but what if we 483 | want to find anything from some other set of characters? This is easily done 484 | by listing the desired characters within square brackets. Thus, "[aeiou]" 485 | matches any vowel and "[.?!]" matches the punctuation at the end of a 486 | sentence. In this example, notice that the "." and "?" lose their special 487 | meanings within square brackets and are interpreted literally. We can also 488 | specify a range of characters, so "[a-z0-9]" means, "match any lowercase 489 | letter of the alphabet, or any digit". 490 | 491 | Let's try a more complicated expression that searches for telephone numbers. 492 | 493 | ========================================================================= 494 | *14* : Find A ten digit phone number > 495 | 496 | /(\?\d\{3\}[) ]\s\?\d\{3\}[- ]\d\{4\} 497 | 498 | < OR > 499 | 500 | /\v\(?\d{3}[) ]\s?\d{3}[- ]\d{4} 501 | < 502 | (800) 325-3535 503 | 650 555 1212 504 | 650) 555-1212 (NOTE: Example of a false positive) 505 | Just dial (800) 506 | 325-3535 now! (NOTE: Example of a false negative) 507 | ========================================================================= 508 | 509 | This expression will find phone numbers in several formats, like "(800) 510 | 325-3535" or "650 555 1212". The (\? searches for zero or one left 511 | parentheses, [) ] searches for a right parenthesis or a space. The \s\? 512 | searches for zero or one whitespace characters. 
Unfortunately, it will also 513 | find cases like "650) 555-1212" in which the parenthesis is not balanced. 514 | Below, you'll see how to use alternatives to eliminate this problem. Another 515 | problem you might have noticed is that, by default in Vim, searches do not 516 | span across multiple lines. This is evident in the first phone number above: 517 | (800) 325-3535, which matches on this line, but not above because it's 518 | split over two lines. Solutions to this problem will also be shown below. 519 | 520 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 521 | Lesson 3.8: Negation~ 522 | 523 | Sometimes we need to search for a character that is NOT a member of an 524 | easily defined class of characters. The following table shows how this can 525 | be specified: 526 | 527 | \W Match any character that is NOT alphanumeric 528 | 529 | \S Match any character that is NOT whitespace 530 | 531 | \D Match any character that is NOT a digit 532 | 533 | [^x] Match any character that is NOT x 534 | 535 | [^aeiou] Match any character that is NOT one of the characters aeiou 536 | 537 | ========================================================================= 538 | *15* : Find All strings that do not contain whitespace characters > 539 | 540 | /\S\+ 541 | < 542 | Blessed are those who in the name of Vim edit righteously. 
543 | ========================================================================= 544 | 545 | Later, we'll see how to use "lookahead" (|/\@=| , |/\@!|) and "lookbehind" 546 | (|/\@<=| , |/\@<!|) to search for the absence of more complex patterns. 547 | 548 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 549 | Lesson 3.9: Alternatives~ 550 | 551 | To select between several alternatives, allowing a match if any one of them 552 | is satisfied, separate the alternatives with \| . For example, Zip Codes 553 | come in two flavors, one with 5 digits, the other with 9 digits and a 554 | hyphen. We can find either kind with this expression: 555 | 556 | ========================================================================= 557 | *16* : Find Five and nine digit Zip Codes > 558 | 559 | /\<\d\{5\}-\d\{4\}\>\|\<\d\{5\}\> 560 | 561 | < OR > 562 | 563 | /\v<\d{5}-\d{4}>|<\d{5}> 564 | < 565 | 12345-1234 566 | 12345 567 | 123-456 (NOTE: non-zip codes don't match) 568 | ========================================================================= 569 | 570 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 571 | Lesson 3.10: Order of Alternatives is Important~ 572 | 573 | When using alternatives, the order is important since the matching algorithm 574 | will attempt to match the leftmost alternative first. If the order is 575 | reversed in this example, the expression will only find the 5 digit Zip 576 | Codes and fail to find the 9 digit ones. 577 | 578 | ========================================================================= 579 | Try it: > 580 | /\v<\d{5}>|<\d{5}-\d{4}> 581 | < 582 | 12345 (NOTE: This still matches... 583 | 12345-1234 ...but this does not) 584 | ========================================================================= 585 | 586 | We can use alternatives to improve the expression for ten digit phone 587 | numbers, allowing the area code to appear either delimited by whitespace or 588 | parenthesis: 589 | 590 | ========================================================================= 591 | *17.1* : Find Ten digit phone numbers, a better way > 592 | 593 | /\((\d\{3\})\|\d\{3\}\)\s\?\d\{3\}[- ]\d\{4\} 594 | 595 | < OR > 596 | 597 | /\v(\(\d{3}\)|\d{3})\s?\d{3}[- ]\d{4} 598 | < 599 | (800) 325-3535 600 | 650 555 1212 601 | 650) 555-1212 (NOTE: Badly formatted numbers no longer match) 602 | Just dial (800) 603 | 325-3535 now!
(NOTE: Numbers split over a line still fail to match) 604 | ========================================================================= 605 | 606 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 607 | Lesson 3.11: Newlines are NOT Matched by Default~ 608 | 609 | That fixes the problem of accidentally matching badly formatted phone 610 | numbers, but not the problem of phone numbers split over a line. Let's fix 611 | that problem now. Vim limits regex matches to a single line by default. Put 612 | another way, Vim -excludes- newlines in matches by default. To override 613 | this, we need to use the special |/\_| modifiers which -include- 614 | newlines in the match. As an example, use |/\_s| to capture whitespace 615 | (like \s ) including newlines. Let's use \_s to fix our phone number 616 | problem: 617 | 618 | ========================================================================= 619 | *17.2* : Find Ten digit phone numbers, an even better way (with newlines) > 620 | 621 | /\((\d\{3\})\|\d\{3\}\)\_s*\d\{3\}\_s*-\?\_s*\d\{4\} 622 | 623 | < OR > 624 | 625 | /\v(\(\d{3}\)|\d{3})\_s*\d{3}\_[- ]?\d{4} 626 | < 627 | (800) 325-3535 628 | 650 555 1212 629 | 650) 555-1212 (NOTE: Badly formatted numbers still don't match) 630 | Just dial (800) 631 | 325-3535 now! (NOTE: Numbers split over a line now match correctly) 632 | ========================================================================= 633 | 634 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 635 | Lesson 4.1: Introducing Grouping~ 636 | 637 | Parentheses may be used to delimit a subexpression to allow repetition or 638 | other special treatment. 
For example: 639 | 640 | ========================================================================= 641 | *18* : Find A simple IP address > 642 | 643 | /\(\d\{1,3\}\.\)\{3\}\d\{1,3\} 644 | 645 | < OR > 646 | 647 | /\v(\d{1,3}\.){3}\d{1,3} 648 | < 649 | 192.168.1.1 650 | 127.0.0.1 651 | 3.142 (NOTE: non-IP addresses are not matched) 652 | 1.2.3 653 | 999.999.999.999 (NOTE: Invalid IP addresses are falsely matched) 654 | ========================================================================= 655 | 656 | The first part of the expression searches for a one to three digit number 657 | followed by a literal period . . This is enclosed in parentheses and 658 | repeated three times using the \{3\} quantifier, followed by the same 659 | expression without the trailing period. 660 | 661 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 662 | Lesson 4.2: Groups within Groups~ 663 | 664 | Unfortunately, this example allows IP addresses with arbitrary one, two, or 665 | three digit numbers separated by periods even though a valid IP address 666 | cannot have numbers larger than 255. It would be nice to arithmetically 667 | compare a captured number N to enforce N<256, but this is not possible with 668 | regular expressions alone. The next example tests various alternatives based 669 | on the starting digits to guarantee the limited range of numbers by pattern 670 | matching. This shows that an expression can become cumbersome even when 671 | looking for a pattern that is simple to describe. 672 | 673 | ========================================================================= 674 | *19* : Find IP addresses (more accurately) > 675 | 676 | /\(\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)\.\)\{3\}\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\) 677 | < 678 | OR > 679 | 680 | /\v((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?) 
681 | < 682 | 192.168.1.1 683 | 127.0.0.1 684 | 3.142 (NOTE: non-IP addresses are not matched) 685 | 1.2.3 686 | 999.999.999.999 (NOTE: Invalid IP addresses are not matched) 687 | ========================================================================= 688 | 689 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 690 | Lesson 4.3: Backreferences~ 691 | 692 | When subexpressions are grouped with parentheses, the text that matches the 693 | subexpression is available further along in the regular expression itself. 694 | Groups are numbered sequentially as encountered in reading from left to 695 | right, starting with 1. 696 | 697 | A "backreference" is used to search for a recurrence of previously matched 698 | text that has been captured by a group. For example, \1 means, "match the 699 | text that was captured by group 1". Here is an example: 700 | 701 | ========================================================================= 702 | *20* : Find repeated words > 703 | 704 | /\<\(\w\+\)\>\s*\1\> 705 | 706 | OR 707 | 708 | /\v<(\w+)>\s*\1> 709 | < 710 | It wasn't that that was impossible. 711 | ========================================================================= 712 | 713 | This works by capturing a string of at least one alphanumeric character 714 | within group 1 \(\w\+\) , but only if it begins and ends a word. It then 715 | looks for any amount of whitespace \s* followed by a repetition of the 716 | captured text \1 ending at the end of a word. 717 | 718 | 719 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 720 | Lesson 5.1: Captures~ 721 | 722 | In the following examples, the Vim regex snippet is given first followed by 723 | the equivalent Perl Compatible Regular Expression (PCRE) for those familiar 724 | with that style of regular expressions. If you're not familiar with PCRE, 725 | don't worry as you will not require a knowledge of it to complete this 726 | tutorial or use Vim.
727 | 728 | NOTE: Some of Vim's way of handling certain regex features differs from 729 | PCRE. Some people complain about this and wonder why Vim didn't just 730 | use the already existing PCRE way. The reason is that Vim started 731 | getting some of these things at the same time Perl did, or even 732 | beforehand. So, it's not that Vim decided to flout history and Go Its 733 | Own Way just to be difficult. Respect that Vim -can- do these things 734 | and has been able to do so for a long time now; and suck it up and 735 | learn Vim's way. :-) 736 | 737 | 1. Match exp and capture it in an automatically numbered group: 738 | 739 | Vim: \(exp\) 740 | PCRE: (exp) 741 | 742 | 2. Match exp, but do not capture it: 743 | 744 | Vim: \%(exp\) 745 | PCRE: (?:exp) 746 | 747 | The \%(exp\) form does not alter the matching behavior, it just doesn't 748 | capture it in a group like the \(exp\) form. 749 | 750 | The next four are so-called lookahead or lookbehind assertions. They look for 751 | things that go before or after the current match without including them in the 752 | match. It is important to understand that these expressions match a position 753 | like ^ or \< and never match any text. For this reason, they are known as 754 | "zero-width assertions". 755 | 756 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 757 | Lesson 5.2: Positive lookahead~ 758 | 759 | Matches with zero width if \(exp\) matches at the current position 760 | 761 | Vim: \(exp\)\@= 762 | PCRE: (?=exp) 763 | 764 | \(exp\)\@= is the "zero-width positive lookahead assertion". 
It matches a 765 | position in the text that precedes a given suffix expression, but doesn't 766 | include the suffix in the match: 767 | 768 | ========================================================================= 769 | *22* : Find the beginning of words ending with "ing" > 770 | 771 | /\<\w\+\(ing\>\)\@= 772 | 773 | < OR > 774 | 775 | /\v<\w+(ing>)@= 776 | < 777 | Beguiling 778 | Alluringly 779 | 780 | NOTE: Vim provides two other very powerful regex operators: |/\zs| and 781 | |/\ze| which can be used in many similar situations as the 782 | lookaround operators. The pattern above can also be expressed as: > 783 | 784 | /\<\w\+\zeing\> 785 | < 786 | Pleasing 787 | Obligingly 788 | ========================================================================= 789 | 790 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 791 | Lesson 5.3: Positive lookbehind~ 792 | 793 | Matches with zero width if \(exp\) matches just before what follows: 794 | 795 | Vim: \(exp\)\@<= 796 | PCRE: (?<=exp) 797 | 798 | \(exp\)\@<= is the "zero-width positive lookbehind assertion". It matches 799 | the position following a prefix, but doesn't include the prefix in the 800 | match: 801 | 802 | ========================================================================= 803 | *23* : Find The end of words starting with "re" > 804 | 805 | /\(\<re\)\@<=\w\+\> 806 | 807 | < OR (using the |/\zs| atom:) > 808 | 809 | /\<re\zs\w\+\> 810 | < 811 | Remember, regular repetition results in ready reflexes.
812 | ========================================================================= 813 | 814 | Here is an example that could be used repeatedly to insert commas into 815 | numbers in groups of three digits: 816 | 817 | ========================================================================= 818 | *24* : Find Three digits at the end of a word, preceded by a digit > 819 | 820 | /\d\@<=\d\{3\}\> 821 | 822 | < OR > 823 | 824 | /\v\d@<=\d{3}> 825 | < 826 | 123456789 827 | 123456,789 828 | 123,456,789 (NOTE: This number correctly doesn't match) 829 | ========================================================================= 830 | 831 | Here is an example that looks for both a prefix and a suffix: 832 | 833 | ========================================================================= 834 | *25* : Find Alphanumeric strings bounded by whitespace > 835 | 836 | /\s\@<=\w\+\s\@= 837 | < 838 | It's right here. 839 | ========================================================================= 840 | 841 | Quiz~ 842 | 843 | Use the pattern in example |24| to add commas to the following number: 844 | 845 | ---> 12345678901 846 | 847 | ANSWER {{{~ 848 | Use the following substitution command when your cursor is on the 849 | line above marked ---> : :s/\v\d@<=\d{3}>/,&/ 850 | You will need to execute it three times to add all the necessary commas. 851 | Tip: You can use the & key in normal mode to re-execute the last 852 | substitution. 853 | }}} 854 | 855 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 856 | Lesson 5.4: Negative lookahead~ 857 | 858 | Matches with zero width if \(exp\) does NOT match at the current position 859 | 860 | Vim: \(exp\)\@! 861 | PCRE: (?!exp) 862 | 863 | Earlier, we saw how to search for a character that is not a specific 864 | character or the member of a character class. What if we simply want to 865 | verify that a character is not present, but don't want to match anything? 
866 | For example, what if we are searching for words in which the letter "q" is 867 | not followed by the letter "u"? We could try: 868 | 869 | ========================================================================= 870 | *26* : Find Words with "q" followed by NOT "u" > 871 | 872 | /\<\w*q[^u]\w*\> 873 | < 874 | Iqaluit is the capital of Nunavut and Canada's coolest arctic city. 875 | QWERTY put the "q" in Compaq but not Iraq. 876 | Inqorrectly spelled! 877 | ========================================================================= 878 | 879 | Run the example and you will see that it fails when "q" is the last letter 880 | of a word, as in "Compaq". This is because [^u] always matches a character. 881 | If "q" is the last character of the word, it will match the whitespace 882 | character that follows, so in the example the expression ends up matching 883 | two whole words. Negative lookaround solves this problem because it matches 884 | a position and does not consume any text. As with positive lookaround, it 885 | can also be used to match the position of an arbitrarily complex 886 | subexpression, rather than just a single character. We can now do a better 887 | job: 888 | 889 | ========================================================================= 890 | *27* : Find words with "q" not followed by "u" > 891 | 892 | /\<\w*qu\@!\w*\> 893 | < 894 | Iqaluit is the capital of Nunavut and Canada's coolest arctic city. 895 | QWERTY put the "q" in Compaq but not Iraq. 896 | Inqorrectly spelled! 897 | ========================================================================= 898 | 899 | We used the "zero-width negative lookahead assertion", \(exp\)\@! , which 900 | succeeds only if the suffix "exp" is not present. Here is another example: 901 | 902 | ========================================================================= 903 | *28* : Find Three digits not followed by another digit > 904 | 905 | /\d\{3\}\d\@! 
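906 | < 907 | 123 908 | 123A 909 | 123 456 910 | 1234 (NOTE: Matches the -last- three digits, perhaps 911 | 123456 surprisingly! How would you force a match 912 | of three digits only? 913 | ANSWER {{{~ 914 | /\<\d\{3\}\d\@! 915 | }}} 916 | ) 917 | ========================================================================= 918 | 919 | 920 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 921 | Lesson 5.5: Negative lookbehind~ 922 | 923 | Matches with zero width if \(exp\) does NOT match just before what follows: 924 | 925 | Vim: \(exp\)\@<! 926 | PCRE: (?<!exp) 927 | 928 | \(exp\)\@<! is the "zero-width negative lookbehind assertion". It matches 929 | a position in the text at which the prefix "exp" is not present, without 930 | including anything in the match: 931 | 932 | ========================================================================= 933 | *29* : Find Strings of seven alphanumerics not preceded by a letter or space > 934 | 935 | /[a-z ]\@<!\w\{7\} 936 | < 937 | #1234567 (abcdefg) mailbox (NOTE: the last word follows a space, so it is not matched) 938 | ========================================================================= 939 | 940 | One more example using lookaround: 941 | 942 | ========================================================================= 958 | *31* : Find Text between HTML tags > 959 | 960 | /\%(<\1>\)\@<=.*\%(<\/\(\w\+\)>\)\@= 961 | 962 | OR 963 | 964 | /\v%(\<\1\>)@<=.*%(\<\/(\w+)\>)@= 965 | 966 | OR 967 | 968 | /<\(\w\+\).\{-}>\zs.*\ze<\/\1> 969 | < 970 | <p>Can I play, daddy?</p> 971 | ========================================================================= 972 | 973 | This searches for the corresponding closing HTML tag using positive 974 | lookbehind and the opening original tag using positive lookahead, thus 975 | capturing the intervening text but excluding both tags. 976 | 977 | NOTE: If you read that last sentence and felt there was something backwards 978 | about it... you're right. The part of the pattern after \@<= and 979 | \@<! is checked for a match first, so a backreference like \1 cannot 980 | refer to a \(\) group inside the lookbehind itself. It does work the 981 | other way around, which is why the group is captured in the closing 982 | tag and referenced from the lookbehind. See |/\@<=| for the details. 983 | 984 | The \zs and \ze form of the same search reads in the natural 985 | order, opening tag first: > 986 | 987 | 988 | /<\(\w\+\).\{-}>\zs.*\ze<\/\1> 989 | 990 | In practice, the \zs and \ze atoms are almost always the better 991 | choice. 992 | 993 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 994 | Lesson 6.1: Greedy and Lazy~ 995 | 996 | When a regular expression has a quantifier that can accept a range of 997 | repetitions, like .* , the normal behavior is to match as many characters 998 | as possible.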
Consider the following regular expression: 999 | 1000 | ========================================================================= 1001 | *32* : Find The longest string starting with a and ending with b > 1002 | 1003 | /a.*b 1004 | < 1005 | aabab 1006 | ========================================================================= 1007 | 1008 | If this is used to search the string "aabab", it will match the entire 1009 | string "aabab". This is called "greedy" matching. Sometimes, we prefer 1010 | "lazy" matching in which a match using the minimum number of repetitions is 1011 | found. The quantifiers we've been playing with so far can all be turned into 1012 | "lazy" quantifiers by replacing the quantifier with a \{-} form. See 1013 | |/\{-| for the specific details. Thus \{-} means "match any number of 1014 | repetitions, but use the smallest number of repetitions that still leads to 1015 | a successful match". Now let's try the lazy version of example |32|: 1016 | 1017 | NOTE: The \{-} form is equivalent to PCRE: *? 1018 | 1019 | ========================================================================= 1020 | *33* : Find The shortest string starting with a and ending with b > 1021 | 1022 | /a.\{-}b 1023 | < 1024 | aabab 1025 | 1026 | NOTE: If you press n after running this search, you will notice that it 1027 | first matched "aab" and then "ab". 1028 | ========================================================================= 1029 | 1030 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1031 | Lesson 6.2: Vim's Lazy Operators:~ 1032 | 1033 | NOTE: See the manual sections |/\{| and |non-greedy| for more details. 1034 | 1035 | Repeat any number of times, but as few as possible: 1036 | Vim: \{-} 1037 | PCRE: *? 1038 | 1039 | Repeat one or more times, but as few as possible: 1040 | Vim: \{-1,} 1041 | PCRE: +? 1042 | 1043 | Repeat zero or one time, but as few as possible: 1044 | Vim: \{-,1} 1045 | PCRE: ?? 
1046 | 1047 | Repeat at least n, but no more than m times, but as few as possible: 1048 | Vim: \{-n,m} 1049 | PCRE: {n,m}? 1050 | 1051 | Repeat at least n times, but as few as possible: 1052 | Vim: \{-n,} 1053 | PCRE: {n,}? 1054 | 1055 | 1056 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1057 | Exercise 1.1: Inserting thousands separators into numbers.~ 1058 | 1059 | Visually select the following 10 lines of numbers and write a regular 1060 | expression that inserts a thousands separator (such as ,) to make them read 1061 | correctly as shown in the set below. 1062 | 1063 | 1 1064 | 12 1065 | 123 1066 | 1234 1067 | 12345 1068 | 123456 1069 | 1234567 1070 | 12345678 1071 | 123456789 1072 | 1234567890 1073 | 1074 | -=-=- 1075 | 1076 | 1 1077 | 12 1078 | 123 1079 | 1,234 1080 | 12,345 1081 | 123,456 1082 | 1,234,567 1083 | 12,345,678 1084 | 123,456,789 1085 | 1,234,567,890 1086 | < 1087 | 1088 | SOLUTION > 1089 | :'<,'>s/\d\@<=\(\(\d\{3}\)\+\d\@!\)\@=/,/g 1090 | < 1091 | 1092 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1093 | Appendix 1: VimRegEx - a Vim Regex Visualisation Tool~ 1094 | 1095 | The VimRegEx plugin is a tool which attempts to graphically show you the 1096 | various portions of your regular expression and the parts of a sample text 1097 | it matches. Using a tool like this can quickly help you to craft complicated 1098 | regular expressions with more ease and confidence. 1099 | 1100 | Install VimRegEx from: 1101 | * http://www.vim.org/scripts/script.php?script_id=1091 OR 1102 | * use the Vim Addon Manager: 1103 | http://www.vim.org/scripts/script.php?script_id=2905 1104 | 1105 | 1106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1107 | This concludes the Vim Regex Tutor.
It was intended to give a brief 1108 | overview of regular expressions in the Vim editor - just enough to allow you 1109 | to start exploring the wonderful and powerful world of regexes on your own. 1110 | It is far from complete as Vim has many many more regex features. To learn 1111 | more about regexes in Vim, see |pattern.txt|. 1112 | 1113 | If you want to read a book, I suggest the latest edition of: 1114 | 1115 | Mastering Regular Expressions, by Jeffrey Friedl. 1116 | 1117 | This tutorial is a rework by Barry Arthur of the original .Net version by 1118 | Jim Hollenhorst at: 1119 | 1120 | http://www.codeproject.com/KB/dotnet/regextutorial.aspx 1121 | 1122 | The layout is based on the original vimtutor by Michael C. Pierce and 1123 | Robert K. Ware. 1124 | 1125 | Thanks to the following people for providing feedback, fixing bugs and 1126 | offering suggestions to improve VimRegexTutor: Ben Fritz, Israel Chauca. 1127 | 1128 | Licensed under the same terms as Vim itself. 1129 | 1130 | Send mistakes and suggestions to barry.arthur@gmail.com or register an Issue 1131 | at https://github.com/dahu/VimRegexTutor 1132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1133 | 1134 | # vim:tw=78:ts=8:ft=help:norl:noro:hlsearch:ignorecase fdm=marker 1135 | -------------------------------------------------------------------------------- /tutor/regextutor.utf-8: -------------------------------------------------------------------------------- 1 | =============================================================================== 2 | = W e l c o m e t o t h e V I M R e g e x T u t o r - Version 0.2 = 3 | =============================================================================== 4 | 5 | Vim is a very powerful editor with a very powerful search and replace 6 | system based on Regular Expressions. This tutor is designed to describe 7 | enough of those features that you will be able to more powerfully use 8 | Vim as an all-purpose editor.
9 | 10 | The approximate time required to complete the tutor is 30 minutes, 11 | depending upon how much time is spent with experimentation. 12 | 13 | ATTENTION:~ 14 | The commands in the lessons will modify the text. Make a copy of this 15 | file to practise on (if you started "vimregextutor" this is already a 16 | copy). 17 | 18 | It is important to remember that this tutor is set up to teach by use. 19 | That means that you need to execute the commands to learn them 20 | properly. If you only read the text, you will forget the commands! 21 | 22 | If you haven't already completed the vimtutor, it is highly recommended 23 | that you do so first before attempting this tutorial. 24 | 25 | Occasional references to the Vim documentation are made throughout this 26 | tutorial. Such entries look like this: |'ignorecase'|. To open those 27 | entries, type (in normal mode - so press <Esc> to leave insert mode 28 | first) :help followed by the exact entry. In this case, you would 29 | type: :help 'ignorecase' 30 | 31 | What the Heck is a Regular Expression Anyway?~ 32 | 33 | I'm sure you are familiar with the use of "wildcard" characters for 34 | pattern matching. For example, if you want to find all the text files 35 | in a directory, you search for "*.txt", knowing that the asterisk is 36 | interpreted as a wildcard that can match any sequence of characters. 37 | Regular expressions are just an elaborate extension of this capability. 38 | 39 | When manipulating text, it is frequently necessary to locate strings 40 | that match complex patterns. Regular expressions were invented to 41 | describe such patterns. Thus, a regular expression is just a shorthand 42 | code for a pattern. For example, the pattern \w\+ is a concise way to 43 | say "match any non-null string of alphanumeric characters". Vim 44 | provides a rich and powerful regular expression vocabulary with which 45 | you can readily and efficiently search and replace text.
46 | 47 | A good way to learn the arcane syntax of regular expressions is by 48 | starting with examples and then experimenting with your own creations. 49 | This tutorial introduces the basics of regular expressions, giving many 50 | common examples. The additional Vim plugin, regexcoach, can be used to 51 | try out the examples and to experiment with your own regular 52 | expressions. 53 | 54 | NOTE: All of the exercises in this tutorial will use plain (non-|magic|-al) 55 | regular expressions. Frequently a very-magic (using the |\v| option) 56 | equivalent is shown alongside the original. The purpose of magic and 57 | the \v option is explained in Lesson 2.3. 58 | 59 | Let's get started! 60 | 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | Lesson 1.1: Searching for Elvis~ 63 | 64 | Suppose you spend all your free time scanning documents looking for evidence 65 | that Elvis is still alive. You could search with the following regular 66 | expression: 67 | 68 | ========================================================================= 69 | *1* : Find elvis > 70 | 71 | /elvis 72 | < ========================================================================= 73 | 74 | This is a perfectly valid regular expression that searches for an exact 75 | sequence of characters. In Vim, you can set the |'ignorecase'| option 76 | to ignore the case of characters, so this expression will match 77 | "Elvis", "ELVIS", or "eLvIs". Unfortunately, it will also match the 78 | last five letters of the word "pelvis". 79 | 80 | Your Turn~ 81 | 82 | 1. Make sure you have search highlighting enabled: :set hlsearch 83 | 84 | 2. Go to the line below marked ---> 85 | 86 | 3. type /elvis 87 | 88 | 4. Confirm that the letters "elvis" are highlighted 89 | 90 | ---> a) Rare are the sightings of elves, as are those of elvis himself. 91 | b) The pelvis of elvis was seldom still. 
92 | 93 | NOTE: You can use n to move to the next match and :nohl to clear 94 | the search highlight. 95 | 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | Lesson 1.2: Limiting the search to a whole word~ 98 | 99 | Did you notice how the word "pelvis" is partially matched with the 100 | search /elvis ? We can improve the expression as follows: 101 | 102 | ========================================================================= 103 | *2* : Find elvis as a whole word > 104 | 105 | /\<elvis\> 106 | < ========================================================================= 107 | 108 | Now things are getting a little more interesting. The \< is a special code 109 | that means, "match the position at the beginning of any word". Likewise, \> 110 | means "match the position at the end of any word". This expression will only 111 | match complete words spelled "elvis" with any combination of lower case or 112 | capital letters (if |'ignorecase'| is enabled). 113 | 114 | Your Turn~ 115 | 116 | 1. Go to the line below marked ---> 117 | 118 | 2. type /\<elvis\> 119 | 120 | 3. Confirm that only the word "elvis" (and not "pelvis") is highlighted 121 | 122 | ---> a) Rare are the sightings of elves, as are those of elvis himself. 123 | b) The pelvis of elvis was seldom still. 124 | 125 | Did you notice how the word "pelvis" is now NOT matched with this 126 | enhanced regex? 127 | 128 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 129 | Lesson 1.3: Searching for everything between two words~ 130 | 131 | Suppose you want to find all lines in which the word "elvis" is followed by 132 | the word "alive." The period or dot . is a special code that matches any 133 | character other than a newline. The asterisk * means repeat the previous 134 | term as many times as necessary to guarantee a match. Thus, .* means 135 | "match any number of characters other than newline".
It is now a simple 136 | matter to build an expression that means 'search for the word "elvis" 137 | followed anywhere thereafter on the same line by the word "alive"'. 138 | 139 | ========================================================================= 140 | *3* : Find elvis followed by anything and then followed by alive > 141 | 142 | /\<elvis\>.*\<alive\> 143 | < ========================================================================= 144 | 145 | With just a few special characters we are beginning to build powerful 146 | regular expressions and they are already becoming hard for us humans to 147 | read. 148 | 149 | Your Turn~ 150 | 151 | 1. Go to the line below marked ---> 152 | 153 | 2. type /\<elvis\>.*\<alive\> 154 | 155 | 3. Confirm that everything between "elvis" and "alive" is highlighted 156 | 157 | ---> Rare are the sightings of elvis, more so those of him being alive. 158 | 159 | 160 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 161 | Lesson 2.1: Determining the Validity of Phone Numbers~ 162 | 163 | Suppose you're editing a file which contains seven-digit phone numbers 164 | and you want to verify that the phone numbers are in the correct format, 165 | "xxx-xxxx", where each "x" is a digit. The following expression will 166 | search through text looking for such a string: 167 | 168 | ========================================================================= 169 | *4* : Find seven-digit phone number > 170 | 171 | /\<\d\d\d-\d\d\d\d 172 | < ========================================================================= 173 | 174 | Your Turn~ 175 | 176 | 1. Go to the line below marked ---> 177 | 178 | 2. type /\<\d\d\d-\d\d\d\d 179 | 180 | 3. Confirm that the seven-digit phone number is highlighted 181 | 182 | ---> 123-4567 183 | 184 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 185 | Lesson 2.2: Specifying repetition more concisely~ 186 | 187 | Each \d means "match any single digit".
The "-" has no special 188 | meaning and is interpreted literally, matching a hyphen. To avoid the 189 | annoying repetition, we can use a shorthand notation that means the 190 | same thing: 191 | 192 | ========================================================================= 193 | *5.1* : Find seven-digit phone number (more concisely) > 194 | 195 | /\<\d\{3\}-\d\{4\} 196 | < ========================================================================= 197 | 198 | Your Turn~ 199 | 200 | 1. Go to the line below marked ---> 201 | 202 | 2. type /\<\d\{3\}-\d\{4\} 203 | 204 | 3. Confirm that the seven-digit phone number is highlighted 205 | 206 | ---> 123-4567 207 | 208 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 209 | Lesson 2.3: It's Magic~ 210 | 211 | The \{3\} following the \d means "repeat the preceding character 212 | three times". The { and } in that expression need to be escaped 213 | with \ which can quickly become annoying for large expressions. Vim 214 | uses the \m regex pattern to enable |'magic'| mode, which helps to 215 | reduce the amount of \ escaping needed in some patterns. 216 | Additionally, Vim also provides the \v pattern to enable "very magic" 217 | mode, even further reducing the need to escape certain elements. 218 | Using magic, the previous pattern becomes: 219 | 220 | ========================================================================= 221 | *5.2* : Find seven-digit phone number (using magic for better 222 | readability) > 223 | 224 | /\m\<\d\{3}-\d\{4} 225 | < ========================================================================= 226 | 227 | OR, with very magic: 228 | 229 | ========================================================================= 230 | *5.3* : Find seven-digit phone number (using very magic for even 231 | better readability) > 232 | 233 | /\v<\d{3}-\d{4} 234 | < ========================================================================= 235 | 236 | Your Turn~ 237 | 238 | 1. 
Go to the line below marked ---> 239 | 240 | 2. type /\v<\d{3}-\d{4} 241 | 242 | 3. Confirm that the seven-digit phone number is highlighted 243 | 244 | ---> 123-4567 245 | 246 | 247 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 248 | Lesson 3.1: Special Characters: *~ 249 | 250 | You should get to know a few characters with special meaning. You already 251 | met \< . * and \d . To match any whitespace characters, like spaces and 252 | tabs, use \s . Similarly, \w matches any alphanumeric character, and \n 253 | matches newlines. The special form \_s to match spaces, tabs AND 254 | newlines is discussed in Lesson 3.11. 255 | 256 | Let's try a few more examples: 257 | 258 | ========================================================================= 259 | *6* : Find words that start with the letter a > 260 | 261 | /\<a\w*\> 262 | < ========================================================================= 263 | 264 | Your Turn~ 265 | 266 | 1. Go to the line below marked ---> 267 | 268 | 2. type /\<a\w*\> 269 | 270 | 3. Confirm that all of the words starting with "a" are highlighted 271 | 272 | ---> an apple a day keeps the aardvarks away 273 | 274 | This works by searching for the beginning of a word \< , then the letter 275 | "a", then any number of repetitions of alphanumeric characters \w* , then 276 | the end of a word \> . 277 | 278 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 279 | Lesson 3.2: Special Characters: \+~ 280 | 281 | ========================================================================= 282 | *7* : Find repeated strings of digits > 283 | 284 | /\d\+ 285 | < ========================================================================= 286 | 287 | Here, the \+ is similar to * , except it requires at least one 288 | repetition. 289 | 290 | Your Turn~ 291 | 292 | 1. Go to the line below marked ---> 293 | 294 | 2. type /\d\+ 295 | 296 | 3.
Confirm that all of the digits and only the digits are highlighted 297 | 298 | ---> apple 1234 5678 900,000 1.23 13:45 %^@# 299 | 300 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 301 | Lesson 3.3: Six Letter Words~ 302 | 303 | ========================================================================= 304 | *8* : Find six letter words > 305 | 306 | /\<\w\{6\}\> 307 | 308 | < OR, using very-magic: > 309 | 310 | /\v<\w{6}> 311 | < ========================================================================= 312 | 313 | Your Turn~ 314 | 315 | 1. Go to the line below marked ---> 316 | 317 | 2. Type /\<\w\{6\}\> 318 | 319 | 3. Confirm that the word "attend" is highlighted 320 | 321 | ---> Happy times and fine edits attend on thee. 322 | 323 | Start experimenting by inventing your own expressions. See 324 | |pattern-overview|, |ordinary-atom|, and |character-classes| for Vim's 325 | special regular expression characters. 326 | 327 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 328 | Lesson 3.4: In the beginning~ 329 | 330 | The special characters ^ and $ are used when looking for something that 331 | must start at the beginning of the line and end at the end of the line, 332 | respectively. 333 | This is especially useful for matching exact text. For example, to find a 334 | line containing only a seven-digit phone number, you might use: 335 | 336 | ========================================================================= 337 | *9* : Find a seven-digit phone number on a line by itself > 338 | 339 | /^\d\{3\}-\d\{4\}$ 340 | 341 | < OR > 342 | 343 | /^\v\d{3}-\d{4}$ 344 | < ========================================================================= 345 | 346 | This is the same as example [|5|], but anchored to the whole line, 347 | with nothing else before or after the matched text. 
This start of line 348 | character must be placed at the start of the regex pattern, otherwise it 349 | will represent a literal "^". The special atom \_^ matches the start of a 350 | line (with zero width) at any position in the regex. 351 | 352 | Your Turn~ 353 | 354 | 1. Go to the line below marked ---> 355 | 356 | 2. Type /^\d\{3\}-\d\{4\}$ 357 | 358 | 3. Confirm that the line with only a seven-digit phone number is highlighted 359 | 360 | ---> 361 | My phone number is 123-4567, or 362 | 123-4568 after hours. 363 | 123-4567 364 | 365 | Did you notice that even though the first two lines contained a seven-digit 366 | phone number, they were not matched because they contained other characters? 367 | 368 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 369 | Lesson 3.5: Escaped characters~ 370 | 371 | A problem occurs if you actually want to match one of the special 372 | characters, like ^ or $ . Use the backslash to remove the special 373 | meaning. Thus, \^ , \. , and \\ match the literal characters "^", 374 | ".", and "\" respectively. 375 | 376 | Your Turn~ 377 | 378 | 1. Go to the line below marked ---> 379 | 380 | 2. Type /\^_\^ 381 | 382 | 3. Confirm that the smiley "^_^" is highlighted 383 | 384 | ---> ^_^ $_$ @_% *_# !_! ()_+ <>_{} 385 | 386 | Can you match the other smilies? Which of those other punctuation symbols 387 | need escaping and which do not? 388 | 389 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 390 | Lesson 3.6: Repetitions~ 391 | 392 | You've seen that \{3\} and * can be used to indicate repetition of a 393 | single character. Later, you'll see how the same syntax can be used to 394 | repeat entire subexpressions. There are several other ways to specify a 395 | repetition, as shown in |pattern-overview|. 396 | 397 | NOTE: You no longer need explicit instructions on how to experiment with 398 | the patterns in this tutorial.
All future search lessons will assume 399 | that you use the patterns provided to experiment with and learn from. 400 | Test data is available inside each exercise block showing expected 401 | matchable text. Where appropriate, false matches and missed text are 402 | also provided to emphasise the current learning point. 403 | 404 | ========================================================================= 405 | *10* : Find all five and six letter words > 406 | 407 | /\<\w\{5,6\}\> 408 | 409 | < OR > 410 | 411 | /\v<\w{5,6}> 412 | < 413 | I am a Bear of Very Little Brain, and long words Bother Me. 414 | ========================================================================= 415 | 416 | ========================================================================= 417 | *11* : Find ten digit phone numbers > 418 | 419 | /\<\d\{3\}\s\d\{3\}-\d\{4\} 420 | 421 | < OR > 422 | 423 | /\v<\d{3}\s\d{3}-\d{4} 424 | < 425 | For a good edit, call 846 968-7615 426 | ========================================================================= 427 | 428 | ========================================================================= 429 | *12* : Find Social Security Number > 430 | 431 | /\d\{3\}-\d\{2\}-\d\{4\} 432 | 433 | < OR > 434 | 435 | /\v\d{3}-\d{2}-\d{4} 436 | < 437 | 111-21-1211 438 | ========================================================================= 439 | 440 | ========================================================================= 441 | *13.1* : Find The first word in the line > 442 | 443 | /^\w\+ 444 | < 445 | Fools to the left of me, 446 | jokers to the right, 447 | here I am stuck in the middle with Vim. 448 | ========================================================================= 449 | ========================================================================= 450 | *13.2* : Find The first word in the line, ignoring leading whitespace > 451 | 452 | /^\s*\w\+ 453 | < 454 | Fools to the left of me, 455 | jokers to the right, 456 | here I am stuck in the middle with Vim.
457 | ========================================================================= 458 | ========================================================================= 459 | *13.3* : Find The first word in the line, ignoring AND skipping leading 460 | whitespace > 461 | 462 | /^\s*\zs\w\+ 463 | < 464 | Fools to the left of me, 465 | jokers to the right, 466 | here I am stuck in the middle with Vim. 467 | 468 | NOTE: The special operator \zs is discussed in Lesson 5.2 469 | ========================================================================= 470 | 471 | Exercise~ 472 | 473 | Use the $ anchor to match the last 'line' word on this line 474 | 475 | ANSWER {{{~ 476 | /line$ 477 | }}} 478 | 479 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 480 | Lesson 3.7: Character Classes~ 481 | 482 | It is simple to find alphanumerics, digits, and whitespace, but what if we 483 | want to find anything from some other set of characters? This is easily done 484 | by listing the desired characters within square brackets. Thus, "[aeiou]" 485 | matches any vowel and "[.?!]" matches the punctuation at the end of a 486 | sentence. In this example, notice that the "." and "?" lose their special 487 | meanings within square brackets and are interpreted literally. We can also 488 | specify a range of characters, so "[a-z0-9]" means, "match any lowercase 489 | letter of the alphabet, or any digit". 490 | 491 | Let's try a more complicated expression that searches for telephone numbers. 492 | 493 | ========================================================================= 494 | *14* : Find A ten digit phone number > 495 | 496 | /(\?\d\{3\}[) ]\s\?\d\{3\}[- ]\d\{4\} 497 | 498 | < OR > 499 | 500 | /\v\(?\d{3}[) ]\s?\d{3}[- ]\d{4} 501 | < 502 | (800) 325-3535 503 | 650 555 1212 504 | 650) 555-1212 (NOTE: Example of a false positive) 505 | Just dial (800) 506 | 325-3535 now!
(NOTE: Example of a false negative) 507 | ========================================================================= 508 | 509 | This expression will find phone numbers in several formats, like "(800) 510 | 325-3535" or "650 555 1212". The (\? searches for zero or one left 511 | parentheses, [) ] searches for a right parenthesis or a space. The \s\? 512 | searches for zero or one whitespace characters. Unfortunately, it will also 513 | find cases like "650) 555-1212" in which the parenthesis is not balanced. 514 | Below, you'll see how to use alternatives to eliminate this problem. Another 515 | problem you might have noticed is that, by default in Vim, searches do not 516 | span across multiple lines. This is evident in the first phone number above: 517 | (800) 325-3535, which matches on this line, but not above because it's 518 | split over two lines. Solutions to this problem will also be shown below. 519 | 520 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 521 | Lesson 3.8: Negation~ 522 | 523 | Sometimes we need to search for a character that is NOT a member of an 524 | easily defined class of characters. The following table shows how this can 525 | be specified: 526 | 527 | \W Match any character that is NOT alphanumeric 528 | 529 | \S Match any character that is NOT whitespace 530 | 531 | \D Match any character that is NOT a digit 532 | 533 | [^x] Match any character that is NOT x 534 | 535 | [^aeiou] Match any character that is NOT one of the characters aeiou 536 | 537 | ========================================================================= 538 | *15* : Find All strings that do not contain whitespace characters > 539 | 540 | /\S\+ 541 | < 542 | Blessed are those who in the name of Vim edit righteously. 
543 | ========================================================================= 544 | 545 | Later, we'll see how to use "lookahead" (|/\@=| , |/\@!|) and "lookbehind" 546 | (|/\@<=| , |/\@ 558 | 559 | /\<\d\{5\}-\d\{4\}\>\|\<\d\{5\}\> 560 | 561 | < OR > 562 | 563 | /\v<\d{5}-\d{4}>|<\d{5}> 564 | < 565 | 12345-1234 566 | 12345 567 | 123-456 (NOTE: non-zip codes don't match) 568 | ========================================================================= 569 | 570 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 571 | Lesson 3.10: Order of Alternatives is Important~ 572 | 573 | When using alternatives, the order is important since the matching algorithm 574 | will attempt to match the leftmost alternative first. If the order is 575 | reversed in this example, the expression will only find the 5 digit Zip 576 | Codes and fail to find the 9 digit ones. 577 | 578 | ========================================================================= 579 | Try it: > 580 | /\v<\d{5}>|<\d{5}-\d{4}> 581 | < 582 | 12345 (NOTE: This still matches... 583 | 12345-1234 ...but this does not) 584 | ========================================================================= 585 | 586 | We can use alternatives to improve the expression for ten digit phone 587 | numbers, allowing the area code to appear either delimited by whitespace or 588 | parenthesis: 589 | 590 | ========================================================================= 591 | *17.1* : Find Ten digit phone numbers, a better way > 592 | 593 | /\((\d\{3\})\|\d\{3\}\)\s\?\d\{3\}[- ]\d\{4\} 594 | 595 | < OR > 596 | 597 | /\v(\(\d{3}\)|\d{3})\s?\d{3}[- ]\d{4} 598 | < 599 | (800) 325-3535 600 | 650 555 1212 601 | 650) 555-1212 (NOTE: Badly formatted numbers no longer match) 602 | Just dial (800) 603 | 325-3535 now! 
(NOTE: Numbers split over a line still fail to match) 604 | ========================================================================= 605 | 606 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 607 | Lesson 3.11: Newlines are NOT Matched by Default~ 608 | 609 | That fixes the problem of accidentally matching badly formatted phone 610 | numbers, but not the problem of phone numbers split over a line. Let's fix 611 | that problem now. Vim limits regex matches to a single line by default. Put 612 | another way, Vim -excludes- newlines in matches by default. To override 613 | this, we need to use the special |/\_| modifiers which -include- 614 | newlines in the match. As an example, use |/\_s| to capture whitespace 615 | (like \s ) including newlines. Let's use \_s to fix our phone number 616 | problem: 617 | 618 | ========================================================================= 619 | *17.2* : Find Ten digit phone numbers, an even better way (with newlines) > 620 | 621 | /\((\d\{3\})\|\d\{3\}\)\_s*\d\{3\}\_s*-\?\_s*\d\{4\} 622 | 623 | < OR > 624 | 625 | /\v(\(\d{3}\)|\d{3})\_s*\d{3}\_[- ]?\d{4} 626 | < 627 | (800) 325-3535 628 | 650 555 1212 629 | 650) 555-1212 (NOTE: Badly formatted numbers still don't match) 630 | Just dial (800) 631 | 325-3535 now! (NOTE: Numbers split over a line now match correctly) 632 | ========================================================================= 633 | 634 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 635 | Lesson 4.1: Introducing Grouping~ 636 | 637 | Parentheses may be used to delimit a subexpression to allow repetition or 638 | other special treatment. 
For example: 639 | 640 | ========================================================================= 641 | *18* : Find A simple IP address > 642 | 643 | /\(\d\{1,3\}\.\)\{3\}\d\{1,3\} 644 | 645 | < OR > 646 | 647 | /\v(\d{1,3}\.){3}\d{1,3} 648 | < 649 | 192.168.1.1 650 | 127.0.0.1 651 | 3.142 (NOTE: non-IP addresses are not matched) 652 | 1.2.3 653 | 999.999.999.999 (NOTE: Invalid IP addresses are falsely matched) 654 | ========================================================================= 655 | 656 | The first part of the expression searches for a one to three digit number 657 | followed by a literal period . . This is enclosed in parentheses and 658 | repeated three times using the \{3\} quantifier, followed by the same 659 | expression without the trailing period. 660 | 661 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 662 | Lesson 4.2: Groups within Groups~ 663 | 664 | Unfortunately, this example allows IP addresses with arbitrary one, two, or 665 | three digit numbers separated by periods even though a valid IP address 666 | cannot have numbers larger than 255. It would be nice to arithmetically 667 | compare a captured number N to enforce N<256, but this is not possible with 668 | regular expressions alone. The next example tests various alternatives based 669 | on the starting digits to guarantee the limited range of numbers by pattern 670 | matching. This shows that an expression can become cumbersome even when 671 | looking for a pattern that is simple to describe. 672 | 673 | ========================================================================= 674 | *19* : Find IP addresses (more accurately) > 675 | 676 | /\(\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\)\.\)\{3\}\(2[0-4]\d\|25[0-5]\|[01]\?\d\d\?\) 677 | < 678 | OR > 679 | 680 | /\v((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?) 
681 | < 682 | 192.168.1.1 683 | 127.0.0.1 684 | 3.142 (NOTE: non-IP addresses are not matched) 685 | 1.2.3 686 | 999.999.999.999 (NOTE: Invalid IP addresses are not matched) 687 | ========================================================================= 688 | 689 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 690 | Lesson 4.3: Backreferences~ 691 | 692 | When subexpressions are grouped with parentheses, the text that matches the 693 | subexpression is available further along in the regular expression itself. 694 | Groups are numbered sequentially as encountered in reading from left to 695 | right, starting with 1. 696 | 697 | A "backreference" is used to search for a recurrence of previously matched 698 | text that has been captured by a group. For example, \1 means, "match the 699 | text that was captured by group 1". Here is an example: 700 | 701 | ========================================================================= 702 | *20* : Find repeated words > 703 | 704 | /\<\(\w\+\)\>\s*\1\> 705 | 706 | OR 707 | 708 | /\v<(\w+)>\s*\1> 709 | < 710 | It wasn't that that was impossible. 711 | ========================================================================= 712 | 713 | This works by capturing a string of at least one alphanumeric character 714 | within group 1 \(\w\+\) , but only if it begins and ends a word. It then 715 | looks for any amount of whitespace \s* followed by a repetition of the 716 | captured text \1 ending at the end of a word. 717 | 718 | 719 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 720 | Lesson 5.1: Captures~ 721 | 722 | In the following examples, the Vim regex snippet is given first followed by 723 | the equivalent Perl Compatible Regular Expression (PCRE) for those familiar 724 | with that style of regular expressions. If you're not familiar with PCRE, 725 | don't worry as you will not require a knowledge of it to complete this 726 | tutorial or use Vim.
727 | 728 | NOTE: Some of Vim's way of handling certain regex features differs from 729 | PCRE. Some people complain about this and wonder why Vim didn't just 730 | use the already existing PCRE way. The reason is that Vim started 731 | getting some of these things at the same time Perl did, or even 732 | beforehand. So, it's not that Vim decided to flout history and Go Its 733 | Own Way just to be difficult. Respect that Vim -can- do these things 734 | and has been able to do so for a long time now; and suck it up and 735 | learn Vim's way. :-) 736 | 737 | 1. Match exp and capture it in an automatically numbered group: 738 | 739 | Vim: \(exp\) 740 | PCRE: (exp) 741 | 742 | 2. Match exp, but do not capture it: 743 | 744 | Vim: \%(exp\) 745 | PCRE: (?:exp) 746 | 747 | The \%(exp\) form does not alter the matching behavior, it just doesn't 748 | capture it in a group like the \(exp\) form. 749 | 750 | The next four are so-called lookahead or lookbehind assertions. They look for 751 | things that go before or after the current match without including them in the 752 | match. It is important to understand that these expressions match a position 753 | like ^ or \< and never match any text. For this reason, they are known as 754 | "zero-width assertions". 755 | 756 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 757 | Lesson 5.2: Positive lookahead~ 758 | 759 | Matches with zero width if \(exp\) matches at the current position 760 | 761 | Vim: \(exp\)\@= 762 | PCRE: (?=exp) 763 | 764 | \(exp\)\@= is the "zero-width positive lookahead assertion". 
It matches a 765 | position in the text that precedes a given suffix expression, but doesn't 766 | include the suffix in the match: 767 | 768 | ========================================================================= 769 | *22* : Find the beginning of words ending with "ing" > 770 | 771 | /\<\w\+\(ing\>\)\@= 772 | 773 | < OR > 774 | 775 | /\v<\w+(ing>)@= 776 | < 777 | Beguiling 778 | Alluringly 779 | 780 | NOTE: Vim provides two other very powerful regex operators: |/\zs| and 781 | |/\ze| which can be used in many similar situations as the 782 | lookaround operators. The pattern above can also be expressed as: > 783 | 784 | /\<\w\+\zeing\> 785 | < 786 | Pleasing 787 | Obligingly 788 | ========================================================================= 789 | 790 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 791 | Lesson 5.3: Positive lookbehind~ 792 | 793 | Matches with zero width if \(exp\) matches just before what follows: 794 | 795 | Vim: \(exp\)\@<= 796 | PCRE: (?<=exp) 797 | 798 | \(exp\)\@<= is the "zero-width positive lookbehind assertion". It matches 799 | the position following a prefix, but doesn't include the prefix in the 800 | match: 801 | 802 | ========================================================================= 803 | *23* : Find The end of words starting with "re" > 804 | 805 | /\(\<re\)\@<=\w\+\> 806 | 807 | < OR (using the |/\zs| atom:) > 808 | 809 | /\<re\zs\w\+\> 810 | < 811 | Remember, regular repetition results in ready reflexes.
812 | ========================================================================= 813 | 814 | Here is an example that could be used repeatedly to insert commas into 815 | numbers in groups of three digits: 816 | 817 | ========================================================================= 818 | *24* : Find Three digits at the end of a word, preceded by a digit > 819 | 820 | /\d\@<=\d\{3\}\> 821 | 822 | < OR > 823 | 824 | /\v\d@<=\d{3}> 825 | < 826 | 123456789 827 | 123456,789 828 | 123,456,789 (NOTE: This number correctly doesn't match) 829 | ========================================================================= 830 | 831 | Here is an example that looks for both a prefix and a suffix: 832 | 833 | ========================================================================= 834 | *25* : Find Alphanumeric strings bounded by whitespace > 835 | 836 | /\s\@<=\w\+\s\@= 837 | < 838 | It's right here. 839 | ========================================================================= 840 | 841 | Quiz~ 842 | 843 | Use the pattern in example |24| to add commas to the following number: 844 | 845 | ---> 12345678901 846 | 847 | ANSWER {{{~ 848 | Use the following substitution command when your cursor is on the 849 | line above marked ---> : :s/\v\d@<=\d{3}>/,&/ 850 | You will need to execute it three times to add all the necessary commas. 851 | Tip: You can use the & key in normal mode to re-execute the last 852 | substitution. 853 | }}} 854 | 855 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 856 | Lesson 5.4: Negative lookahead~ 857 | 858 | Matches with zero width if \(exp\) does NOT match at the current position 859 | 860 | Vim: \(exp\)\@! 861 | PCRE: (?!exp) 862 | 863 | Earlier, we saw how to search for a character that is not a specific 864 | character or the member of a character class. What if we simply want to 865 | verify that a character is not present, but don't want to match anything? 
866 | For example, what if we are searching for words in which the letter "q" is 867 | not followed by the letter "u"? We could try: 868 | 869 | ========================================================================= 870 | *26* : Find Words with "q" followed by NOT "u" > 871 | 872 | /\<\w*q[^u]\w*\> 873 | < 874 | Iqaluit is the capital of Nunavut and Canada's coolest arctic city. 875 | QWERTY put the "q" in Compaq but not Iraq. 876 | Inqorrectly spelled! 877 | ========================================================================= 878 | 879 | Run the example and you will see that it fails when "q" is the last letter 880 | of a word, as in "Compaq". This is because [^u] always matches a character. 881 | If "q" is the last character of the word, it will match the whitespace 882 | character that follows, so in the example the expression ends up matching 883 | two whole words. Negative lookaround solves this problem because it matches 884 | a position and does not consume any text. As with positive lookaround, it 885 | can also be used to match the position of an arbitrarily complex 886 | subexpression, rather than just a single character. We can now do a better 887 | job: 888 | 889 | ========================================================================= 890 | *27* : Find words with "q" not followed by "u" > 891 | 892 | /\<\w*qu\@!\w*\> 893 | < 894 | Iqaluit is the capital of Nunavut and Canada's coolest arctic city. 895 | QWERTY put the "q" in Compaq but not Iraq. 896 | Inqorrectly spelled! 897 | ========================================================================= 898 | 899 | We used the "zero-width negative lookahead assertion", \(exp\)\@! , which 900 | succeeds only if the suffix "exp" is not present. Here is another example: 901 | 902 | ========================================================================= 903 | *28* : Find Three digits not followed by another digit > 904 | 905 | /\d\{3\}\d\@! 
906 | < 907 | 123 908 | 123A 909 | 123 456 910 | 1234 (NOTE: Matches the -last- three digits, perhaps 911 | 123456 surprisingly! How would you force a match 912 | of three digits only? 913 | ANSWER {{{~ 914 | /\<\d\{3\}\d\@! 915 | }}} 916 | ) 917 | ========================================================================= 918 | 919 | 920 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 921 | Lesson 5.5: Negative lookbehind~ 922 | 923 | Matches with zero width if \(exp\) does NOT match just before what follows: 924 | 925 | Vim: \(exp\)\@<! 926 | PCRE: (?<!exp) 934 | 935 | /[a-z ]\@ 959 | 960 | /\%(<\1>\)\@<=.*\%(<\/\(\w\+\)>\)\@= 961 | 962 | OR 963 | 964 | /\v%(\<\1\>)@<=.*%(\<\/(\w+)\>)@= 965 | 966 | OR 967 | 968 | /<\(\w\+\).\{-}>\zs.*\ze<\/\1> 969 | < 970 | Can I play, daddy? 971 | ========================================================================= 972 | 973 | This searches for the corresponding closing HTML tag using positive 974 | lookbehind and the opening original tag using positive lookahead, thus 975 | capturing the intervening text but excluding both tags. 976 | 977 | NOTE: If you read that last sentence and felt there was something backwards 978 | about it... you're right. The part of the pattern after \@<= and 979 | \@\zs.*\ze<\/\1> 989 | 990 | In practice, the \zs and \ze atoms are almost always the better 991 | choice. 992 | 993 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 994 | Lesson 6.1: Greedy and Lazy~ 995 | 996 | When a regular expression has a quantifier that can accept a range of 997 | repetitions, like .* , the normal behavior is to match as many characters 998 | as possible.
Consider the following regular expression: 999 | 1000 | ========================================================================= 1001 | *32* : Find The longest string starting with a and ending with b > 1002 | 1003 | /a.*b 1004 | < 1005 | aabab 1006 | ========================================================================= 1007 | 1008 | If this is used to search the string "aabab", it will match the entire 1009 | string "aabab". This is called "greedy" matching. Sometimes, we prefer 1010 | "lazy" matching in which a match using the minimum number of repetitions is 1011 | found. The quantifiers we've been playing with so far can all be turned into 1012 | "lazy" quantifiers by replacing the quantifier with a \{-} form. See 1013 | |/\{-| for the specific details. Thus \{-} means "match any number of 1014 | repetitions, but use the smallest number of repetitions that still leads to 1015 | a successful match". Now let's try the lazy version of example |32|: 1016 | 1017 | NOTE: The \{-} form is equivalent to PCRE: *? 1018 | 1019 | ========================================================================= 1020 | *33* : Find The shortest string starting with a and ending with b > 1021 | 1022 | /a.\{-}b 1023 | < 1024 | aabab 1025 | 1026 | NOTE: If you press n after running this search, you will notice that it 1027 | first matched "aab" and then "ab". 1028 | ========================================================================= 1029 | 1030 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1031 | Lesson 6.2: Vim's Lazy Operators:~ 1032 | 1033 | NOTE: See the manual sections |/\{| and |non-greedy| for more details. 1034 | 1035 | Repeat any number of times, but as few as possible: 1036 | Vim: \{-} 1037 | PCRE: *? 1038 | 1039 | Repeat one or more times, but as few as possible: 1040 | Vim: \{-1,} 1041 | PCRE: +? 1042 | 1043 | Repeat zero or one time, but as few as possible: 1044 | Vim: \{-,1} 1045 | PCRE: ?? 
1046 | 1047 | Repeat at least n, but no more than m times, but as few as possible: 1048 | Vim: \{-n,m} 1049 | PCRE: {n,m}? 1050 | 1051 | Repeat at least n times, but as few as possible: 1052 | Vim: \{-n,} 1053 | PCRE: {n,}? 1054 | 1055 | 1056 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1057 | Exercise 1.1: Inserting thousands separators into numbers.~ 1058 | 1059 | Visually select the following 10 lines of numbers and write a regular 1060 | expression that inserts a thousands separator (such as ,) to make them read 1061 | correctly as shown in the set below. 1062 | 1063 | 1 1064 | 12 1065 | 123 1066 | 1234 1067 | 12345 1068 | 123456 1069 | 1234567 1070 | 12345678 1071 | 123456789 1072 | 1234567890 1073 | 1074 | -=-=- 1075 | 1076 | 1 1077 | 12 1078 | 123 1079 | 1,234 1080 | 12,345 1081 | 123,456 1082 | 1,234,567 1083 | 12,345,678 1084 | 123,456,789 1085 | 1,234,567,890 1086 | < 1087 | 1088 | SOLUTION > 1089 | :'<,'>s/\d\@<=\(\(\d\{3}\)\+\d\@!\)\@=/,/g 1090 | < 1091 | 1092 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1093 | Appendix 1: VimRegEx - a Vim Regex Visualisation Tool~ 1094 | 1095 | The VimRegEx plugin is a tool which attempts to graphically show you the 1096 | various portions of your regular expression and the parts of a sample text 1097 | it matches. Using a tool like this can quickly help you to craft complicated 1098 | regular expressions with more ease and confidence. 1099 | 1100 | Install VimRegEx from: 1101 | * http://www.vim.org/scripts/script.php?script_id=1091 OR 1102 | * use the Vim Addon Manager: 1103 | http://www.vim.org/scripts/script.php?script_id=2905 1104 | 1105 | 1106 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1107 | This concludes the Vim Regex Tutor.
It was intended to give a brief 1108 | overview of regular expressions in the Vim editor - just enough to allow you 1109 | to start exploring the wonderful and powerful world of regexes on your own. 1110 | It is far from complete as Vim has many, many more regex features. To learn 1111 | more about regexes in Vim, see |pattern.txt| 1112 | 1113 | If you want to read a book, I suggest the latest edition of: 1114 | 1115 | Mastering Regular Expressions, by Jeffrey Friedl. 1116 | 1117 | This tutorial is a rework by Barry Arthur of the original .Net version by 1118 | Jim Hollenhorst at: 1119 | 1120 | http://www.codeproject.com/KB/dotnet/regextutorial.aspx 1121 | 1122 | The layout is based on the original vimtutor by Michael C. Pierce and 1123 | Robert K. Ware. 1124 | 1125 | Thanks to the following people for providing feedback, fixing bugs and 1126 | offering suggestions to improve VimRegexTutor: Ben Fritz, Israel Chauca. 1127 | 1128 | Licensed under the same terms as Vim itself. 1129 | 1130 | Send mistakes and suggestions to barry.arthur@gmail.com or register an Issue 1131 | at https://github.com/dahu/VimRegexTutor 1132 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1133 | 1134 | # vim:tw=78:ts=8:ft=help:norl:noro:hlsearch:ignorecase fdm=marker 1135 | -------------------------------------------------------------------------------- /tutor/regextutor.vim: -------------------------------------------------------------------------------- 1 | " Vim regextutor support file 2 | " Maintainer: Barry Arthur 3 | " Last Change: 2011 Jul 22 4 | 5 | " NOTE: This was shamelessly stolen from Eduardo F. Amatria's 6 | " () original tutor.vim. 7 | " None of the language variants exist for vimregextutor yet, but Eduardo's 8 | " code was left here in the vain hope that one day it would be useful.
" This Vim script detects whether a translation of the regextutor file
" exists, i.e., a regextutor.xx file, where xx is the language.  If the
" translation does not exist, or no extension is given, it falls back to the
" English version.
"
" It is invoked by the vimregextutor shell script, which exports:
"   $xx              optional language code requested on the command line
"   $REGEXTUTORCOPY  path of the scratch copy this script must write
"
" NOTE: The per-language encoding rules below are inherited from tutor.vim.
" They only decide which file NAME is looked up; when that file is not found
" on 'runtimepath' the English tutor is copied instead.

" 1. Build the extension of the file, if any:
let s:ext = ""
if strlen($xx) > 1
  " An explicit language code on the command line wins.
  let s:ext = "." . $xx
else
  let s:lang = ""
  " Check that a potential value has at least two letters.
  " Ignore "1043" and "C".
  if exists("v:lang") && v:lang =~ '\a\a'
    let s:lang = v:lang
  elseif $LC_ALL =~ '\a\a'
    let s:lang = $LC_ALL
  elseif $LANG =~ '\a\a'
    let s:lang = $LANG
  endif
  if s:lang != ""
    " Remove "@euro" (ignoring case), it may be at the end
    let s:lang = substitute(s:lang, '\c@euro', '', '')
    " On MS-Windows it may be German_Germany.1252 or Polish_Poland.1250.  How
    " about other languages?
    if s:lang =~ "German"
      let s:ext = ".de"
    elseif s:lang =~ "Polish"
      let s:ext = ".pl"
    elseif s:lang =~ "Slovak"
      let s:ext = ".sk"
    elseif s:lang =~ "Czech"
      let s:ext = ".cs"
    elseif s:lang =~ "Dutch"
      let s:ext = ".nl"
    else
      " Otherwise use the first two letters of the locale name.
      let s:ext = "." . strpart(s:lang, 0, 2)
    endif
  endif
endif

" Somehow ".ge" (Germany) is sometimes used for ".de" (Deutsch).
if s:ext =~? '\.ge'
  let s:ext = ".de"
endif

" English is the default file, which has no language extension.
if s:ext =~? '\.en'
  let s:ext = ""
endif

" Japanese: pick the file name by 'encoding' (euc, utf-8, or "sjis").
" The "sjis" one is actually "cp932", it doesn't matter for this text.
if s:ext =~? '\.ja'
  if &enc =~ "euc"
    let s:ext = ".ja.euc"
  elseif &enc != "utf-8"
    let s:ext = ".ja.sjis"
  endif
endif

" Korean: pick the file name by 'encoding' (utf-8 or euc).
if s:ext =~? '\.ko'
  if &enc != "utf-8"
    let s:ext = ".ko.euc"
  endif
endif

" Chinese: pick the file name by 'encoding' (big5, utf-8 or euc).
" This segment is from the above lines and modified by
" Mendel L Chan for Chinese vim regextutorial
if s:ext =~? '\.zh'
  if &enc =~ 'big5\|cp950'
    let s:ext = ".zh.big5"
  elseif &enc != 'utf-8'
    let s:ext = ".zh.euc"
  endif
endif

" Polish: use the cp1250 file name when 'encoding' mentions 1250.
if s:ext =~? '\.pl'
  if &enc =~ '1250'
    let s:ext = ".pl.cp1250"
  endif
endif

" Turkish: use the iso9 file name for iso-8859-9.
if s:ext =~? '\.tr'
  if &enc == "iso-8859-9"
    let s:ext = ".tr.iso9"
  endif
endif

" Greek: pick the file name by 'encoding' (iso-8859-7, utf-8 or cp737).
" We used ".gr" (Greece) instead of ".el" (Greek); accept both.
if s:ext =~? '\.gr\|\.el'
  if &enc == "iso-8859-7"
    let s:ext = ".el"
  elseif &enc == "utf-8"
    let s:ext = ".el.utf-8"
  elseif &enc =~ '737'
    let s:ext = ".el.cp737"
  endif
endif

" Slovak: use the cp1250 file name when 'encoding' mentions 1250.
if s:ext =~? '\.sk'
  if &enc =~ '1250'
    let s:ext = ".sk.cp1250"
  endif
endif

" Czech: use the cp1250 file name when 'encoding' mentions 1250.
if s:ext =~? '\.cs'
  if &enc =~ '1250'
    let s:ext = ".cs.cp1250"
  endif
endif

" Russian: pick the file name by 'encoding' (cp1251 or koi8).
if s:ext =~? '\.ru'
  if &enc =~ '1251'
    let s:ext = '.ru.cp1251'
  elseif &enc =~ 'koi8'
    let s:ext = '.ru'
  endif
endif

" Hungarian: pick the file name by 'encoding' (cp1250 or iso-8859-2).
if s:ext =~? '\.hu'
  if &enc =~ '1250'
    let s:ext = ".hu.cp1250"
  elseif &enc =~ 'iso-8859-2'
    let s:ext = '.hu'
  endif
endif

" Croatian: pick the file name by 'encoding' (cp1250 or iso-8859-2).
if s:ext =~? '\.hr'
  if &enc =~ '1250'
    let s:ext = ".hr.cp1250"
  elseif &enc =~ 'iso-8859-2'
    let s:ext = '.hr'
  endif
endif

" Esperanto always uses the utf-8 file name.
if s:ext =~? '\.eo'
  let s:ext = ".eo.utf-8"
endif
" Vietnamese always uses the utf-8 file name.
if s:ext =~? '\.vi'
  let s:ext = ".vi.utf-8"
endif

" If 'encoding' is utf-8 s:ext must end in utf-8.
if &enc == 'utf-8' && s:ext !~ '\.utf-8'
  let s:ext .= '.utf-8'
endif

" 2. Build the name of the file:
let s:regextutorfile = "tutor/regextutor"
" Keep the requested name separately: findfile() returns an empty string when
" nothing is found, which is useless in the message shown to the user below.
let s:regextutorwanted = s:regextutorfile . s:ext
let s:regextutorxx = findfile(s:regextutorwanted, &rtp)

" 3. Finding the file:
if filereadable(s:regextutorxx)
  let $REGEXTUTOR = s:regextutorxx
else
  " Fall back to the English tutor and tell the user which file was missing.
  let $REGEXTUTOR = findfile(s:regextutorfile, &rtp)
  echo "The file " . s:regextutorwanted . " does not exist.\n"
  echo "Copying English version: " . $REGEXTUTOR
  " Give the user time to read the message before Vim exits.
  4sleep
endif

" 4. Making the copy and exiting Vim:
e $REGEXTUTOR
wq! $REGEXTUTORCOPY
See pcre-tutor 4 | --------------------------------------------------------------------------------