├── Exercise_solutions.md ├── Exercises.md ├── LICENSE ├── README.md ├── Version_changes.md ├── code_snippets ├── Alternation_and_Grouping.js ├── Anchors.js ├── Character_class.js ├── Dot_metacharacter_and_Quantifiers.js ├── Escaping_metacharacters.js ├── Groupings_and_backreferences.js ├── Interlude_Common_tasks.js ├── Lookarounds.js ├── RegExp_introduction.js ├── Unicode.js └── Working_with_matched_portions.js ├── images ├── backslash_in_RegExp.png ├── find_replace.png ├── info.svg ├── js_regexp_ls.png ├── password_check.png ├── regex101.png ├── regulex.png ├── v_flag_examples.png └── warning.svg ├── js_regexp.md └── sample_chapters └── js_regexp_sample.pdf /Exercise_solutions.md: -------------------------------------------------------------------------------- 1 | # Exercise solutions 2 | 3 | >![info](images/info.svg) Try to solve the exercises in every chapter using only the features discussed until that chapter. Some of the exercises will be easier to solve with techniques presented in the later chapters, but the aim of these exercises is to explore the features presented so far. 4 | 5 |
6 | 7 | # RegExp introduction 8 | 9 | **1)** Check if the given input strings contain `two` irrespective of case. 10 | 11 | ```js 12 | > let s1 = 'Their artwork is exceptional' 13 | > let s2 = 'one plus tw0 is not three' 14 | > let s3 = 'TRUSTWORTHY' 15 | 16 | > const pat1 = /two/i 17 | 18 | > pat1.test(s1) 19 | < true 20 | > pat1.test(s2) 21 | < false 22 | > pat1.test(s3) 23 | < true 24 | ``` 25 | 26 | **2)** For the given array, filter all elements that do *not* contain `e`. 27 | 28 | ```js 29 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 30 | 31 | > items.filter(w => !/e/.test(w)) 32 | < ['goal', 'sit'] 33 | ``` 34 | 35 | **3)** Replace only the first occurrence of `5` with `five` for the given string. 36 | 37 | ```js 38 | > let ip = 'They ate 5 apples and 5 oranges' 39 | 40 | > ip.replace(/5/, 'five') 41 | < 'They ate five apples and 5 oranges' 42 | ``` 43 | 44 | **4)** Replace all occurrences of `5` with `five` for the given string. 45 | 46 | ```js 47 | > let ip = 'They ate 5 apples and 5 oranges' 48 | 49 | > ip.replace(/5/g, 'five') 50 | < 'They ate five apples and five oranges' 51 | ``` 52 | 53 | **5)** Replace all occurrences of `note` irrespective of case with `X`. 54 | 55 | ```js 56 | > let ip = 'This note should not be NoTeD' 57 | 58 | > ip.replace(/note/ig, 'X') 59 | < 'This X should not be XD' 60 | ``` 61 | 62 | **6)** For the given multiline input string, filter all lines NOT containing the string `2`. 63 | 64 | ```js 65 | > let purchases = `items qty 66 | apple 24 67 | mango 50 68 | guava 42 69 | onion 31 70 | water 10` 71 | 72 | > const num = /2/ 73 | 74 | > console.log(purchases.split('\n') 75 | .filter(e => !num.test(e)) 76 | .join('\n')) 77 | < items qty 78 | mango 50 79 | onion 31 80 | water 10 81 | ``` 82 | 83 | >![info](images/info.svg) You'd be able to solve this using just the `replace()` method by the end of the [Dot metacharacter and Quantifiers](#dot-metacharacter-and-quantifiers) chapter. 
84 | 85 | **7)** For the given array, filter all elements that contain either `a` or `w`. 86 | 87 | ```js 88 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 89 | 90 | > items.filter(w => /a/.test(w) || /w/.test(w)) 91 | < ['goal', 'new', 'eat'] 92 | ``` 93 | 94 | **8)** For the given array, filter all elements that contain both `e` and `n`. 95 | 96 | ```js 97 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 98 | 99 | > items.filter(w => /e/.test(w) && /n/.test(w)) 100 | < ['new', 'dinner'] 101 | ``` 102 | 103 | **9)** For the given string, replace `0xA0` with `0x7F` and `0xC0` with `0x1F`. 104 | 105 | ```js 106 | > let ip = 'start address: 0xA0, func1 address: 0xC0' 107 | 108 | > ip.replace(/0xA0/, '0x7F').replace(/0xC0/, '0x1F') 109 | < 'start address: 0x7F, func1 address: 0x1F' 110 | ``` 111 | 112 |
113 | 114 | # Anchors 115 | 116 | **1)** Check if the given input strings contain `is` or `the` as whole words. 117 | 118 | ```js 119 | > let str1 = 'is; (this)' 120 | > let str2 = "The food isn't good" 121 | > let str3 = 'the2 cats' 122 | > let str4 = 'switch on the light' 123 | 124 | > const pat1 = /\bis\b/ 125 | > const pat2 = /\bthe\b/ 126 | 127 | > pat1.test(str1) || pat2.test(str1) 128 | < true 129 | > pat1.test(str2) || pat2.test(str2) 130 | < false 131 | > pat1.test(str3) || pat2.test(str3) 132 | < false 133 | > pat1.test(str4) || pat2.test(str4) 134 | < true 135 | ``` 136 | 137 | **2)** For the given input string, change only the whole word `red` to `brown`. 138 | 139 | ```js 140 | > let ip = 'bred red spread credible red;' 141 | 142 | > ip.replace(/\bred\b/g, 'brown') 143 | < 'bred brown spread credible brown;' 144 | ``` 145 | 146 | **3)** For the given array, filter all elements that contain `42` surrounded by word characters. 147 | 148 | ```js 149 | > let items = ['hi42bye', 'nice1423', 'bad42', 'cool_42a', 'fake4b'] 150 | 151 | > items.filter(e => /\B42\B/.test(e)) 152 | < ['hi42bye', 'nice1423', 'cool_42a'] 153 | ``` 154 | 155 | **4)** For the given input array, filter all elements that start with `den` or end with `ly`. 156 | 157 | ```js 158 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent'] 159 | 160 | > items.filter(e => /^den/.test(e) || /ly$/.test(e)) 161 | < ['lovely', '2 lonely', 'dent'] 162 | ``` 163 | 164 | **5)** For the given input string, change whole word `mall` to `1234` only if it is at the start of a line. 
165 | 166 | ```js 167 | > let para = `(mall) call ball pall 168 | ball fall wall tall 169 | mall call ball pall 170 | wall mall ball fall 171 | mallet wallet malls 172 | mall:call:ball:pall` 173 | 174 | > console.log(para.replace(/^mall\b/gm, '1234')) 175 | < (mall) call ball pall 176 | ball fall wall tall 177 | 1234 call ball pall 178 | wall mall ball fall 179 | mallet wallet malls 180 | 1234:call:ball:pall 181 | ``` 182 | 183 | **6)** For the given array, filter all elements having a line starting with `den` or ending with `ly`. 184 | 185 | ```js 186 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent'] 187 | 188 | > items.filter(e => /^den/m.test(e) || /ly$/m.test(e)) 189 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent'] 190 | ``` 191 | 192 | **7)** For the given input array, filter all whole elements `12\nthree` irrespective of case. 193 | 194 | ```js 195 | > let items = ['12\nthree\n', '12\nThree', '12\nthree\n4', '12\nthree'] 196 | 197 | > items.filter(e => /^12\nthree$/i.test(e)) 198 | < ['12\nThree', '12\nthree'] 199 | ``` 200 | 201 | **8)** For the given input array, replace `hand` with `X` for all elements that start with `hand` followed by at least one word character. 202 | 203 | ```js 204 | > let items = ['handed', 'hand', 'handy', 'un-handed', 'handle', 'hand-2'] 205 | 206 | > items.map(w => w.replace(/^hand\B/, 'X')) 207 | < ['Xed', 'hand', 'Xy', 'un-handed', 'Xle', 'hand-2'] 208 | ``` 209 | 210 | **9)** For the given input array, filter all elements starting with `h`. Additionally, replace `e` with `X` for these filtered elements. 211 | 212 | ```js 213 | > let items = ['handed', 'hand', 'handy', 'unhanded', 'handle', 'hand-2'] 214 | 215 | > items.filter(w => /^h/.test(w)).map(w => w.replace(/e/g, 'X')) 216 | < ['handXd', 'hand', 'handy', 'handlX', 'hand-2'] 217 | ``` 218 | 219 | **10)** Why does the following code show `false` instead of `true`? 220 | 221 | Because `$` matches only the end of string. 
You'll have to use the `m` flag to enable matching at the end of line separators. Some regular expression engines do allow `$` to match just before `\n` if it is the last character in the string, but not JavaScript. 222 | 223 | ```js 224 | > /end$/.test('bend it\nand send\n') 225 | < false 226 | 227 | > /end$/m.test('bend it\nand send\n') 228 | < true 229 | ``` 230 | 231 |
232 | 233 | # Alternation and Grouping 234 | 235 | **1)** For the given input array, filter all elements that start with `den` or end with `ly`. 236 | 237 | ```js 238 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent'] 239 | 240 | > items.filter(e => /^den|ly$/.test(e)) 241 | < ['lovely', '2 lonely', 'dent'] 242 | ``` 243 | 244 | **2)** For the given array, filter all elements having a line starting with `den` or ending with `ly`. 245 | 246 | ```js 247 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent'] 248 | 249 | > items.filter(e => /^den|ly$/m.test(e)) 250 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent'] 251 | ``` 252 | 253 | **3)** For the given input strings, replace all occurrences of `removed` or `reed` or `received` or `refused` with `X`. 254 | 255 | ```js 256 | > let s1 = 'creed refuse removed read' 257 | > let s2 = 'refused reed redo received' 258 | 259 | > const pat1 = /re(mov|ceiv|fus|)ed/g 260 | 261 | > s1.replace(pat1, 'X') 262 | < 'cX refuse X read' 263 | > s2.replace(pat1, 'X') 264 | < 'X X redo X' 265 | ``` 266 | 267 | **4)** For the given input strings, replace `late` or `later` or `slated` with `A`. 268 | 269 | ```js 270 | > let str1 = 'plate full of slate' 271 | > let str2 = "slated for later, don't be late" 272 | 273 | > const pat2 = /slated|late(r|)/g 274 | 275 | > str1.replace(pat2, 'A') 276 | < 'pA full of sA' 277 | > str2.replace(pat2, 'A') 278 | < "A for A, don't be A" 279 | ``` 280 | 281 |
282 | 283 | # Escaping metacharacters 284 | 285 | **1)** Transform the given input strings to the expected output using the same logic on both strings. 286 | 287 | ```js 288 | > let str1 = '(9-2)*5+qty/3-(9-2)*7' 289 | > let str2 = '(qty+4)/2-(9-2)*5+pq/4' 290 | 291 | > const pat1 = /\(9-2\)\*5/g 292 | > str1.replace(pat1, '35') 293 | < '35+qty/3-(9-2)*7' 294 | > str2.replace(pat1, '35') 295 | < '(qty+4)/2-35+pq/4' 296 | ``` 297 | 298 | **2)** Replace `(4)\|` with `2` only at the start or end of the given input strings. 299 | 300 | ```js 301 | > let s1 = '2.3/(4)\\|6 fig 5.3-(4)\\|' 302 | > let s2 = '(4)\\|42 - (4)\\|3' 303 | > let s3 = 'two - (4)\\|\n' 304 | 305 | > const pat2 = /^\(4\)\\\||\(4\)\\\|$/g 306 | 307 | > s1.replace(pat2, '2') 308 | < '2.3/(4)\\|6 fig 5.3-2' 309 | > s2.replace(pat2, '2') 310 | < '242 - (4)\\|3' 311 | > s3.replace(pat2, '2') 312 | < 'two - (4)\\|\n' 313 | ``` 314 | 315 | **3)** Replace any matching element from the array `items` with `X` for the given input strings. Match the elements from `items` literally. Assume no two elements of `items` will result in any matching conflict. 316 | 317 | ```js 318 | > let items = ['a.b', '3+n', 'x\\y\\z', 'qty||price', '{n}'] 319 | 320 | > function escapeRegExp(string) { 321 | return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&') 322 | } 323 | 324 | > function unionRegExp(arr) { 325 | return arr.map(w => escapeRegExp(w)).join('|') 326 | } 327 | 328 | > const pat3 = new RegExp(unionRegExp(items), 'g') 329 | 330 | > '0a.bcd'.replace(pat3, 'X') 331 | < '0Xcd' 332 | > 'E{n}AMPLE'.replace(pat3, 'X') 333 | < 'EXAMPLE' 334 | > '43+n2 ax\\y\\ze'.replace(pat3, 'X') 335 | < '4X2 aXe' 336 | ``` 337 | 338 | **4)** Replace the backspace character `\b` with a single space character for the given input string. 339 | 340 | ```js 341 | > let ip = '123\b456' 342 | 343 | > ip.replace(/\x08/, ' ') 344 | < '123 456' 345 | ``` 346 | 347 | **5)** Replace all occurrences of `\e` with `e`. 
348 | 349 | ```js 350 | > let ip = 'th\\er\\e ar\\e common asp\\ects among th\\e alt\\ernations' 351 | 352 | > ip.replace(/\\e/g, 'e') 353 | < 'there are common aspects among the alternations' 354 | ``` 355 | 356 | **6)** Replace any matching item from the array `eqns` with `X` for the given string `ip`. Match the items from `eqns` literally. 357 | 358 | ```js 359 | > let ip = '3-(a^b)+2*(a^b)-(a/b)+3' 360 | > let eqns = ['(a^b)', '(a/b)', '(a^b)+2'] 361 | 362 | // note that '/' is also escaped here 363 | > function escapeRegExp(string) { 364 | return string.replace(/[.*+\-?^${}()|[\]\\\/]/g, '\\$&') 365 | } 366 | 367 | > function unionRegExp(arr) { 368 | return arr.map(w => escapeRegExp(w)).join('|') 369 | } 370 | 371 | > eqns.sort((a, b) => b.length - a.length) 372 | < ['(a^b)+2', '(a^b)', '(a/b)'] 373 | 374 | > const pat4 = new RegExp(unionRegExp(eqns), 'g') 375 | > pat4 376 | < /\(a\^b\)\+2|\(a\^b\)|\(a\/b\)/g 377 | 378 | > ip.replace(pat4, 'X') 379 | < '3-X*X-X+3' 380 | ``` 381 | 382 |
383 | 384 | # Dot metacharacter and Quantifiers 385 | 386 | >![info](images/info.svg) Use `s` flag for these exercises depending upon the contents of the input strings. 387 | 388 | **1)** Replace `42//5` or `42/5` with `8` for the given input. 389 | 390 | ```js 391 | > let ip = 'a+42//5-c pressure*3+42/5-14256' 392 | 393 | > ip.replace(/42\/\/?5/g, '8') 394 | < 'a+8-c pressure*3+8-14256' 395 | ``` 396 | 397 | **2)** For the array `items`, filter all elements starting with `hand` and ending immediately with at most one more character or `le`. 398 | 399 | ```js 400 | > let items = ['handed', 'hand', 'handled', 'handy', 'unhand', 'hands', 'handle'] 401 | 402 | > items.filter(w => /^hand(.|le)?$/.test(w)) 403 | < ['hand', 'handy', 'hands', 'handle'] 404 | ``` 405 | 406 | **3)** Use the `split()` method to get the output as shown for the given input strings. 407 | 408 | ```js 409 | > let eqn1 = 'a+42//5-c' 410 | > let eqn2 = 'pressure*3+42/5-14256' 411 | > let eqn3 = 'r*42-5/3+42///5-42/53+a' 412 | 413 | > const pat1 = new RegExp(`42//?5`) 414 | 415 | > eqn1.split(pat1) 416 | < ['a+', '-c'] 417 | > eqn2.split(pat1) 418 | < ['pressure*3+', '-14256'] 419 | > eqn3.split(pat1) 420 | < ['r*42-5/3+42///5-', '3+a'] 421 | ``` 422 | 423 | **4)** For the given input strings, remove everything from the first occurrence of `i` till the end of the string. 424 | 425 | ```js 426 | > let s1 = 'remove the special meaning of such constructs' 427 | > let s2 = 'characters while constructing' 428 | > let s3 = 'input output' 429 | 430 | > const pat2 = /i.*/ 431 | 432 | > s1.replace(pat2, '') 433 | < 'remove the spec' 434 | > s2.replace(pat2, '') 435 | < 'characters wh' 436 | > s3.replace(pat2, '') 437 | < '' 438 | ``` 439 | 440 | **5)** For the given strings, construct a regexp to get the output as shown. 441 | 442 | ```js 443 | > let str1 = 'a+b(addition)' 444 | > let str2 = 'a/b(division) + c%d(#modulo)' 445 | > let str3 = 'Hi there(greeting). 
Nice day(a(b)' 446 | 447 | > const remove_parentheses = /\(.*?\)/g 448 | 449 | > str1.replace(remove_parentheses, '') 450 | < 'a+b' 451 | > str2.replace(remove_parentheses, '') 452 | < 'a/b + c%d' 453 | > str3.replace(remove_parentheses, '') 454 | < 'Hi there. Nice day' 455 | ``` 456 | 457 | **6)** Correct the given regexp to get the expected output. 458 | 459 | ```js 460 | > let words = 'plink incoming tint winter in caution sentient' 461 | 462 | // wrong output 463 | > const w1 = /int|in|ion|ing|inco|inter|ink/g 464 | > words.replace(w1, 'X') 465 | "plXk XcomXg tX wXer X cautX sentient" 466 | 467 | // expected output 468 | > const w2 = /in(ter|co|g|k|t)?|ion/g 469 | > words.replace(w2, 'X') 470 | "plX XmX tX wX X cautX sentient" 471 | ``` 472 | 473 | **7)** For the given greedy quantifiers, what would be the equivalent form using the `{m,n}` representation? 474 | 475 | * `?` is same as `{0,1}` 476 | * `*` is same as `{0,}` 477 | * `+` is same as `{1,}` 478 | 479 | **8)** `(a*|b*)` is same as `(a|b)*` — true or false? 480 | 481 | False. Because `(a*|b*)` will match only sequences like `a`, `aaa`, `bb`, `bbbbbbbb`. But `(a|b)*` can match mixed sequences like `ababbba` too. 482 | 483 | **9)** For the given input strings, remove everything from the first occurrence of `test` (irrespective of case) till the end of the string, provided `test` isn't at the end of the string. 484 | 485 | ```js 486 | > let s1 = 'this is a Test' 487 | > let s2 = 'always test your regexp for corner\ncases' 488 | > let s3 = 'a TEST of skill tests?' 489 | 490 | > let pat3 = /test.+/is 491 | 492 | > s1.replace(pat3, '') 493 | < 'this is a Test' 494 | > s2.replace(pat3, '') 495 | < 'always ' 496 | > s3.replace(pat3, '') 497 | < 'a ' 498 | ``` 499 | 500 | **10)** For the input array `words`, filter all elements starting with `s` and containing `e` and `t` in any order. 
501 | 502 | ```js 503 | > let words = ['sequoia', 'subtle', 'exhibit', 'a set', 'sets', 'tests', 'site'] 504 | 505 | > words.filter(w => /^s.*(e.*t|t.*e)/.test(w)) 506 | < ['subtle', 'sets', 'site'] 507 | ``` 508 | 509 | **11)** For the input array `words`, remove all elements having less than `6` characters. 510 | 511 | ```js 512 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 'tests', 'site'] 513 | 514 | > words.filter(w => /.{6,}/.test(w)) 515 | < ['sequoia', 'subtle', 'exhibit'] 516 | ``` 517 | 518 | **12)** For the input array `words`, filter all elements starting with `s` or `t` and having a maximum of `6` characters. 519 | 520 | ```js 521 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 't set', 'site'] 522 | 523 | > words.filter(w => /^(s|t).{0,5}$/.test(w)) 524 | < ['subtle', 'sets', 't set', 'site'] 525 | ``` 526 | 527 | **13)** Delete from the string `start` if it is at the beginning of a line up to the next occurrence of the string `end` at the end of a line. Match these keywords irrespective of case. 528 | 529 | ```js 530 | > let para = `good start 531 | start working on that 532 | project you always wanted 533 | to, do not let it end 534 | hi there 535 | start and end the end 536 | 42 537 | Start and try to 538 | finish the End 539 | bye` 540 | 541 | > const mpat = /^start.*?end$/igms 542 | > console.log(para.replace(mpat, '')) 543 | < good start 544 | 545 | hi there 546 | 547 | 42 548 | 549 | bye 550 | ``` 551 | 552 | **14)** Can you reason out why this code results in the output shown? The aim was to remove all `<characters>` patterns but not the `<>` ones. The expected result was `'a 1<> b 2<> c'`. 553 | 554 | The use of `.+` quantifier after `<` means that `<>` cannot be a possible match to satisfy `<.+?>`. So, after matching `<` (which occurs after `1` and `2` in the given input string) the regular expression engine will look for the next occurrence of `>` character to satisfy the given pattern. 
To solve such cases, you need to use character classes (discussed in a later chapter) to specify which particular set of characters should be matched by the `+` quantifier (instead of the `.` metacharacter). 555 | 556 | ```js 557 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>' 558 | > ip.replace(/<.+?>/g, '') 559 | < 'a 1 2' 560 | ``` 561 | 562 | **15)** Use the `split()` method to get the output as shown below for the given input strings. 563 | 564 | ```js 565 | > let s1 = 'go there :: this :: that' 566 | > let s2 = 'a::b :: c::d e::f :: 4::5' 567 | > let s3 = '42:: hi::bye::see :: carefully' 568 | 569 | > const pat4 = / +:: +(.+)/ 570 | 571 | > s1.split(pat4, 2) 572 | < ['go there', 'this :: that'] 573 | > s2.split(pat4, 2) 574 | < ['a::b', 'c::d e::f :: 4::5'] 575 | > s3.split(pat4, 2) 576 | < ['42:: hi::bye::see', 'carefully'] 577 | ``` 578 | 579 |
580 | 581 | # Working with matched portions 582 | 583 | **1)** For the given strings, extract the matching portion from the first `is` to the last `t`. 584 | 585 | ```js 586 | > let str1 = 'What is the biggest fruit you have seen?' 587 | > let str2 = 'Your mission is to read and practice consistently' 588 | 589 | > const pat1 = /is.*t/ 590 | 591 | > str1.match(pat1)[0] 592 | < 'is the biggest fruit' 593 | > str2.match(pat1)[0] 594 | < 'ission is to read and practice consistent' 595 | ``` 596 | 597 | **2)** Find the starting index of the first occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string. 598 | 599 | ```js 600 | > let s1 = 'match after the last newline character' 601 | > let s2 = 'and then you want to test' 602 | > let s3 = 'this is good bye then' 603 | > let s4 = 'who was there to see?' 604 | 605 | > const pat2 = /is|the|was|to/ 606 | 607 | > s1.search(pat2) 608 | < 12 609 | > s2.search(pat2) 610 | < 4 611 | > s3.search(pat2) 612 | < 2 613 | > s4.search(pat2) 614 | < 4 615 | ``` 616 | 617 | **3)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string. 618 | 619 | ```js 620 | > let s1 = 'match after the last newline character' 621 | > let s2 = 'and then you want to test' 622 | > let s3 = 'this is good bye then' 623 | > let s4 = 'who was there to see?' 624 | 625 | > const pat3 = /.*(is|the|was|to)/d 626 | 627 | > s1.match(pat3).indices[1][0] 628 | < 12 629 | > s2.match(pat3).indices[1][0] 630 | < 18 631 | > s3.match(pat3).indices[1][0] 632 | < 17 633 | > s4.match(pat3).indices[1][0] 634 | < 14 635 | ``` 636 | 637 | **4)** The given input string contains `:` exactly once. Extract all characters after the `:` as output. 
638 | 639 | ```js 640 | > let ip = 'fruits:apple, mango, guava, blueberry' 641 | 642 | > ip.match(/:(.*)/)[1] 643 | < 'apple, mango, guava, blueberry' 644 | ``` 645 | 646 | **5)** Extract all words between `(` and `)` from the given input string as an array (including the parentheses). Assume that the input will not contain any broken parentheses. 647 | 648 | ```js 649 | > let ip = 'another (way) to reuse (portion) matched (by) capture groups' 650 | 651 | > ip.match(/\(.*?\)/g) 652 | < ['(way)', '(portion)', '(by)'] 653 | ``` 654 | 655 | **6)** Extract all occurrences of `<` up to the next occurrence of `>`, provided there is at least one character in between `<` and `>`. 656 | 657 | ```js 658 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>' 659 | 660 | > ip.match(/<.+?>/g) 661 | < ['<apple>', '<> b<bye>', '<> c<cat>'] 662 | ``` 663 | 664 | **7)** Use `matchAll()` to get the output as shown below for the given input strings. Note the characters used in the input strings carefully. 665 | 666 | ```js 667 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 ' 668 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 ' 669 | 670 | > const pat4 = /(.+?),(.+?) /g 671 | 672 | > Array.from(row1.matchAll(pat4), m => [m[1], m[2]]) 673 | < (4) [Array(2), Array(2), Array(2), Array(2)] 674 | 0: (2) ['-2', '5'] 675 | 1: (2) ['4', '+3'] 676 | 2: (2) ['+42', '-53'] 677 | 3: (2) ['4356246', '-357532354'] 678 | length: 4 679 | [[Prototype]]: Array(0) 680 | 681 | > Array.from(row2.matchAll(pat4), m => [m[1], m[2]]) 682 | < (3) [Array(2), Array(2), Array(2)] 683 | 0: (2) ['1.32', '-3.14'] 684 | 1: (2) ['634', '5.63'] 685 | 2: (2) ['63.3e3', '9907809345343.235'] 686 | length: 3 687 | [[Prototype]]: Array(0) 688 | ``` 689 | 690 | **8)** This is an extension to the previous question. Sum each pair of numbers that are separated by a comma. 691 | 692 | * For `row1`, find the sum of integers. For example, sum of `-2` and `5` is `3`. 693 | * For `row2`, find the sum of floating-point numbers. 
For example, sum of `1.32` and `-3.14` is `-1.82`. 694 | 695 | ```js 696 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 ' 697 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 ' 698 | 699 | // should be same as the previous question 700 | > const pat5 = /(.+?),(.+?) /g 701 | 702 | > Array.from(row1.matchAll(pat5), m => +m[1] + +m[2]) 703 | < [3, 7, -11, -353176108] 704 | 705 | > Array.from(row2.matchAll(pat5), m => +m[1] + +m[2]) 706 | < [-1.82, 639.63, 9907809408643.234] 707 | ``` 708 | 709 | **9)** Use the `split()` method to get the output as shown below. 710 | 711 | ```js 712 | > let ip = '42:no-output;1000:car-tr:u-ck;SQEX49801' 713 | 714 | > ip.split(/:.+?-(.+?);/) 715 | < ['42', 'output', '1000', 'tr:u-ck', 'SQEX49801'] 716 | ``` 717 | 718 | **10)** Write a string function that changes the given input to alternate case. The first alphabet should be changed to lowercase, the next one to uppercase and then lowercase and so on. Characters other than alphabets should be left alone and not affect case changing. 719 | 720 | ```js 721 | > function aLtErNaTeCaSe(ip) { 722 | let b = true 723 | return ip.replace(/[a-z]/ig, m => (b = !b) ? m.toUpperCase() : m.toLowerCase()) 724 | } 725 | 726 | > aLtErNaTeCaSe('HI THERE!') 727 | < 'hI tHeRe!' 728 | > aLtErNaTeCaSe('good morning') 729 | < 'gOoD mOrNiNg' 730 | > aLtErNaTeCaSe('Sample123string42with777numbers') 731 | < 'sAmPlE123sTrInG42wItH777nUmBeRs' 732 | ``` 733 | 734 | **11)** Replace all occurrences of `par` with `spar`, `spare` with `extra` and `park` with `garden`. 
735 | 736 | ```js 737 | > let s1 = 'apartment has a park' 738 | > let s2 = 'do you have a spare cable' 739 | > let s3 = 'write a parser' 740 | 741 | > let d1 = {'par': 'spar', 'spare': 'extra', 'park': 'garden'} 742 | > const pat6 = /spare|park?/g 743 | 744 | > s1.replace(pat6, k => d1[k]) 745 | < 'aspartment has a garden' 746 | > s2.replace(pat6, k => d1[k]) 747 | < 'do you have a extra cable' 748 | > s3.replace(pat6, k => d1[k]) 749 | < 'write a sparser' 750 | ``` 751 | 752 | **12)** Name the flag and property you can use with the `match()` method to get both the starting and ending locations of the matched portions. 753 | 754 | The `d` flag and `indices` property can be used to get both the starting and ending locations of the matched portions. Here's an example: 755 | 756 | ```js 757 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d) 758 | < [':100g', '100', index: 6, input: 'coffee:100g tea:250g', 759 | groups: undefined, indices: Array(2)] 760 | 761 | // locations for the entire match 762 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[0] 763 | < [6, 11] 764 | 765 | // locations for the first capture group 766 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[1] 767 | < [7, 10] 768 | ``` 769 | 770 |
771 | 772 | # Character class 773 | 774 | **1)** For the array `items`, filter all elements starting with `hand` and ending with `s` or `y` or `le`. No other character in between, for example, `hands` should match but not `hand-has`. 775 | 776 | ```js 777 | > let items = ['-handy', 'hand', 'handy', 'unhand', 'hands', 'hand-icy', 'handle'] 778 | 779 | > items.filter(w => /^hand([sy]|le)$/.test(w)) 780 | < ['handy', 'hands', 'handle'] 781 | ``` 782 | 783 | **2)** Replace all whole words `reed` or `read` or `red` with `X`. 784 | 785 | ```js 786 | > let ip = 'redo red credible :read: rod reed bred' 787 | 788 | > ip.replace(/\bre[ae]?d\b/g, 'X') 789 | < 'redo X credible :X: rod X bred' 790 | ``` 791 | 792 | **3)** For the array `words`, filter all elements containing `e` or `i` followed by `l` or `n`. Note that the order mentioned should be followed. 793 | 794 | ```js 795 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest'] 796 | 797 | > words.filter(w => /[ei].*[ln]/.test(w)) 798 | < ['surrender', 'unicorn', 'eel'] 799 | ``` 800 | 801 | **4)** For the array `words`, filter all elements containing `e` or `i` and `l` or `n` in any order. 802 | 803 | ```js 804 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest'] 805 | 806 | > words.filter(w => /[ei].*[ln]|[ln].*[ei]/.test(w)) 807 | < ['surrender', 'unicorn', 'newer', 'eel'] 808 | ``` 809 | 810 | **5)** Extract all hex character sequences, with `0x` optional prefix. Match the characters case insensitively, and the sequences shouldn't be surrounded by other word characters. 
811 | 812 | ```js 813 | > let str1 = '128A foo 0xfe32 34 0xbar' 814 | > let str2 = '0XDEADBEEF place 0x0ff1ce bad' 815 | 816 | > const hex_seq = /\b(0x)?[\da-f]+\b/ig 817 | 818 | > str1.match(hex_seq) 819 | < ['128A', '0xfe32', '34'] 820 | > str2.match(hex_seq) 821 | < ['0XDEADBEEF', '0x0ff1ce', 'bad'] 822 | ``` 823 | 824 | **6)** Delete from `(` to the next occurrence of `)` unless they contain parentheses characters in between. 825 | 826 | ```js 827 | > let str1 = 'def factorial()' 828 | > let str2 = 'a/b(division) + c%d(#modulo) - (e+(j/k-3)*4)' 829 | > let str3 = 'Hi there(greeting). Nice day(a(b)' 830 | 831 | > const remove_parentheses = /\([^()]*\)/g 832 | 833 | > str1.replace(remove_parentheses, '') 834 | < 'def factorial' 835 | > str2.replace(remove_parentheses, '') 836 | < 'a/b + c%d - (e+*4)' 837 | > str3.replace(remove_parentheses, '') 838 | < 'Hi there. Nice day(a' 839 | ``` 840 | 841 | **7)** For the array `words`, filter all elements not starting with `e` or `p` or `u`. 842 | 843 | ```js 844 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', '(pest)'] 845 | 846 | > words.filter(w => /^[^epu]/.test(w)) 847 | < ['surrender', 'newer', 'door', '(pest)'] 848 | ``` 849 | 850 | **8)** For the array `words`, filter all elements not containing `u` or `w` or `ee` or `-`. 851 | 852 | ```js 853 | > let words = ['p-t', 'you', 'tea', 'heel', 'owe', 'new', 'reed', 'ear'] 854 | 855 | > words.filter(w => !/[uw-]|ee/.test(w)) 856 | < ['tea', 'ear'] 857 | ``` 858 | 859 | **9)** The given input strings contain fields separated by `,` and fields can be empty too. Replace the last three fields with `WHTSZ323`. 
860 | 861 | ```js 862 | > let row1 = '(2),kite,12,,D,C,,' 863 | > let row2 = 'hi,bye,sun,moon' 864 | 865 | > const pat1 = /(,[^,]*){3}$/g 866 | 867 | > row1.replace(pat1, ',WHTSZ323') 868 | < '(2),kite,12,,D,WHTSZ323' 869 | > row2.replace(pat1, ',WHTSZ323') 870 | < 'hi,WHTSZ323' 871 | ``` 872 | 873 | **10)** Split the given strings based on consecutive sequence of digit or whitespace characters. 874 | 875 | ```js 876 | > let s1 = 'lion \t Ink32onion Nice' 877 | > let s2 = '**1\f2\n3star\t7 77\r**' 878 | 879 | > const pat2 = /[\d\s]+/ 880 | 881 | > s1.split(pat2) 882 | < ['lion', 'Ink', 'onion', 'Nice'] 883 | > s2.split(pat2) 884 | < ['**', 'star', '**'] 885 | ``` 886 | 887 | **11)** Delete all occurrences of the sequence `<characters>` where `characters` is one or more non `>` characters and cannot be empty. 888 | 889 | ```js 890 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>' 891 | 892 | > ip.replace(/<[^>]+>/g, '') 893 | < 'a 1<> b 2<> c' 894 | ``` 895 | 896 | **12)** `\b[a-z](on|no)[a-z]\b` is same as `\b[a-z][on]{2}[a-z]\b`. True or False? Sample input lines shown below might help to understand the differences, if any. 897 | 898 | False. `[on]{2}` will also match `oo` and `nn`. 899 | 900 | ```js 901 | > console.log('known\nmood\nknow\npony\ninns') 902 | known 903 | mood 904 | know 905 | pony 906 | inns 907 | ``` 908 | 909 | **13)** For the given array, filter elements containing any number sequence greater than `624`. 910 | 911 | ```js 912 | > let items = ['hi0000432abcd', 'car00625', '42_624 0512', '3.14 96 2 foo1234baz'] 913 | 914 | > items.filter(e => e.match(/\d+/g).some(m => m > 624)) 915 | < ['car00625', '3.14 96 2 foo1234baz'] 916 | ``` 917 | 918 | **14)** Convert the given input string to two different arrays as shown below. 
919 | 920 | ```js 921 | > let ip = 'price_42 roast^\t\n^-ice==cat\neast' 922 | 923 | > ip.split(/\W+/) 924 | < ['price_42', 'roast', 'ice', 'cat', 'east'] 925 | 926 | > ip.split(/(\W+)/) 927 | < ['price_42', ' ', 'roast', '^\t\n^-', 'ice', '==', 'cat', '\n', 'east'] 928 | ``` 929 | 930 | **15)** Filter all elements whose first non-whitespace character is not a `#` character. Any element made up of only whitespace characters should be ignored as well. 931 | 932 | ```js 933 | > let items = [' #comment', '\t\napple #42', '#oops', 'sure', 'no#1', '\t\r\f'] 934 | 935 | > items.filter(e => /^\s*[^#\s]/.test(e)) 936 | < ['\t\napple #42', 'sure', 'no#1'] 937 | ``` 938 | 939 | **16)** For the given string, surround all whole words with `{}` except `par` and `cat`. 940 | 941 | ```js 942 | > let ip = 'part; cat {super} rest_42 par scatter' 943 | 944 | > ip.replace(/\w+/g, w => /\b(par|cat)\b/.test(w) ? w : `{${w}}`) 945 | < '{part}; cat {{super}} {rest_42} par {scatter}' 946 | ``` 947 | 948 |
949 | 950 | # Groupings and backreferences 951 | 952 | **1)** Replace the space character that occurs after a word ending with `a` or `r` with a newline character. 953 | 954 | ```js 955 | > let ip = 'area not a _a2_ roar took 22' 956 | 957 | > console.log(ip.replace(/([ar]) /g, '$1\n')) 958 | area 959 | not a 960 | _a2_ roar 961 | took 22 962 | ``` 963 | 964 | **2)** Add `[]` around words starting with `s` and containing `e` and `t` in any order. 965 | 966 | ```js 967 | > let ip = 'sequoia subtle exhibit asset sets2 tests si_te' 968 | 969 | > ip.replace(/\bs\w*(t\w*e|e\w*t)\w*/g, '[$&]') 970 | < 'sequoia [subtle] exhibit asset [sets2] tests [si_te]' 971 | ``` 972 | 973 | **3)** Replace all whole words with `X` that start and end with the same word character (irrespective of case). Single character word should get replaced with `X` too, as it satisfies the stated condition. 974 | 975 | ```js 976 | > let ip = 'oreo not a _a2_ Roar took 22' 977 | 978 | // can also use: ip.replace(/\b(\w|(\w)\w*\2)\b/ig, 'X') 979 | > ip.replace(/\b(\w)(\w*\1)?\b/ig, 'X') 980 | < 'X not X X X took X' 981 | ``` 982 | 983 | **4)** Convert the given *markdown* headers to corresponding *anchor* tags. Consider the input to start with one or more `#` characters followed by space and word characters. The `name` attribute is constructed by converting the header to lowercase and replacing spaces with hyphens. Can you do it without using a capture group? 984 | 985 | ```js 986 | > let header1 = '# Regular Expressions' 987 | > let header2 = '## Named capture groups' 988 | 989 | > function hyphenify(m) { 990 | return `<a name='${m.toLowerCase().replace(/ /g, '-')}'></a>${m}` 991 | } 992 | 993 | > header1.replace(/\w.*/, hyphenify) 994 | < "# <a name='regular-expressions'></a>Regular Expressions" 995 | > header2.replace(/\w.*/, hyphenify) 996 | < "## <a name='named-capture-groups'></a>Named capture groups" 997 | ``` 998 | 999 | **5)** Convert the given *markdown* anchors to corresponding *hyperlinks*. 
1000 | 1001 | ```js 1002 | > let anchor1 = "# <a name='regular-expressions'></a>Regular Expressions" 1003 | > let anchor2 = "## <a name='subexpression-calls'></a>Subexpression calls" 1004 | 1005 | > const hyperlink = /[^']+'([^']+)'><\/a>(.+)/ 1006 | 1007 | > anchor1.replace(hyperlink, '[$2](#$1)') 1008 | < '[Regular Expressions](#regular-expressions)' 1009 | > anchor2.replace(hyperlink, '[$2](#$1)') 1010 | < '[Subexpression calls](#subexpression-calls)' 1011 | ``` 1012 | 1013 | **6)** Check if the given input strings have words with at least two consecutive repeated alphabets irrespective of case. For example, words like `stillnesS` and `Committee` should return `true` but words like `root` or `readable` or `rotational` should return `false`. Consider word to be as defined in regular expression parlance. 1014 | 1015 | ```js 1016 | > let s1 = 'readable COMMItTEe' 1017 | > let s2 = 'rotational sti1lness _foot_' 1018 | > let s3 = 'needed repeated' 1019 | > let s4 = 'offsh00t' 1020 | 1021 | > const pat1 = /(?:(\w)\1\w*){2}/i 1022 | 1023 | > pat1.test(s1) 1024 | < true 1025 | > pat1.test(s2) 1026 | < false 1027 | > pat1.test(s3) 1028 | < false 1029 | > pat1.test(s4) 1030 | < true 1031 | ``` 1032 | 1033 | **7)** For the given input string, replace all occurrences of digit sequences with only the unique non-repeating sequence. For example, `232323` should be changed to `23` and `897897` should be changed to `897`. If there are no repeats (for example `1234`) or if the repeats end prematurely (for example `12121`), it should not be changed. 1034 | 1035 | ```js 1036 | > let ip = '1234 2323 453545354535 9339 11 60260260' 1037 | 1038 | > ip.replace(/\b(\d+)\1+\b/g, '$1') 1039 | < '1234 23 4535 9339 1 60260260' 1040 | ``` 1041 | 1042 | **8)** Replace sequences made up of words separated by `:` or `.` by the first word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character. 1043 | 1044 | ```js 1045 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.' 
1046 | 1047 | > ip.replace(/([:.]\w*)+/g, '') 1048 | < 'wow hi-2 bye kite' 1049 | ``` 1050 | 1051 | **9)** Replace sequences made up of words separated by `:` or `.` by the last word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character. 1052 | 1053 | ```js 1054 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.' 1055 | 1056 | > ip.replace(/((\w+)[:.])+/g, '$2') 1057 | < 'five hi-2 bye water' 1058 | ``` 1059 | 1060 | **10)** Split the given input string on one or more repeated sequence of `cat`. 1061 | 1062 | ```js 1063 | > let ip = 'firecatlioncatcatcatbearcatcatparrot' 1064 | 1065 | > ip.split(/(?:cat)+/) 1066 | < ['fire', 'lion', 'bear', 'parrot'] 1067 | ``` 1068 | 1069 | **11)** For the given input string, find all occurrences of digit sequences with at least one repeating sequence. For example, `232323` and `897897`. If the repeats end prematurely, for example `12121`, it should not be matched. 1070 | 1071 | ```js 1072 | > let ip = '1234 2323 453545354535 9339 11 60260260' 1073 | 1074 | > const pat2 = /\b(\d+)\1+\b/g 1075 | 1076 | // entire sequences in the output 1077 | > ip.match(pat2) 1078 | < ['2323', '453545354535', '11'] 1079 | 1080 | // only the unique sequence in the output 1081 | > Array.from(ip.matchAll(pat2), m => m[1]) 1082 | < ['23', '4535', '1'] 1083 | ``` 1084 | 1085 | **12)** Convert the comma separated strings to corresponding key-value pair mapping as shown below. The keys are `name`, `maths` and `phy` for the three fields in the input strings. 1086 | 1087 | ```js 1088 | > let row1 = 'rohan,75,89' 1089 | > let row2 = 'rose,88,92' 1090 | 1091 | > const pat3 = /(?<name>[^,]+),(?<maths>[^,]+),(?<phy>[^,]+)/ 1092 | 1093 | > row1.match(pat3).groups 1094 | < {name: 'rohan', maths: '75', phy: '89'} 1095 | 1096 | > row2.match(pat3).groups 1097 | < {name: 'rose', maths: '88', phy: '92'} 1098 | ``` 1099 | 1100 | **13)** Surround all whole words with `()`. Additionally, if the whole word is `imp` or `ant`, delete them. 
Can you do it with just a single substitution? 1101 | 1102 | ```js 1103 | > let ip = 'tiger imp goat eagle ant important' 1104 | 1105 | > ip.replace(/\b(?:imp|ant|(\w+))\b/g, '($1)') 1106 | < '(tiger) () (goat) (eagle) () (important)' 1107 | ``` 1108 | 1109 |
1110 | 1111 | # Lookarounds 1112 | 1113 | >![info](images/info.svg) Use lookarounds for solving the following exercises even if they are not required. 1114 | 1115 | **1)** Replace all whole words with `X` unless it is preceded by a `(` character. 1116 | 1117 | ```js 1118 | > let ip = '(apple) guava berry) apple (mango) (grape' 1119 | 1120 | > ip.replace(/(?<!\()\b\w+/g, 'X') 1121 | < '(apple) X X) X (mango) (grape' 1122 | ``` 1123 | 1124 | **2)** Replace all whole words with `X` unless it is followed by a `)` character. 1125 | 1126 | ```js 1127 | > let ip = '(apple) guava berry) apple (mango) (grape' 1128 | 1129 | > ip.replace(/\w+\b(?!\))/g, 'X') 1130 | < '(apple) X berry) X (mango) (X' 1131 | ``` 1132 | 1133 | **3)** Replace all whole words with `X` unless it is preceded by `(` or followed by `)` characters. 1134 | 1135 | ```js 1136 | > let ip = '(apple) guava berry) apple (mango) (grape' 1137 | 1138 | > ip.replace(/(?<!\()\b\w+\b(?!\))/g, 'X') 1139 | < '(apple) X berry) X (mango) (grape' 1140 | ``` 1141 | 1142 | **4)** Extract all whole words that do not end with `e` or `n`. 1143 | 1144 | ```js 1145 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u' 1146 | 1147 | > ip.match(/\b\w+\b(?<![en])/g) 1148 | < ['a_t', 'row', 'Dust', 'end', 'a2', 'u'] 1149 | ``` 1150 | 1151 | **5)** Extract all whole words that do not start with `a` or `d` or `n`. 1152 | 1153 | ```js 1154 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u' 1155 | 1156 | > ip.match(/(?![adn])\b\w+/g) 1157 | < ['row', 'on', 'Urn', 'e', 'Dust', 'end', 'e', 'u'] 1158 | ``` 1159 | 1160 | **6)** Extract all whole words only if they are followed by `:` or `,` or `-`. 1161 | 1162 | ```js 1163 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1164 | 1165 | > ip.match(/\w+(?=[:,-])/g) 1166 | < ['Poke', 'so_good', 'ever2'] 1167 | ``` 1168 | 1169 | **7)** Extract all whole words only if they are preceded by `=` or `/` or `-`. 1170 | 1171 | ```js 1172 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1173 | 1174 | > ip.match(/(?<=[=\/-])\w+/g) 1175 | < ['so_good', 'is', 'sit'] 1176 | ``` 1177 | 1178 | **8)** Extract all whole words only if they are preceded by `=` or `:` and followed by `:` or `.`. 1179 | 1180 | ```js 1181 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1182 | 1183 | > ip.match(/(?<=[=:])\w+(?=[:.])/g) 1184 | < ['so_good', 'ink'] 1185 | ``` 1186 | 1187 | **9)** Extract all whole words only if they are preceded by `=` or `:` or `.` or `(` or `-` and not followed by `.` or `/`. 
1188 | 1189 | ```js 1190 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1191 | 1192 | > ip.match(/(?<=[=:.(-])\w+\b(?![\/.])/g) 1193 | < ['so_good', 'vast', 'sit'] 1194 | ``` 1195 | 1196 | **10)** Remove the leading and trailing whitespaces from all the individual fields where `,` is the field separator. 1197 | 1198 | ```js 1199 | > let csv1 = ' comma ,separated ,values \t\r ' 1200 | > let csv2 = 'good bad,nice ice , 42 , , stall small' 1201 | 1202 | > const trim_whitespace = /(?<=^|,)\s+|\s+(?=,|$)/g 1203 | 1204 | > csv1.replace(trim_whitespace, '') 1205 | < 'comma,separated,values' 1206 | > csv2.replace(trim_whitespace, '') 1207 | < 'good bad,nice ice,42,,stall small' 1208 | ``` 1209 | 1210 | **11)** Filter elements that satisfy all of these rules: 1211 | 1212 | * should have at least two alphabets 1213 | * should have at least three digits 1214 | * should have at least one special character among `%` or `*` or `#` or `$` 1215 | * should not end with a whitespace character 1216 | 1217 | ```js 1218 | > let pwds = ['hunter2', 'F2h3u%9', '*X3Yz3.14\t', 'r2_d2_42', 'A $B C1234'] 1219 | 1220 | > pwds.filter(p => /(?!.*\s$)(?=(.*[a-z]){2})(?=(.*\d){3}).*[%*#$]/i.test(p)) 1221 | < ['F2h3u%9', 'A $B C1234'] 1222 | ``` 1223 | 1224 | **12)** For the given string, surround all whole words with `{}` except for whole words `par` and `cat` and `apple`. 1225 | 1226 | ```js 1227 | > let ip = 'part; cat {super} rest_42 par scatter apple spar' 1228 | 1229 | > ip.replace(/\b(?!(?:par|cat|apple)\b)\w+/g, '{$&}') 1230 | < '{part}; cat {{super}} {rest_42} par {scatter} apple {spar}' 1231 | ``` 1232 | 1233 | **13)** Extract the integer portion of floating-point numbers for the given string. A number ending with `.` and no further digits should not be considered. 1234 | 1235 | ```js 1236 | > let ip = '12 ab32.4 go 5 2. 
46.42 5' 1237 | 1238 | > ip.match(/\d+(?=\.\d)/g) 1239 | < ['32', '46'] 1240 | ``` 1241 | 1242 | **14)** For the given input strings, extract all overlapping two character sequences. 1243 | 1244 | ```js 1245 | > let s1 = 'apple' 1246 | > let s2 = '1.2-3:4' 1247 | 1248 | > const pat1 = /.(?=(.))/g 1249 | 1250 | > Array.from(s1.matchAll(pat1), m => m[0] + m[1]) 1251 | < ['ap', 'pp', 'pl', 'le'] 1252 | > Array.from(s2.matchAll(pat1), m => m[0] + m[1]) 1253 | < ['1.', '.2', '2-', '-3', '3:', ':4'] 1254 | ``` 1255 | 1256 | **15)** The given input strings contain fields separated by the `:` character. Delete `:` and the last field if there is a digit character anywhere before the last field. 1257 | 1258 | ```js 1259 | > let s1 = '42:cat' 1260 | > let s2 = 'twelve:a2b' 1261 | > let s3 = 'we:be:he:0:a:b:bother' 1262 | > let s4 = 'apple:banana-42:cherry:' 1263 | > let s5 = 'dragon:unicorn:centaur' 1264 | 1265 | > const pat2 = /(?<=\d.*):[^:]*$/ 1266 | 1267 | > s1.replace(pat2, '') 1268 | < '42' 1269 | > s2.replace(pat2, '') 1270 | < 'twelve:a2b' 1271 | > s3.replace(pat2, '') 1272 | < 'we:be:he:0:a:b' 1273 | > s4.replace(pat2, '') 1274 | < 'apple:banana-42:cherry' 1275 | > s5.replace(pat2, '') 1276 | < 'dragon:unicorn:centaur' 1277 | ``` 1278 | 1279 | **16)** Extract all whole words unless they are preceded by `:` or `<=>` or `----` or `#`. 1280 | 1281 | ```js 1282 | > let ip = '::very--at<=>row|in.a_b#b2c=>lion----east' 1283 | 1284 | > ip.match(/(?<![:#]|<=>|-{4})\b\w+/g) 1285 | < ['at', 'in', 'a_b', 'lion'] 1286 | ``` 1287 | 1288 | **17)** Match strings if it contains `qty` followed by `price` but not if there is any whitespace character or the string `error` between them. 
1289 | 1290 | ```js 1291 | > let str1 = '23,qty,price,42' 1292 | > let str2 = 'qty price,oh' 1293 | > let str3 = '3.14,qty,6,errors,9,price,3' 1294 | > let str4 = '42\nqty-6,apple-56,price-234,error' 1295 | > let str5 = '4,price,3.14,qty,4' 1296 | > let str6 = '(qtyprice) (hi-there)' 1297 | 1298 | > const neg = /qty((?!\s|error).)*price/ 1299 | 1300 | > neg.test(str1) 1301 | < true 1302 | > neg.test(str2) 1303 | < false 1304 | > neg.test(str3) 1305 | < false 1306 | > neg.test(str4) 1307 | < true 1308 | > neg.test(str5) 1309 | < false 1310 | > neg.test(str6) 1311 | < true 1312 | ``` 1313 | 1314 | **18)** Can you reason out why the following regular expressions behave differently? 1315 | 1316 | `\b` matches both the start and end of word locations. In the below example, `\b..\b` doesn't necessarily mean that the first `\b` will match only the start of word location and the second `\b` will match only the end of word location. They can be any combination! For example, `I` followed by space in the input string here is using the start of word location for both the conditions. Similarly, space followed by `2` is using the end of word location for both the conditions. 1317 | 1318 | In contrast, the negative lookarounds version ensures that there are no word characters around any two characters. Also, such assertions will always be satisfied at the start of string and the end of string respectively. But `\b` depends on the presence of word characters. For example, `!` at the end of the input string here matches the lookaround assertion but not word boundary. 1319 | 1320 | ```js 1321 | > let ip = 'I have 12, he has 2!' 1322 | 1323 | > ip.replace(/\b..\b/g, '{$&}') 1324 | < '{I }have {12}{, }{he} has{ 2}!' 1325 | 1326 | > ip.replace(/(? let w2 = 'Sample123string42with777numbers' 1334 | 1335 | > w2.split(/(? 
w2.split(/(\d+)(?!.*\d)/) 1339 | < ['Sample123string42with', '777', 'numbers'] 1340 | ``` 1341 | 1342 | **20)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings using the `search()` method. Assume that there will be at least one match for each input string. 1343 | 1344 | ```js 1345 | > let s1 = 'match after the last newline character' 1346 | > let s2 = 'and then you want to test' 1347 | > let s3 = 'this is good bye then' 1348 | > let s4 = 'who was there to see?' 1349 | 1350 | > const pat3 = /(is|the|was|to)(?!.*(is|the|was|to))/ 1351 | 1352 | > s1.search(pat3) 1353 | < 12 1354 | > s2.search(pat3) 1355 | < 18 1356 | > s3.search(pat3) 1357 | < 17 1358 | > s4.search(pat3) 1359 | < 14 1360 | ``` 1361 | 1362 |
1363 | 1364 | # Unicode 1365 | 1366 | **1)** Check if the given input strings are made up of ASCII characters only. Consider the input to be non-empty strings and any character that isn't part of the 7-bit ASCII set should result in `false`. 1367 | 1368 | ```js 1369 | > let str1 = '123 × 456' 1370 | > let str2 = 'good fοοd' 1371 | > let str3 = 'happy learning!' 1372 | 1373 | // can also use: const pat1 = /^[\x00-\x7f]+$/ 1374 | > const pat1 = /^\p{ASCII}+$/u 1375 | 1376 | > pat1.test(str1) 1377 | < false 1378 | > pat1.test(str2) 1379 | < false 1380 | > pat1.test(str3) 1381 | < true 1382 | ``` 1383 | 1384 | **2)** Retain only the punctuation characters for the given string. 1385 | 1386 | ```js 1387 | > let ip = '❨a❩❪1❫❬b❭❮2❯❰c❱❲3❳❴xyz❵⟅123⟆⟦⟧⟨like⟩⟪3.14⟫' 1388 | 1389 | > ip.replace(/\P{P}+/gu, '') 1390 | < '❨❩❪❫❬❭❮❯❰❱❲❳❴❵⟅⟆⟦⟧⟨⟩⟪.⟫' 1391 | ``` 1392 | 1393 | **3)** Is the following code snippet showing the correct output? 1394 | 1395 | Yes. Some regular expression engines allow escape sequences like `\d`, `\b`, `\s`, `\w`, etc to be Unicode aware, but not JavaScript. 1396 | 1397 | ```js 1398 | > 'fox:αλεπού'.match(/\w+/g) 1399 | < ['fox'] 1400 | ``` 1401 | 1402 | **4)** Name the set operations enabled by the `v` flag. 1403 | 1404 | The following set operations are enabled by the `v` flag inside character classes: 1405 | 1406 | * `&&` intersection 1407 | * `--` difference 1408 | 1409 | To aid in such definitions, you can use `[]` in nested fashion. 1410 | 1411 | **5)** Extract all whole words from the given strings. However, do not match words if they contain any character present in the `ignore` variable. 
1412 | 1413 | ```js 1414 | > let s1 = 'match after the last new_line character A2' 1415 | > let s2 = 'and then you want to test' 1416 | 1417 | > let ignore = 'aty' 1418 | > const ign1 = new RegExp(`\\b[\\w--[${ignore}]]+\\b`, 'gv') 1419 | > ign1 1420 | < /\b[\w--[aty]]+\b/gv 1421 | > s1.match(ign1) 1422 | < ['new_line', 'A2'] 1423 | > s2.match(ign1) 1424 | < null 1425 | 1426 | > let ignore = 'esw' 1427 | // should be the same solution used above 1428 | > const ign2 = new RegExp(`\\b[\\w--[${ignore}]]+\\b`, 'gv') 1429 | > ign2 1430 | < /\b[\w--[esw]]+\b/gv 1431 | > s1.match(ign2) 1432 | < ['match', 'A2'] 1433 | > s2.match(ign2) 1434 | < ['and', 'you', 'to'] 1435 | ``` 1436 | 1437 | -------------------------------------------------------------------------------- /Exercises.md: -------------------------------------------------------------------------------- 1 | # Exercises 2 | 3 | >![info](images/info.svg) Try to solve the exercises in every chapter using only the features discussed until that chapter. Some of the exercises will be easier to solve with techniques presented in the later chapters, but the aim of these exercises is to explore the features presented so far. 4 | 5 | >![info](images/info.svg) For solutions, see [Exercise_solutions.md](https://github.com/learnbyexample/learn_js_regexp/blob/master/Exercise_solutions.md). 6 | 7 |
8 | 9 | # RegExp introduction 10 | 11 | **1)** Check if the given input strings contain `two` irrespective of case. 12 | 13 | ```js 14 | > let s1 = 'Their artwork is exceptional' 15 | > let s2 = 'one plus tw0 is not three' 16 | > let s3 = 'TRUSTWORTHY' 17 | 18 | > const pat1 = // add your solution here 19 | 20 | > pat1.test(s1) 21 | < true 22 | > pat1.test(s2) 23 | < false 24 | > pat1.test(s3) 25 | < true 26 | ``` 27 | 28 | **2)** For the given array, filter all elements that do *not* contain `e`. 29 | 30 | ```js 31 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 32 | 33 | > items.filter(w => test(w)) // add your solution here 34 | < ['goal', 'sit'] 35 | ``` 36 | 37 | **3)** Replace only the first occurrence of `5` with `five` for the given string. 38 | 39 | ```js 40 | > let ip = 'They ate 5 apples and 5 oranges' 41 | 42 | > ip.replace() // add your solution here 43 | < 'They ate five apples and 5 oranges' 44 | ``` 45 | 46 | **4)** Replace all occurrences of `5` with `five` for the given string. 47 | 48 | ```js 49 | > let ip = 'They ate 5 apples and 5 oranges' 50 | 51 | > ip.replace() // add your solution here 52 | < 'They ate five apples and five oranges' 53 | ``` 54 | 55 | **5)** Replace all occurrences of `note` irrespective of case with `X`. 56 | 57 | ```js 58 | > let ip = 'This note should not be NoTeD' 59 | 60 | > ip.replace() // add your solution here 61 | < 'This X should not be XD' 62 | ``` 63 | 64 | **6)** For the given multiline input string, filter all lines NOT containing the string `2`. 
65 | 66 | ```js 67 | > let purchases = `items qty 68 | apple 24 69 | mango 50 70 | guava 42 71 | onion 31 72 | water 10` 73 | 74 | > const num = // add your solution here 75 | 76 | > console.log(purchases.split('\n') 77 | .filter(e => test(e)) // add your solution here 78 | .join('\n')) 79 | < items qty 80 | mango 50 81 | onion 31 82 | water 10 83 | ``` 84 | 85 | >![info](images/info.svg) You'd be able to solve this using just the `replace()` method by the end of the [Dot metacharacter and Quantifiers](#dot-metacharacter-and-quantifiers) chapter. 86 | 87 | **7)** For the given array, filter all elements that contain either `a` or `w`. 88 | 89 | ```js 90 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 91 | 92 | > items.filter(w => test(w) || test(w)) // add your solution here 93 | < ['goal', 'new', 'eat'] 94 | ``` 95 | 96 | **8)** For the given array, filter all elements that contain both `e` and `n`. 97 | 98 | ```js 99 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner'] 100 | 101 | > items.filter(w => test(w) && test(w)) // add your solution here 102 | < ['new', 'dinner'] 103 | ``` 104 | 105 | **9)** For the given string, replace `0xA0` with `0x7F` and `0xC0` with `0x1F`. 106 | 107 | ```js 108 | > let ip = 'start address: 0xA0, func1 address: 0xC0' 109 | 110 | > ip.replace() // add your solution here 111 | < 'start address: 0x7F, func1 address: 0x1F' 112 | ``` 113 | 114 |
115 | 116 | # Anchors 117 | 118 | **1)** Check if the given input strings contain `is` or `the` as whole words. 119 | 120 | ```js 121 | > let str1 = 'is; (this)' 122 | > let str2 = "The food isn't good" 123 | > let str3 = 'the2 cats' 124 | > let str4 = 'switch on the light' 125 | 126 | > const pat1 = // add your solution here 127 | > const pat2 = // add your solution here 128 | 129 | > pat1.test(str1) || pat2.test(str1) 130 | < true 131 | > pat1.test(str2) || pat2.test(str2) 132 | < false 133 | > pat1.test(str3) || pat2.test(str3) 134 | < false 135 | > pat1.test(str4) || pat2.test(str4) 136 | < true 137 | ``` 138 | 139 | **2)** For the given input string, change only the whole word `red` to `brown`. 140 | 141 | ```js 142 | > let ip = 'bred red spread credible red;' 143 | 144 | > ip.replace() // add your solution here 145 | < 'bred brown spread credible brown;' 146 | ``` 147 | 148 | **3)** For the given array, filter all elements that contain `42` surrounded by word characters. 149 | 150 | ```js 151 | > let items = ['hi42bye', 'nice1423', 'bad42', 'cool_42a', 'fake4b'] 152 | 153 | > items.filter(e => test(e)) // add your solution here 154 | < ['hi42bye', 'nice1423', 'cool_42a'] 155 | ``` 156 | 157 | **4)** For the given input array, filter all elements that start with `den` or end with `ly`. 158 | 159 | ```js 160 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent'] 161 | 162 | > items.filter(e => test(e) || test(e)) // add your solution here 163 | < ['lovely', '2 lonely', 'dent'] 164 | ``` 165 | 166 | **5)** For the given input string, change whole word `mall` to `1234` only if it is at the start of a line. 
167 | 168 | ```js 169 | > let para = `(mall) call ball pall 170 | ball fall wall tall 171 | mall call ball pall 172 | wall mall ball fall 173 | mallet wallet malls 174 | mall:call:ball:pall` 175 | 176 | > console.log(para.replace()) // add your solution here 177 | < (mall) call ball pall 178 | ball fall wall tall 179 | 1234 call ball pall 180 | wall mall ball fall 181 | mallet wallet malls 182 | 1234:call:ball:pall 183 | ``` 184 | 185 | **6)** For the given array, filter all elements having a line starting with `den` or ending with `ly`. 186 | 187 | ```js 188 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent'] 189 | 190 | > items.filter(e => test(e) || test(e)) // add your solution here 191 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent'] 192 | ``` 193 | 194 | **7)** For the given input array, filter all whole elements `12\nthree` irrespective of case. 195 | 196 | ```js 197 | > let items = ['12\nthree\n', '12\nThree', '12\nthree\n4', '12\nthree'] 198 | 199 | > items.filter(e => test(e)) // add your solution here 200 | < ['12\nThree', '12\nthree'] 201 | ``` 202 | 203 | **8)** For the given input array, replace `hand` with `X` for all elements that start with `hand` followed by at least one word character. 204 | 205 | ```js 206 | > let items = ['handed', 'hand', 'handy', 'un-handed', 'handle', 'hand-2'] 207 | 208 | > items.map(w => w.replace()) // add your solution here 209 | < ['Xed', 'hand', 'Xy', 'un-handed', 'Xle', 'hand-2'] 210 | ``` 211 | 212 | **9)** For the given input array, filter all elements starting with `h`. Additionally, replace `e` with `X` for these filtered elements. 213 | 214 | ```js 215 | > let items = ['handed', 'hand', 'handy', 'unhanded', 'handle', 'hand-2'] 216 | 217 | > items.filter(w => test(w)).map(w => w.replace()) // add your solution here 218 | < ['handXd', 'hand', 'handy', 'handlX', 'hand-2'] 219 | ``` 220 | 221 | **10)** Why does the following code show `false` instead of `true`? 
222 | 223 | ```js 224 | > /end$/.test('bend it\nand send\n') 225 | < false 226 | ``` 227 | 228 |
229 | 230 | # Alternation and Grouping 231 | 232 | **1)** For the given input array, filter all elements that start with `den` or end with `ly`. 233 | 234 | ```js 235 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent'] 236 | 237 | > items.filter() // add your solution here 238 | < ['lovely', '2 lonely', 'dent'] 239 | ``` 240 | 241 | **2)** For the given array, filter all elements having a line starting with `den` or ending with `ly`. 242 | 243 | ```js 244 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent'] 245 | 246 | > items.filter() // add your solution here 247 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent'] 248 | ``` 249 | 250 | **3)** For the given input strings, replace all occurrences of `removed` or `reed` or `received` or `refused` with `X`. 251 | 252 | ```js 253 | > let s1 = 'creed refuse removed read' 254 | > let s2 = 'refused reed redo received' 255 | 256 | > const pat1 = // add your solution here 257 | 258 | > s1.replace(pat1, 'X') 259 | < 'cX refuse X read' 260 | > s2.replace(pat1, 'X') 261 | < 'X X redo X' 262 | ``` 263 | 264 | **4)** For the given input strings, replace `late` or `later` or `slated` with `A`. 265 | 266 | ```js 267 | > let str1 = 'plate full of slate' 268 | > let str2 = "slated for later, don't be late" 269 | 270 | > const pat2 = // add your solution here 271 | 272 | > str1.replace(pat2, 'A') 273 | < 'pA full of sA' 274 | > str2.replace(pat2, 'A') 275 | < "A for A, don't be A" 276 | ``` 277 | 278 |
279 | 280 | # Escaping metacharacters 281 | 282 | **1)** Transform the given input strings to the expected output using the same logic on both strings. 283 | 284 | ```js 285 | > let str1 = '(9-2)*5+qty/3-(9-2)*7' 286 | > let str2 = '(qty+4)/2-(9-2)*5+pq/4' 287 | 288 | > const pat1 = // add your solution here 289 | > str1.replace() // add your solution here 290 | < '35+qty/3-(9-2)*7' 291 | > str2.replace() // add your solution here 292 | < '(qty+4)/2-35+pq/4' 293 | ``` 294 | 295 | **2)** Replace `(4)\|` with `2` only at the start or end of the given input strings. 296 | 297 | ```js 298 | > let s1 = '2.3/(4)\\|6 fig 5.3-(4)\\|' 299 | > let s2 = '(4)\\|42 - (4)\\|3' 300 | > let s3 = 'two - (4)\\|\n' 301 | 302 | > const pat2 = // add your solution here 303 | 304 | > s1.replace() // add your solution here 305 | < '2.3/(4)\\|6 fig 5.3-2' 306 | > s2.replace() // add your solution here 307 | < '242 - (4)\\|3' 308 | > s3.replace() // add your solution here 309 | < 'two - (4)\\|\n' 310 | ``` 311 | 312 | **3)** Replace any matching element from the array `items` with `X` for given the input strings. Match the elements from `items` literally. Assume no two elements of `items` will result in any matching conflict. 313 | 314 | ```js 315 | > let items = ['a.b', '3+n', 'x\\y\\z', 'qty||price', '{n}'] 316 | 317 | // add your solution here 318 | > const pat3 = // add your solution here 319 | 320 | > '0a.bcd'.replace(pat3, 'X') 321 | < '0Xcd' 322 | > 'E{n}AMPLE'.replace(pat3, 'X') 323 | < 'EXAMPLE' 324 | > '43+n2 ax\\y\\ze'.replace(pat3, 'X') 325 | < '4X2 aXe' 326 | ``` 327 | 328 | **4)** Replace the backspace character `\b` with a single space character for the given input string. 329 | 330 | ```js 331 | > let ip = '123\b456' 332 | 333 | > ip.replace() // add your solution here 334 | < '123 456' 335 | ``` 336 | 337 | **5)** Replace all occurrences of `\e` with `e`. 
338 | 339 | ```js 340 | > let ip = 'th\\er\\e ar\\e common asp\\ects among th\\e alt\\ernations' 341 | 342 | > ip.replace() // add your solution here 343 | < 'there are common aspects among the alternations' 344 | ``` 345 | 346 | **6)** Replace any matching item from the array `eqns` with `X` for given the string `ip`. Match the items from `eqns` literally. 347 | 348 | ```js 349 | > let ip = '3-(a^b)+2*(a^b)-(a/b)+3' 350 | > let eqns = ['(a^b)', '(a/b)', '(a^b)+2'] 351 | 352 | // add your solution here 353 | > const pat4 = // add your solution here 354 | 355 | > ip.replace(pat4, 'X') 356 | < '3-X*X-X+3' 357 | ``` 358 | 359 |
360 | 361 | # Dot metacharacter and Quantifiers 362 | 363 | >![info](images/info.svg) Use `s` flag for these exercises depending upon the contents of the input strings. 364 | 365 | **1)** Replace `42//5` or `42/5` with `8` for the given input. 366 | 367 | ```js 368 | > let ip = 'a+42//5-c pressure*3+42/5-14256' 369 | 370 | // add your solution here 371 | < 'a+8-c pressure*3+8-14256' 372 | ``` 373 | 374 | **2)** For the array `items`, filter all elements starting with `hand` and ending immediately with at most one more character or `le`. 375 | 376 | ```js 377 | > let items = ['handed', 'hand', 'handled', 'handy', 'unhand', 'hands', 'handle'] 378 | 379 | // add your solution here 380 | < ['hand', 'handy', 'hands', 'handle'] 381 | ``` 382 | 383 | **3)** Use the `split()` method to get the output as shown for the given input strings. 384 | 385 | ```js 386 | > let eqn1 = 'a+42//5-c' 387 | > let eqn2 = 'pressure*3+42/5-14256' 388 | > let eqn3 = 'r*42-5/3+42///5-42/53+a' 389 | 390 | > const pat1 = // add your solution here 391 | 392 | > eqn1.split(pat1) 393 | < ['a+', '-c'] 394 | > eqn2.split(pat1) 395 | < ['pressure*3+', '-14256'] 396 | > eqn3.split(pat1) 397 | < ['r*42-5/3+42///5-', '3+a'] 398 | ``` 399 | 400 | **4)** For the given input strings, remove everything from the first occurrence of `i` till the end of the string. 401 | 402 | ```js 403 | > let s1 = 'remove the special meaning of such constructs' 404 | > let s2 = 'characters while constructing' 405 | > let s3 = 'input output' 406 | 407 | > const pat2 = // add your solution here 408 | 409 | > s1.replace(pat2, '') 410 | < 'remove the spec' 411 | > s2.replace(pat2, '') 412 | < 'characters wh' 413 | > s3.replace(pat2, '') 414 | < '' 415 | ``` 416 | 417 | **5)** For the given strings, construct a regexp to get the output as shown. 418 | 419 | ```js 420 | > let str1 = 'a+b(addition)' 421 | > let str2 = 'a/b(division) + c%d(#modulo)' 422 | > let str3 = 'Hi there(greeting). 
Nice day(a(b)' 423 | 424 | > const remove_parentheses = // add your solution here 425 | 426 | > str1.replace(remove_parentheses, '') 427 | < 'a+b' 428 | > str2.replace(remove_parentheses, '') 429 | < 'a/b + c%d' 430 | > str3.replace(remove_parentheses, '') 431 | < 'Hi there. Nice day' 432 | ``` 433 | 434 | **6)** Correct the given regexp to get the expected output. 435 | 436 | ```js 437 | > let words = 'plink incoming tint winter in caution sentient' 438 | 439 | // wrong output 440 | > const w1 = /int|in|ion|ing|inco|inter|ink/g 441 | > words.replace(w1, 'X') 442 | "plXk XcomXg tX wXer X cautX sentient" 443 | 444 | // expected output 445 | > const w2 = // add your solution here 446 | > words.replace(w2, 'X') 447 | "plX XmX tX wX X cautX sentient" 448 | ``` 449 | 450 | **7)** For the given greedy quantifiers, what would be the equivalent form using the `{m,n}` representation? 451 | 452 | * `?` is same as 453 | * `*` is same as 454 | * `+` is same as 455 | 456 | **8)** `(a*|b*)` is same as `(a|b)*` — true or false? 457 | 458 | **9)** For the given input strings, remove everything from the first occurrence of `test` (irrespective of case) till the end of the string, provided `test` isn't at the end of the string. 459 | 460 | ```js 461 | > let s1 = 'this is a Test' 462 | > let s2 = 'always test your regexp for corner\ncases' 463 | > let s3 = 'a TEST of skill tests?' 464 | 465 | > let pat3 = // add your solution here 466 | 467 | > s1.replace(pat3, '') 468 | < 'this is a Test' 469 | > s2.replace(pat3, '') 470 | < 'always ' 471 | > s3.replace(pat3, '') 472 | < 'a ' 473 | ``` 474 | 475 | **10)** For the input array `words`, filter all elements starting with `s` and containing `e` and `t` in any order. 
476 | 477 | ```js 478 | > let words = ['sequoia', 'subtle', 'exhibit', 'a set', 'sets', 'tests', 'site'] 479 | 480 | // add your solution here 481 | < ['subtle', 'sets', 'site'] 482 | ``` 483 | 484 | **11)** For the input array `words`, remove all elements having less than `6` characters. 485 | 486 | ```js 487 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 'tests', 'site'] 488 | 489 | // add your solution here 490 | < ['sequoia', 'subtle', 'exhibit'] 491 | ``` 492 | 493 | **12)** For the input array `words`, filter all elements starting with `s` or `t` and having a maximum of `6` characters. 494 | 495 | ```js 496 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 't set', 'site'] 497 | 498 | // add your solution here 499 | < ['subtle', 'sets', 't set', 'site'] 500 | ``` 501 | 502 | **13)** Delete from the string `start` if it is at the beginning of a line up to the next occurrence of the string `end` at the end of a line. Match these keywords irrespective of case. 503 | 504 | ```js 505 | > let para = `good start 506 | start working on that 507 | project you always wanted 508 | to, do not let it end 509 | hi there 510 | start and end the end 511 | 42 512 | Start and try to 513 | finish the End 514 | bye` 515 | 516 | > const mpat = // add your solution here 517 | > console.log(para.replace(mpat, '')) 518 | < good start 519 | 520 | hi there 521 | 522 | 42 523 | 524 | bye 525 | ``` 526 | 527 | **14)** Can you reason out why this code results in the output shown? The aim was to remove all `` patterns but not the `<>` ones. The expected result was `'a 1<> b 2<> c'`. 528 | 529 | ```js 530 | > let ip = 'a 1<> b 2<> c' 531 | > ip.replace(/<.+?>/g, '') 532 | < 'a 1 2' 533 | ``` 534 | 535 | **15)** Use the `split()` method to get the output as shown below for the given input strings. 
536 | 537 | ```js 538 | > let s1 = 'go there :: this :: that' 539 | > let s2 = 'a::b :: c::d e::f :: 4::5' 540 | > let s3 = '42:: hi::bye::see :: carefully' 541 | 542 | > const pat4 = // add your solution here 543 | 544 | > s1.split() // add your solution here 545 | < ['go there', 'this :: that'] 546 | > s2.split() // add your solution here 547 | < ['a::b', 'c::d e::f :: 4::5'] 548 | > s3.split() // add your solution here 549 | < ['42:: hi::bye::see', 'carefully'] 550 | ``` 551 | 552 |
553 | 554 | # Working with matched portions 555 | 556 | **1)** For the given strings, extract the matching portion from the first `is` to the last `t`. 557 | 558 | ```js 559 | > let str1 = 'What is the biggest fruit you have seen?' 560 | > let str2 = 'Your mission is to read and practice consistently' 561 | 562 | > const pat1 = // add your solution here 563 | 564 | // add your solution here for str1 565 | < 'is the biggest fruit' 566 | // add your solution here for str2 567 | < 'ission is to read and practice consistent' 568 | ``` 569 | 570 | **2)** Find the starting index of the first occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string. 571 | 572 | ```js 573 | > let s1 = 'match after the last newline character' 574 | > let s2 = 'and then you want to test' 575 | > let s3 = 'this is good bye then' 576 | > let s4 = 'who was there to see?' 577 | 578 | > const pat2 = // add your solution here 579 | 580 | // add your solution here for s1 581 | < 12 582 | // add your solution here for s2 583 | < 4 584 | // add your solution here for s3 585 | < 2 586 | // add your solution here for s4 587 | < 4 588 | ``` 589 | 590 | **3)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string. 591 | 592 | ```js 593 | > let s1 = 'match after the last newline character' 594 | > let s2 = 'and then you want to test' 595 | > let s3 = 'this is good bye then' 596 | > let s4 = 'who was there to see?' 597 | 598 | > const pat3 = // add your solution here 599 | 600 | // add your solution here for s1 601 | < 12 602 | // add your solution here for s2 603 | < 18 604 | // add your solution here for s3 605 | < 17 606 | // add your solution here for s4 607 | < 14 608 | ``` 609 | 610 | **4)** The given input string contains `:` exactly once. Extract all characters after the `:` as output. 
611 | 612 | ```js 613 | > let ip = 'fruits:apple, mango, guava, blueberry' 614 | 615 | // add your solution here 616 | < 'apple, mango, guava, blueberry' 617 | ``` 618 | 619 | **5)** Extract all words between `(` and `)` from the given input string as an array (including the parentheses). Assume that the input will not contain any broken parentheses. 620 | 621 | ```js 622 | > let ip = 'another (way) to reuse (portion) matched (by) capture groups' 623 | 624 | // add your solution here 625 | < ['(way)', '(portion)', '(by)'] 626 | ``` 627 | 628 | **6)** Extract all occurrences of `<` up to the next occurrence of `>`, provided there is at least one character in between `<` and `>`. 629 | 630 | ```js 631 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>' 632 | 633 | // add your solution here 634 | < ['<apple>', '<> b<bye>', '<> c<cat>'] 635 | ``` 636 | 637 | **7)** Use `matchAll()` to get the output as shown below for the given input strings. Note the characters used in the input strings carefully. 638 | 639 | ```js 640 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 ' 641 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 ' 642 | 643 | > const pat4 = // add your solution here 644 | 645 | // add your solution here for row1 646 | < (4) [Array(2), Array(2), Array(2), Array(2)] 647 | 0: (2) ['-2', '5'] 648 | 1: (2) ['4', '+3'] 649 | 2: (2) ['+42', '-53'] 650 | 3: (2) ['4356246', '-357532354'] 651 | length: 4 652 | [[Prototype]]: Array(0) 653 | 654 | // add your solution here for row2 655 | < (3) [Array(2), Array(2), Array(2)] 656 | 0: (2) ['1.32', '-3.14'] 657 | 1: (2) ['634', '5.63'] 658 | 2: (2) ['63.3e3', '9907809345343.235'] 659 | length: 3 660 | [[Prototype]]: Array(0) 661 | ``` 662 | 663 | **8)** This is an extension to the previous question. Sum each pair of numbers that are separated by a comma. 664 | 665 | * For `row1`, find the sum of integers. For example, sum of `-2` and `5` is `3`. 666 | * For `row2`, find the sum of floating-point numbers.
For example, sum of `1.32` and `-3.14` is `-1.82`. 667 | 668 | ```js 669 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 ' 670 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 ' 671 | 672 | // should be same as the previous question 673 | > const pat5 = // add your solution here 674 | 675 | // add your solution here for row1 676 | < [3, 7, -11, -353176108] 677 | 678 | // add your solution here for row2 679 | < [-1.82, 639.63, 9907809408643.234] 680 | ``` 681 | 682 | **9)** Use the `split()` method to get the output as shown below. 683 | 684 | ```js 685 | > let ip = '42:no-output;1000:car-tr:u-ck;SQEX49801' 686 | 687 | // add your solution here 688 | < ['42', 'output', '1000', 'tr:u-ck', 'SQEX49801'] 689 | ``` 690 | 691 | **10)** Write a string function that changes the given input to alternate case. The first alphabet should be changed to lowercase, the next one to uppercase and then lowercase and so on. Characters other than alphabets should be left alone and not affect case changing. 692 | 693 | ```js 694 | > function aLtErNaTeCaSe(ip) { 695 | // add your solution here 696 | } 697 | 698 | > aLtErNaTeCaSe('HI THERE!') 699 | < 'hI tHeRe!' 700 | > aLtErNaTeCaSe('good morning') 701 | < 'gOoD mOrNiNg' 702 | > aLtErNaTeCaSe('Sample123string42with777numbers') 703 | < 'sAmPlE123sTrInG42wItH777nUmBeRs' 704 | ``` 705 | 706 | **11)** Replace all occurrences of `par` with `spar`, `spare` with `extra` and `park` with `garden`. 
707 | 708 | ```js 709 | > let s1 = 'apartment has a park' 710 | > let s2 = 'do you have a spare cable' 711 | > let s3 = 'write a parser' 712 | 713 | > let d1 = // add your solution here 714 | > const pat6 = // add your solution here 715 | 716 | > s1.replace(pat6, k => d1[k]) 717 | < 'aspartment has a garden' 718 | > s2.replace(pat6, k => d1[k]) 719 | < 'do you have a extra cable' 720 | > s3.replace(pat6, k => d1[k]) 721 | < 'write a sparser' 722 | ``` 723 | 724 | **12)** Name the flag and property you can use with the `match()` method to get both the starting and ending locations of the matched portions. 725 | 726 |
727 | 728 | # Character class 729 | 730 | **1)** For the array `items`, filter all elements starting with `hand` and ending with `s` or `y` or `le`. No other character in between, for example, `hands` should match but not `hand-has`. 731 | 732 | ```js 733 | > let items = ['-handy', 'hand', 'handy', 'unhand', 'hands', 'hand-icy', 'handle'] 734 | 735 | // add your solution here 736 | < ['handy', 'hands', 'handle'] 737 | ``` 738 | 739 | **2)** Replace all whole words `reed` or `read` or `red` with `X`. 740 | 741 | ```js 742 | > let ip = 'redo red credible :read: rod reed bred' 743 | 744 | // add your solution here 745 | < 'redo X credible :X: rod X bred' 746 | ``` 747 | 748 | **3)** For the array `words`, filter all elements containing `e` or `i` followed by `l` or `n`. Note that the order mentioned should be followed. 749 | 750 | ```js 751 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest'] 752 | 753 | // add your solution here 754 | < ['surrender', 'unicorn', 'eel'] 755 | ``` 756 | 757 | **4)** For the array `words`, filter all elements containing `e` or `i` and `l` or `n` in any order. 758 | 759 | ```js 760 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest'] 761 | 762 | // add your solution here 763 | < ['surrender', 'unicorn', 'newer', 'eel'] 764 | ``` 765 | 766 | **5)** Extract all hex character sequences, with `0x` optional prefix. Match the characters case insensitively, and the sequences shouldn't be surrounded by other word characters. 767 | 768 | ```js 769 | > let str1 = '128A foo 0xfe32 34 0xbar' 770 | > let str2 = '0XDEADBEEF place 0x0ff1ce bad' 771 | 772 | > const hex_seq = // add your solution here 773 | 774 | > str1.match(hex_seq) 775 | < ['128A', '0xfe32', '34'] 776 | > str2.match(hex_seq) 777 | < ['0XDEADBEEF', '0x0ff1ce', 'bad'] 778 | ``` 779 | 780 | **6)** Delete from `(` to the next occurrence of `)` unless they contain parentheses characters in between. 
781 | 782 | ```js 783 | > let str1 = 'def factorial()' 784 | > let str2 = 'a/b(division) + c%d(#modulo) - (e+(j/k-3)*4)' 785 | > let str3 = 'Hi there(greeting). Nice day(a(b)' 786 | 787 | > const remove_parentheses = // add your solution here 788 | 789 | > str1.replace(remove_parentheses, '') 790 | < 'def factorial' 791 | > str2.replace(remove_parentheses, '') 792 | < 'a/b + c%d - (e+*4)' 793 | > str3.replace(remove_parentheses, '') 794 | < 'Hi there. Nice day(a' 795 | ``` 796 | 797 | **7)** For the array `words`, filter all elements not starting with `e` or `p` or `u`. 798 | 799 | ```js 800 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', '(pest)'] 801 | 802 | // add your solution here 803 | < ['surrender', 'newer', 'door', '(pest)'] 804 | ``` 805 | 806 | **8)** For the array `words`, filter all elements not containing `u` or `w` or `ee` or `-`. 807 | 808 | ```js 809 | > let words = ['p-t', 'you', 'tea', 'heel', 'owe', 'new', 'reed', 'ear'] 810 | 811 | // add your solution here 812 | < ['tea', 'ear'] 813 | ``` 814 | 815 | **9)** The given input strings contain fields separated by `,` and fields can be empty too. Replace the last three fields with `WHTSZ323`. 816 | 817 | ```js 818 | > let row1 = '(2),kite,12,,D,C,,' 819 | > let row2 = 'hi,bye,sun,moon' 820 | 821 | > const pat1 = // add your solution here 822 | 823 | // add your solution here for row1 824 | < '(2),kite,12,,D,WHTSZ323' 825 | // add your solution here for row2 826 | < 'hi,WHTSZ323' 827 | ``` 828 | 829 | **10)** Split the given strings based on consecutive sequence of digit or whitespace characters. 
830 | 831 | ```js 832 | > let s1 = 'lion \t Ink32onion Nice' 833 | > let s2 = '**1\f2\n3star\t7 77\r**' 834 | 835 | > const pat2 = // add your solution here 836 | 837 | > s1.split(pat2) 838 | < ['lion', 'Ink', 'onion', 'Nice'] 839 | > s2.split(pat2) 840 | < ['**', 'star', '**'] 841 | ``` 842 | 843 | **11)** Delete all occurrences of the sequence `<characters>` where `characters` is one or more non `>` characters and cannot be empty. 844 | 845 | ```js 846 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>' 847 | 848 | // add your solution here 849 | < 'a 1<> b 2<> c' 850 | ``` 851 | 852 | **12)** `\b[a-z](on|no)[a-z]\b` is same as `\b[a-z][on]{2}[a-z]\b`. True or False? Sample input lines shown below might help to understand the differences, if any. 853 | 854 | ```js 855 | > console.log('known\nmood\nknow\npony\ninns') 856 | known 857 | mood 858 | know 859 | pony 860 | inns 861 | ``` 862 | 863 | **13)** For the given array, filter elements containing any number sequence greater than `624`. 864 | 865 | ```js 866 | > let items = ['hi0000432abcd', 'car00625', '42_624 0512', '3.14 96 2 foo1234baz'] 867 | 868 | // add your solution here 869 | < ['car00625', '3.14 96 2 foo1234baz'] 870 | ``` 871 | 872 | **14)** Convert the given input string to two different arrays as shown below. 873 | 874 | ```js 875 | > let ip = 'price_42 roast^\t\n^-ice==cat\neast' 876 | 877 | // add your solution here 878 | < ['price_42', 'roast', 'ice', 'cat', 'east'] 879 | 880 | // add your solution here 881 | < ['price_42', ' ', 'roast', '^\t\n^-', 'ice', '==', 'cat', '\n', 'east'] 882 | ``` 883 | 884 | **15)** Filter all elements whose first non-whitespace character is not a `#` character. Any element made up of only whitespace characters should be ignored as well.
885 | 886 | ```js 887 | > let items = [' #comment', '\t\napple #42', '#oops', 'sure', 'no#1', '\t\r\f'] 888 | 889 | // add your solution here 890 | < ['\t\napple #42', 'sure', 'no#1'] 891 | ``` 892 | 893 | **16)** For the given string, surround all whole words with `{}` except `par` and `cat`. 894 | 895 | ```js 896 | > let ip = 'part; cat {super} rest_42 par scatter' 897 | 898 | // add your solution here 899 | < '{part}; cat {{super}} {rest_42} par {scatter}' 900 | ``` 901 | 902 |
903 | 904 | # Groupings and backreferences 905 | 906 | **1)** Replace the space character that occurs after a word ending with `a` or `r` with a newline character. 907 | 908 | ```js 909 | > let ip = 'area not a _a2_ roar took 22' 910 | 911 | > console.log() // add your solution here 912 | area 913 | not a 914 | _a2_ roar 915 | took 22 916 | ``` 917 | 918 | **2)** Add `[]` around words starting with `s` and containing `e` and `t` in any order. 919 | 920 | ```js 921 | > let ip = 'sequoia subtle exhibit asset sets2 tests si_te' 922 | 923 | // add your solution here 924 | < 'sequoia [subtle] exhibit asset [sets2] tests [si_te]' 925 | ``` 926 | 927 | **3)** Replace all whole words with `X` that start and end with the same word character (irrespective of case). Single character word should get replaced with `X` too, as it satisfies the stated condition. 928 | 929 | ```js 930 | > let ip = 'oreo not a _a2_ Roar took 22' 931 | 932 | // add your solution here 933 | < 'X not X X X took X' 934 | ``` 935 | 936 | **4)** Convert the given *markdown* headers to corresponding *anchor* tags. Consider the input to start with one or more `#` characters followed by space and word characters. The `name` attribute is constructed by converting the header to lowercase and replacing spaces with hyphens. Can you do it without using a capture group? 937 | 938 | ```js 939 | > let header1 = '# Regular Expressions' 940 | > let header2 = '## Named capture groups' 941 | 942 | > function hyphenify(m) { 943 | // add your solution here 944 | } 945 | 946 | > header1.replace() // add your solution here 947 | < "# <a name='regular-expressions'></a>Regular Expressions" 948 | > header2.replace() // add your solution here 949 | < "## <a name='named-capture-groups'></a>Named capture groups" 950 | ``` 951 | 952 | **5)** Convert the given *markdown* anchors to corresponding *hyperlinks*.
953 | 954 | ```js 955 | > let anchor1 = "# <a name='regular-expressions'></a>Regular Expressions" 956 | > let anchor2 = "## <a name='subexpression-calls'></a>Subexpression calls" 957 | 958 | > const hyperlink = // add your solution here 959 | 960 | > anchor1.replace() // add your solution here 961 | < '[Regular Expressions](#regular-expressions)' 962 | > anchor2.replace() // add your solution here 963 | < '[Subexpression calls](#subexpression-calls)' 964 | ``` 965 | 966 | **6)** Check if the given input strings have words with at least two consecutive repeated alphabets irrespective of case. For example, words like `stillnesS` and `Committee` should return `true` but words like `root` or `readable` or `rotational` should return `false`. Consider word to be as defined in regular expression parlance. 967 | 968 | ```js 969 | > let s1 = 'readable COMMItTEe' 970 | > let s2 = 'rotational sti1lness _foot_' 971 | > let s3 = 'needed repeated' 972 | > let s4 = 'offsh00t' 973 | 974 | > const pat1 = // add your solution here 975 | 976 | > pat1.test(s1) 977 | true 978 | > pat1.test(s2) 979 | false 980 | > pat1.test(s3) 981 | false 982 | > pat1.test(s4) 983 | true 984 | ``` 985 | 986 | **7)** For the given input string, replace all occurrences of digit sequences with only the unique non-repeating sequence. For example, `232323` should be changed to `23` and `897897` should be changed to `897`. If there are no repeats (for example `1234`) or if the repeats end prematurely (for example `12121`), it should not be changed. 987 | 988 | ```js 989 | > let ip = '1234 2323 453545354535 9339 11 60260260' 990 | 991 | // add your solution here 992 | < '1234 23 4535 9339 1 60260260' 993 | ``` 994 | 995 | **8)** Replace sequences made up of words separated by `:` or `.` by the first word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character. 996 | 997 | ```js 998 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.'
999 | 1000 | // add your solution here 1001 | < 'wow hi-2 bye kite' 1002 | ``` 1003 | 1004 | **9)** Replace sequences made up of words separated by `:` or `.` by the last word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character. 1005 | 1006 | ```js 1007 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.' 1008 | 1009 | // add your solution here 1010 | < 'five hi-2 bye water' 1011 | ``` 1012 | 1013 | **10)** Split the given input string on one or more repeated sequence of `cat`. 1014 | 1015 | ```js 1016 | > let ip = 'firecatlioncatcatcatbearcatcatparrot' 1017 | 1018 | // add your solution here 1019 | < ['fire', 'lion', 'bear', 'parrot'] 1020 | ``` 1021 | 1022 | **11)** For the given input string, find all occurrences of digit sequences with at least one repeating sequence. For example, `232323` and `897897`. If the repeats end prematurely, for example `12121`, it should not be matched. 1023 | 1024 | ```js 1025 | > let ip = '1234 2323 453545354535 9339 11 60260260' 1026 | 1027 | > const pat2 = // add your solution here 1028 | 1029 | // entire sequences in the output 1030 | // add your solution here 1031 | < ['2323', '453545354535', '11'] 1032 | 1033 | // only the unique sequence in the output 1034 | // add your solution here 1035 | < ['23', '4535', '1'] 1036 | ``` 1037 | 1038 | **12)** Convert the comma separated strings to corresponding key-value pair mapping as shown below. The keys are `name`, `maths` and `phy` for the three fields in the input strings. 1039 | 1040 | ```js 1041 | > let row1 = 'rohan,75,89' 1042 | > let row2 = 'rose,88,92' 1043 | 1044 | > const pat3 = // add your solution here 1045 | 1046 | // add your solution here for row1 1047 | < {name: 'rohan', maths: '75', phy: '89'} 1048 | 1049 | // add your solution here for row2 1050 | < {name: 'rose', maths: '88', phy: '92'} 1051 | ``` 1052 | 1053 | **13)** Surround all whole words with `()`. Additionally, if the whole word is `imp` or `ant`, delete them. 
Can you do it with just a single substitution? 1054 | 1055 | ```js 1056 | > let ip = 'tiger imp goat eagle ant important' 1057 | 1058 | // add your solution here 1059 | < '(tiger) () (goat) (eagle) () (important)' 1060 | ``` 1061 | 1062 |
1063 | 1064 | # Lookarounds 1065 | 1066 | >![info](images/info.svg) Use lookarounds for solving the following exercises even if they are not required. 1067 | 1068 | **1)** Replace all whole words with `X` unless it is preceded by a `(` character. 1069 | 1070 | ```js 1071 | > let ip = '(apple) guava berry) apple (mango) (grape' 1072 | 1073 | // add your solution here 1074 | < '(apple) X X) X (mango) (grape' 1075 | ``` 1076 | 1077 | **2)** Replace all whole words with `X` unless it is followed by a `)` character. 1078 | 1079 | ```js 1080 | > let ip = '(apple) guava berry) apple (mango) (grape' 1081 | 1082 | // add your solution here 1083 | < '(apple) X berry) X (mango) (X' 1084 | ``` 1085 | 1086 | **3)** Replace all whole words with `X` unless it is preceded by `(` or followed by `)` characters. 1087 | 1088 | ```js 1089 | > let ip = '(apple) guava berry) apple (mango) (grape' 1090 | 1091 | // add your solution here 1092 | < '(apple) X berry) X (mango) (grape' 1093 | ``` 1094 | 1095 | **4)** Extract all whole words that do not end with `e` or `n`. 1096 | 1097 | ```js 1098 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u' 1099 | 1100 | // add your solution here 1101 | < ['a_t', 'row', 'Dust', 'end', 'a2', 'u'] 1102 | ``` 1103 | 1104 | **5)** Extract all whole words that do not start with `a` or `d` or `n`. 1105 | 1106 | ```js 1107 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u' 1108 | 1109 | // add your solution here 1110 | < ['row', 'on', 'Urn', 'e', 'Dust', 'end', 'e', 'u'] 1111 | ``` 1112 | 1113 | **6)** Extract all whole words only if they are followed by `:` or `,` or `-`. 1114 | 1115 | ```js 1116 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1117 | 1118 | // add your solution here 1119 | < ['Poke', 'so_good', 'ever2'] 1120 | ``` 1121 | 1122 | **7)** Extract all whole words only if they are preceded by `=` or `/` or `-`. 
1123 | 1124 | ```js 1125 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1126 | 1127 | // add your solution here 1128 | < ['so_good', 'is', 'sit'] 1129 | ``` 1130 | 1131 | **8)** Extract all whole words only if they are preceded by `=` or `:` and followed by `:` or `.`. 1132 | 1133 | ```js 1134 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1135 | 1136 | // add your solution here 1137 | < ['so_good', 'ink'] 1138 | ``` 1139 | 1140 | **9)** Extract all whole words only if they are preceded by `=` or `:` or `.` or `(` or `-` and not followed by `.` or `/`. 1141 | 1142 | ```js 1143 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit' 1144 | 1145 | // add your solution here 1146 | < ['so_good', 'vast', 'sit'] 1147 | ``` 1148 | 1149 | **10)** Remove the leading and trailing whitespaces from all the individual fields where `,` is the field separator. 1150 | 1151 | ```js 1152 | > let csv1 = ' comma ,separated ,values \t\r ' 1153 | > let csv2 = 'good bad,nice ice , 42 , , stall small' 1154 | 1155 | > const trim_whitespace = // add your solution here 1156 | 1157 | > csv1.replace(trim_whitespace, '') 1158 | < 'comma,separated,values' 1159 | > csv2.replace(trim_whitespace, '') 1160 | < 'good bad,nice ice,42,,stall small' 1161 | ``` 1162 | 1163 | **11)** Filter elements that satisfy all of these rules: 1164 | 1165 | * should have at least two alphabets 1166 | * should have at least three digits 1167 | * should have at least one special character among `%` or `*` or `#` or `$` 1168 | * should not end with a whitespace character 1169 | 1170 | ```js 1171 | > let pwds = ['hunter2', 'F2h3u%9', '*X3Yz3.14\t', 'r2_d2_42', 'A $B C1234'] 1172 | 1173 | // add your solution here 1174 | < ['F2h3u%9', 'A $B C1234'] 1175 | ``` 1176 | 1177 | **12)** For the given string, surround all whole words with `{}` except for whole words `par` and `cat` and `apple`. 
1178 | 1179 | ```js 1180 | > let ip = 'part; cat {super} rest_42 par scatter apple spar' 1181 | 1182 | // add your solution here 1183 | < '{part}; cat {{super}} {rest_42} par {scatter} apple {spar}' 1184 | ``` 1185 | 1186 | **13)** Extract the integer portion of floating-point numbers for the given string. A number ending with `.` and no further digits should not be considered. 1187 | 1188 | ```js 1189 | > let ip = '12 ab32.4 go 5 2. 46.42 5' 1190 | 1191 | // add your solution here 1192 | < ['32', '46'] 1193 | ``` 1194 | 1195 | **14)** For the given input strings, extract all overlapping two character sequences. 1196 | 1197 | ```js 1198 | > let s1 = 'apple' 1199 | > let s2 = '1.2-3:4' 1200 | 1201 | > const pat1 = // add your solution here 1202 | 1203 | // add your solution here for s1 1204 | < ['ap', 'pp', 'pl', 'le'] 1205 | // add your solution here for s2 1206 | < ['1.', '.2', '2-', '-3', '3:', ':4'] 1207 | ``` 1208 | 1209 | **15)** The given input strings contain fields separated by the `:` character. Delete `:` and the last field if there is a digit character anywhere before the last field. 1210 | 1211 | ```js 1212 | > let s1 = '42:cat' 1213 | > let s2 = 'twelve:a2b' 1214 | > let s3 = 'we:be:he:0:a:b:bother' 1215 | > let s4 = 'apple:banana-42:cherry:' 1216 | > let s5 = 'dragon:unicorn:centaur' 1217 | 1218 | > const pat2 = // add your solution here 1219 | 1220 | > s1.replace(pat2, '') 1221 | < '42' 1222 | > s2.replace(pat2, '') 1223 | < 'twelve:a2b' 1224 | > s3.replace(pat2, '') 1225 | < 'we:be:he:0:a:b' 1226 | > s4.replace(pat2, '') 1227 | < 'apple:banana-42:cherry' 1228 | > s5.replace(pat2, '') 1229 | < 'dragon:unicorn:centaur' 1230 | ``` 1231 | 1232 | **16)** Extract all whole words unless they are preceded by `:` or `<=>` or `----` or `#`. 
1233 | 1234 | ```js 1235 | > let ip = '::very--at<=>row|in.a_b#b2c=>lion----east' 1236 | 1237 | // add your solution here 1238 | < ['at', 'in', 'a_b', 'lion'] 1239 | ``` 1240 | 1241 | **17)** Match strings if it contains `qty` followed by `price` but not if there is any whitespace character or the string `error` between them. 1242 | 1243 | ```js 1244 | > let str1 = '23,qty,price,42' 1245 | > let str2 = 'qty price,oh' 1246 | > let str3 = '3.14,qty,6,errors,9,price,3' 1247 | > let str4 = '42\nqty-6,apple-56,price-234,error' 1248 | > let str5 = '4,price,3.14,qty,4' 1249 | > let str6 = '(qtyprice) (hi-there)' 1250 | 1251 | > const neg = // add your solution here 1252 | 1253 | > neg.test(str1) 1254 | < true 1255 | > neg.test(str2) 1256 | < false 1257 | > neg.test(str3) 1258 | < false 1259 | > neg.test(str4) 1260 | < true 1261 | > neg.test(str5) 1262 | < false 1263 | > neg.test(str6) 1264 | < true 1265 | ``` 1266 | 1267 | **18)** Can you reason out why the following regular expressions behave differently? 1268 | 1269 | ```js 1270 | > let ip = 'I have 12, he has 2!' 1271 | 1272 | > ip.replace(/\b..\b/g, '{$&}') 1273 | < '{I }have {12}{, }{he} has{ 2}!' 1274 | 1275 | > ip.replace(/(?<!\w)..(?!\w)/g, '{$&}') 1276 | < 'I have {12}, {he} has {2!}' 1277 | ``` 1278 | 1279 | **19)** Simulate string partitioning to get an array of three elements — the string before the separator, the portion matched by the separator and the string after the separator. For the first case, split the given input string on the first occurrence of digits. For the second case, split based on the last occurrence of digits. 1280 | 1281 | ```js 1282 | > let w2 = 'Sample123string42with777numbers' 1283 | 1284 | // add your solution here for splitting based on the first occurrence 1285 | < ['Sample', '123', 'string42with777numbers'] 1286 | 1287 | // add your solution here for splitting based on the last occurrence 1288 | < ['Sample123string42with', '777', 'numbers'] 1289 | ``` 1290 | 1291 | **20)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings using the `search()` method. Assume that there will be at least one match for each input string. 1292 | 1293 | ```js 1294 | > let s1 = 'match after the last newline character' 1295 | > let s2 = 'and then you want to test' 1296 | > let s3 = 'this is good bye then' 1297 | > let s4 = 'who was there to see?'
1298 | 1299 | > const pat3 = // add your solution here 1300 | 1301 | > s1.search(pat3) 1302 | < 12 1303 | > s2.search(pat3) 1304 | < 18 1305 | > s3.search(pat3) 1306 | < 17 1307 | > s4.search(pat3) 1308 | < 14 1309 | ``` 1310 | 1311 |
1312 | 1313 | # Unicode 1314 | 1315 | **1)** Check if the given input strings are made up of ASCII characters only. Consider the input to be non-empty strings and any character that isn't part of the 7-bit ASCII set should result in `false`. 1316 | 1317 | ```js 1318 | > let str1 = '123 × 456' 1319 | > let str2 = 'good fοοd' 1320 | > let str3 = 'happy learning!' 1321 | 1322 | > const pat1 = // add your solution here 1323 | 1324 | > pat1.test(str1) 1325 | < false 1326 | > pat1.test(str2) 1327 | < false 1328 | > pat1.test(str3) 1329 | < true 1330 | ``` 1331 | 1332 | **2)** Retain only the punctuation characters for the given string. 1333 | 1334 | ```js 1335 | > let ip = '❨a❩❪1❫❬b❭❮2❯❰c❱❲3❳❴xyz❵⟅123⟆⟦⟧⟨like⟩⟪3.14⟫' 1336 | 1337 | // add your solution here 1338 | < '❨❩❪❫❬❭❮❯❰❱❲❳❴❵⟅⟆⟦⟧⟨⟩⟪.⟫' 1339 | ``` 1340 | 1341 | **3)** Is the following code snippet showing the correct output? 1342 | 1343 | ```js 1344 | > 'fox:αλεπού'.match(/\w+/g) 1345 | < ['fox'] 1346 | ``` 1347 | 1348 | **4)** Name the set operations enabled by the `v` flag. 1349 | 1350 | **5)** Extract all whole words from the given strings. However, do not match words if they contain any character present in the `ignore` variable. 
1351 | 1352 | ```js 1353 | > let s1 = 'match after the last new_line character A2' 1354 | > let s2 = 'and then you want to test' 1355 | 1356 | > let ignore = 'aty' 1357 | > const ign1 = // add your solution here 1358 | > s1.match(ign1) 1359 | < ['new_line', 'A2'] 1360 | > s2.match(ign1) 1361 | < null 1362 | 1363 | > let ignore = 'esw' 1364 | // should be the same solution used above 1365 | > const ign2 = // add your solution here 1366 | > s1.match(ign2) 1367 | < ['match', 'A2'] 1368 | > s2.match(ign2) 1369 | < ['and', 'you', 'to'] 1370 | ``` 1371 | 1372 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sundeep Agarwal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Understanding JavaScript RegExp 2 | 3 | Learn JavaScript Regular Expressions step-by-step from beginner to advanced levels with hundreds of examples and exercises. Visit https://youtu.be/8X-hUel3GxM for a short video about the book. 4 | 5 |

Understanding JavaScript RegExp ebook cover image

6 | 7 | The book also includes exercises to test your understanding, which are presented together as a single file in this repo — [Exercises.md](./Exercises.md). 8 | 9 | For solutions to the exercises, see [Exercise_solutions.md](./Exercise_solutions.md). 10 | 11 | See [Version_changes.md](./Version_changes.md) to keep track of changes made to the book. 12 | 13 |
14 | 15 | # E-book 16 | 17 | * You can purchase the pdf/epub versions of the book using these links: 18 | * https://learnbyexample.gumroad.com/l/js_regexp 19 | * https://leanpub.com/js_regexp 20 | * You can also get the book as part of these bundles: 21 | * **All books bundle** from https://learnbyexample.gumroad.com/l/all-books 22 | * Includes all my programming books 23 | * **Awesome Regex** bundle from https://learnbyexample.gumroad.com/l/regex or https://leanpub.com/b/regex 24 | * See https://learnbyexample.github.io/books/ for a list of other books 25 | 26 | For a preview of the book, see [sample chapters](./sample_chapters/js_regexp_sample.pdf). 27 | 28 | The book can also be [viewed as a single markdown file in this repo](./js_regexp.md). See my blogpost on [generating pdfs from markdown using pandoc](https://learnbyexample.github.io/customizing-pandoc/) if you are interested in the ebook creation process. 29 | 30 | For the web version of the book, visit https://learnbyexample.github.io/learn_js_regexp/ 31 | 32 |
33 | 34 | # Testimonials 35 | 36 | >Literally was having a mini-breakdown about not understanding Regex in algorithm solutions the other day and now I'm feeling so much better, so thank YOU! I genuinely feel like I'm developing the skill for spotting when and where to use them after so much practice! 37 | > 38 | > — [feedback on twitter](https://twitter.com/codingwithlucy/status/1450668315635036160) 39 | 40 |
41 | 42 | # Feedback 43 | 44 | ⚠️ ⚠️ Please DO NOT submit pull requests. The main reason is that any modification requires changes in multiple places. 45 | 46 | I would highly appreciate it if you'd let me know how you felt about this book. It could be anything from a simple thank you, pointing out a typo, mistakes in code snippets, which aspects of the book worked for you (or didn't!) and so on. Reader feedback is essential and especially so for self-published authors. 47 | 48 | You can reach me via: 49 | 50 | * Issue Manager: [https://github.com/learnbyexample/learn_js_regexp/issues](https://github.com/learnbyexample/learn_js_regexp/issues) 51 | * E-mail: `echo 'bGVhcm5ieWV4YW1wbGUubmV0QGdtYWlsLmNvbQo=' | base64 --decode` 52 | * Twitter: [https://twitter.com/learn_byexample](https://twitter.com/learn_byexample) 53 | 54 |
55 | 56 | # Table of Contents 57 | 58 | 1. Preface 59 | 2. Why is it needed? 60 | 3. RegExp introduction 61 | 4. Anchors 62 | 5. Alternation and Grouping 63 | 6. Escaping metacharacters 64 | 7. Dot metacharacter and Quantifiers 65 | 8. Interlude: Tools for debugging and visualization 66 | 9. Working with matched portions 67 | 10. Character class 68 | 11. Groupings and backreferences 69 | 12. Interlude: Common tasks 70 | 13. Lookarounds 71 | 14. Unicode 72 | 15. Further Reading 73 | 74 |
75 | 76 | # Acknowledgements 77 | 78 | * [MDN: Regular Expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions) — documentation and examples 79 | * [/r/learnjavascript/](https://old.reddit.com/r/learnjavascript/) and [/r/regex/](https://old.reddit.com/r/regex/) — helpful forums for beginners and experienced programmers alike 80 | * [stackoverflow](https://stackoverflow.com/) — for getting answers to pertinent questions on JavaScript and regular expressions 81 | * [tex.stackexchange](https://tex.stackexchange.com/) — for help on [pandoc](https://github.com/jgm/pandoc/) and `tex` related questions 82 | * [canva](https://www.canva.com/) — cover image 83 | * [Warning](https://commons.wikimedia.org/wiki/File:Warning_icon.svg) and [Info](https://commons.wikimedia.org/wiki/File:Info_icon_002.svg) icons by [Amada44](https://commons.wikimedia.org/wiki/User:Amada44) under public domain 84 | * [oxipng](https://github.com/shssoichiro/oxipng), [pngquant](https://pngquant.org/) and [svgcleaner](https://github.com/RazrFalcon/svgcleaner) — optimizing images 85 | * [mdBook](https://github.com/rust-lang/mdBook) — for web version of the book 86 | * [mdBook-pagetoc](https://github.com/JorelAli/mdBook-pagetoc) — for adding table of contents for each chapter 87 | * [minify-html](https://github.com/wilsonzlin/minify-html) — for minifying html files 88 | 89 |
90 | 91 | # License 92 | 93 | The book is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/). 94 | 95 | The code snippets are licensed under MIT, see [LICENSE](./LICENSE) file. 96 | 97 | -------------------------------------------------------------------------------- /Version_changes.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | ### 2.0 4 | 5 | * Examples and exercises added for `d` and `v` flags 6 | * Strings in code snippets changed to be uniformly represented in single quotes 7 | * In general, many of the examples, exercises, solutions, descriptions and external links were updated/corrected 8 | * Updated Acknowledgements section 9 | * Code snippets related to info/warning sections will now appear as a single block 10 | * Book title changed to **Understanding JavaScript RegExp** 11 | * New cover image 12 | * Images centered for EPUB format 13 | 14 |
15 | 16 | ### 1.6 17 | 18 | * Code snippets checked to work with Chrome/Chromium console version 89+ 19 | * Updated `escapeRegExp` function as per [MDN: Regular Expressions doc](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions) 20 | * Typo and miscellaneous changes 21 | 22 |
23 | 24 | ### 1.5 25 | 26 | * Added **epub** version of the book 27 | * Added plenty of new exercises, perhaps too many 28 | * Added solutions for the exercises to the repo 29 | * Added two interlude chapters to highlight external resources 30 | * Added separate section about escape sequences 31 | * Updated and clarified descriptions for many concepts, typo corrections and miscellaneous changes, too many changes to list individually 32 | 33 |
34 | 35 | ### 1.0 36 | 37 | * First version 38 | 39 | -------------------------------------------------------------------------------- /code_snippets/Alternation_and_Grouping.js: -------------------------------------------------------------------------------- 1 | // Alternation 2 | 3 | const pets = /cat|dog/ 4 | 5 | pets.test('I like cats') 6 | 7 | pets.test('I like dogs') 8 | 9 | pets.test('I like parrots') 10 | 11 | 'catapults concatenate cat scat cater'.replace(/^cat|cat\b/g, 'X') 12 | 13 | 'cat dog bee parrot fox'.replace(/cat|dog|fox/g, 'mammal') 14 | 15 | // Grouping 16 | 17 | 'red reform read arrest'.replace(/reform|rest/g, 'X') 18 | 19 | 'red reform read arrest'.replace(/re(form|st)/g, 'X') 20 | 21 | 'par spare part party'.replace(/\bpar\b|\bpart\b/g, 'X') 22 | 23 | 'par spare part party'.replace(/\b(par|part)\b/g, 'X') 24 | 25 | 'par spare part party'.replace(/\bpar(|t)\b/g, 'X') 26 | 27 | // Precedence rules 28 | 29 | let words = 'lion elephant are rope not' 30 | 31 | words.replace(/on|ant/, 'X') 32 | 33 | words.replace(/ant|on/, 'X') 34 | 35 | let mood = 'best years' 36 | 37 | mood.replace(/year|years/, 'X') 38 | 39 | mood.replace(/years|year/, 'X') 40 | 41 | let sample = 'ear xerox at mare part learn eye' 42 | 43 | sample.replace(/ar|are|art/g, 'X') 44 | 45 | sample.replace(/are|ar|art/g, 'X') 46 | 47 | sample.replace(/are|art|ar/g, 'X') 48 | 49 | -------------------------------------------------------------------------------- /code_snippets/Anchors.js: -------------------------------------------------------------------------------- 1 | // String anchors 2 | 3 | /^cat/.test('cater') 4 | 5 | /^cat/.test('concatenation') 6 | 7 | /^hi/.test('hi hello\ntop spot') 8 | 9 | /^top/.test('hi hello\ntop spot') 10 | 11 | /are$/.test('spare') 12 | 13 | /are$/.test('nearest') 14 | 15 | let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest'] 16 | 17 | words.filter(w => /er$/.test(w)) 18 | 19 | words.filter(w => /t$/.test(w)) 20 | 21 | 
/^cat$/.test('cat') 22 | 23 | /^cat$/.test('cater') 24 | 25 | 'live'.replace(/^/, 're') 26 | 27 | 'send'.replace(/^/, 're') 28 | 29 | 'cat'.replace(/$/, 'er') 30 | 31 | 'hack'.replace(/$/, 'er') 32 | 33 | // Line anchors 34 | 35 | /^top/m.test('hi hello\ntop spot') 36 | 37 | /er$/m.test('spare\npar\nera\ndare') 38 | 39 | let elements = ['spare\ntool', 'par\n', 'dare', 'spared'] 40 | 41 | elements.filter(e => /are$/m.test(e)) 42 | 43 | /^par$/m.test('spare\npar\nera\ndare') 44 | 45 | let items = 'catapults\nconcatenate\ncat' 46 | 47 | console.log(items.replace(/^/gm, '* ')) 48 | 49 | console.log(items.replace(/$/gm, '.')) 50 | 51 | console.log('1\n2\n'.replace(/^/mg, 'fig ')) 52 | 53 | console.log('1\n2\n'.replace(/$/mg, ' apple')) 54 | 55 | // Word anchors 56 | 57 | let words = 'par spar apparent spare part' 58 | 59 | words.replace(/par/g, 'X') 60 | 61 | words.replace(/\bpar/g, 'X') 62 | 63 | words.replace(/par\b/g, 'X') 64 | 65 | words.replace(/\bpar\b/g, 'X') 66 | 67 | let words = 'par spar apparent spare part' 68 | 69 | console.log(words.replace(/\b/g, '"').replace(/ /g, ',')) 70 | 71 | 'output=num1+35*42/num2'.replace(/\b/g, ' ') 72 | 73 | 'output=num1+35*42/num2'.replace(/\b/g, ' ').trim() 74 | 75 | // Opposite Word Anchor 76 | 77 | let words = 'par spar apparent spare part' 78 | 79 | words.replace(/\Bpar/g, 'X') 80 | 81 | words.replace(/\Bpar\b/g, 'X') 82 | 83 | words.replace(/par\B/g, 'X') 84 | 85 | words.replace(/\Bpar\B/g, 'X') 86 | 87 | 'copper'.replace(/\b/g, ':') 88 | 89 | 'copper'.replace(/\B/g, ':') 90 | 91 | '-----hello-----'.replace(/\b/g, ' ') 92 | 93 | '-----hello-----'.replace(/\B/g, ' ') 94 | 95 | -------------------------------------------------------------------------------- /code_snippets/Character_class.js: -------------------------------------------------------------------------------- 1 | // Custom character sets 2 | 3 | ['cute', 'cat', 'cot', 'coat', 'cost', 'scuttle'].filter(w => /c[ou]t/.test(w)) 4 | 5 | 'meeting cute boat site 
foot'.replace(/[aeo]+t/g, 'X') 6 | 7 | // Range of characters 8 | 9 | 'Sample123string42with777numbers'.match(/[0-9]+/g) 10 | 11 | 'coat Bin food tar12 best Apple fig_42'.match(/\b[a-z0-9]+\b/g) 12 | 13 | 'coat tin food put stoop best fig_42 Pet'.match(/\b[p-z][a-z]*\b/g) 14 | 15 | 'coat tin food put stoop best fig_42 Pet'.match(/\b[a-fp-t]+\b/g) 16 | 17 | // Negating character sets 18 | 19 | 'Sample123string42with777numbers'.match(/[^0-9]+/g) 20 | 21 | 'apple:123:banana:cherry'.replace(/^([^:]+:){2}/, '') 22 | 23 | 'apple=42; cherry=123'.replace(/=[^=]+$/, '') 24 | 25 | let words = ['tryst', 'fun', 'glyph', 'pity', 'why'] 26 | 27 | words.filter(w => /^[^aeiou]+$/.test(w)) 28 | 29 | words.filter(w => !/[aeiou]/.test(w)) 30 | 31 | // Matching metacharacters literally 32 | 33 | 'ab-cd gh-c 12-423'.match(/\b[a-z-]{2,}\b/g) 34 | 35 | 'ab-cd gh-c 12-423'.match(/\b[a-z\-0-9]{2,}\b/g) 36 | 37 | 'f*(a^b) - 3*(a+b)'.match(/a[+^]b/g) 38 | 39 | 'f*(a^b) - 3*(a+b)'.match(/a[\^+]b/g) 40 | 41 | 'words[5] = tea'.match(/[a-z[\]0-9]+/)[0] 42 | 43 | console.log('5ba\\babc2'.match(/[a\\b]+/)[0]) 44 | 45 | // Escape sequence sets 46 | 47 | 'Sample123string42with777numbers'.split(/\d+/) 48 | 49 | 'sea eat car rat eel tea'.match(/\b\w/g).join('') 50 | 51 | 'tea sea-Pit Sit;(lean_2\tbean_3)'.match(/[\w\s]+/g) 52 | 53 | 'Sample123string42with777numbers'.replace(/\D+/g, '-') 54 | 55 | ' 1..3 \v\f fig_tea 42\tzzz \r\n1-2-3 '.match(/\S+/g) 56 | 57 | // Numeric ranges 58 | 59 | '23 154 12 26 98234'.match(/\b[12]\d\b/g) 60 | 61 | '23 154 12 26 98234'.match(/\b\d{3,}\b/g) 62 | 63 | '0501 035 154 12 26 98234'.match(/\b0*[1-9]\d{2,}\b/g) 64 | 65 | '45 349 651 593 4 204'.match(/\d+/g).filter(n => n < 350) 66 | 67 | '45 349 651 593 4 204'.replace(/\d+/g, m => m < 350 ? 
0 : 1) 68 | 69 | '45 349 651 593 4 204'.match(/\d+/g).filter(n => n >= 200 && n <= 650) 70 | 71 | -------------------------------------------------------------------------------- /code_snippets/Dot_metacharacter_and_Quantifiers.js: -------------------------------------------------------------------------------- 1 | // Dot metacharacter 2 | 3 | 'tac tin c.t abc;tuv acute'.replace(/c.t/g, 'X') 4 | 5 | 'breadth markedly reported overrides'.replace(/r..d/g, 'X') 6 | 7 | '42\t35'.replace(/2.3/, '8') 8 | 9 | 'cag̈ed'.replace(/a.e/, 'o') 10 | 11 | 'cag̈ed'.replace(/a..e/, 'o') 12 | 13 | // split() method 14 | 15 | 'apple-85-mango-70'.split(/-/) 16 | 17 | 'apple-85-mango-70'.split(/-/, 2) 18 | 19 | 'bus:3:car:-:van'.split(/:.:/) 20 | 21 | // Greedy quantifiers 22 | 23 | 'far feat flare fear'.replace(/e?ar/g, 'X') 24 | 25 | 'par spare part party'.replace(/\bpart?\b/g, 'X') 26 | 27 | ['red', 'ready', 're;d', 'redo', 'reed'].filter(w => /\bre.?d\b/.test(w)) 28 | 29 | 'par part parrot parent'.replace(/par(ro)?t/g, 'X') 30 | 31 | 'par part parrot parent'.replace(/par(en|ro)?t/g, 'X') 32 | 33 | 'tr tear tare steer sitaara'.replace(/ta*r/g, 'X') 34 | 35 | 'tr tear tare steer sitaara'.replace(/t(e|a)*r/g, 'X') 36 | 37 | '3111111111125111142'.replace(/1*2/g, 'X') 38 | 39 | '3111111111125111142'.split(/1*2/) 40 | 41 | '3111111111125111142'.split(/1*/) 42 | 43 | 'tr tear tare steer sitaara'.replace(/ta+r/g, 'X') 44 | 45 | 'tr tear tare steer sitaara'.replace(/t(e|a)+r/g, 'X') 46 | 47 | '3111111111125111142'.replace(/1+2/g, 'X') 48 | 49 | '3111111111125111142'.split(/1+/) 50 | 51 | let repeats = ['abc', 'ac', 'abbc', 'xabbbcz', 'bc', 'abbbbbc'] 52 | 53 | repeats.filter(w => /ab{1,4}c/.test(w)) 54 | 55 | repeats.filter(w => /ab{0,2}c/.test(w)) 56 | 57 | repeats.filter(w => /ab{3,}c/.test(w)) 58 | 59 | repeats.filter(w => /ab{3}c/.test(w)) 60 | 61 | 'a{5} = 10'.replace(/a\{5}/g, 'a{6}') 62 | 63 | 'report_{a,b}.txt'.replace(/_{a,b}/g, '-{c,d}') 64 | 65 | // AND Conditional 66 | 67 | 
/Error.*valid/.test('Error: not a valid input') 68 | 69 | /Error.*valid/.test('Error: key not found') 70 | 71 | /cat.*dog|dog.*cat/.test('cat and dog') 72 | 73 | /cat.*dog|dog.*cat/.test('dog and cat') 74 | 75 | let patterns = [/cat/, /dog/] 76 | 77 | patterns.every(p => p.test('cat and dog')) 78 | 79 | patterns.every(p => p.test('dog and cat')) 80 | 81 | // What does greedy mean? 82 | 83 | 'foot'.replace(/f.?o/, 'X') 84 | 85 | console.log('table < fig \\< bat < cake'.replace(/\\? escapeRegExp(w)).join('|') 33 | } 34 | 35 | let w1 = ['c^t', 'dog$', 'f|x'] 36 | 37 | const p1 = new RegExp(unionRegExp(w1), 'g') 38 | 39 | p1 40 | 41 | 'c^t dog$ bee parrot f|x'.replace(p1, 'mammal') 42 | 43 | let w2 = ['hand', 'handy', 'handful'] 44 | 45 | w2.sort((a, b) => b.length - a.length) 46 | 47 | const p2 = new RegExp(`\\b(${unionRegExp(w2)})\\b`, 'g') 48 | 49 | p2 50 | 51 | 'handful handed handy hands hand'.replace(p2, 'X') 52 | 53 | // source and flags properties 54 | 55 | const p3 = /\bpar\b/ 56 | 57 | const p4 = new RegExp(p3.source + '|cat', 'g') 58 | 59 | p4 60 | 61 | console.log(p4.source) 62 | 63 | p4.flags 64 | 65 | 'cater cat concatenate par spare'.replace(p4, 'X') 66 | 67 | // Escaping the delimiter 68 | 69 | let path = '/home/joe/report/sales/ip.txt' 70 | 71 | path.replace(/^\/home\/joe\//, '~/') 72 | 73 | path.replace(new RegExp(`^/home/joe/`), '~/') 74 | 75 | // Escape sequences 76 | 77 | 'a\tb\tc'.replace(/\t/g, ':') 78 | 79 | '1\n2\n3'.replace(/\n/g, ' ') 80 | 81 | new RegExp('123\tabc') 82 | 83 | new RegExp('123\\tabc') 84 | 85 | new RegExp('car\b') 86 | 87 | new RegExp('car\\b') 88 | 89 | /\e/.test('hello') 90 | 91 | 'h e l l o'.replace(/\x20/g, '') 92 | 93 | '12|30'.replace(/2\x7c3/g, '5') 94 | 95 | '12|30'.replace(/2|3/g, '5') 96 | 97 | -------------------------------------------------------------------------------- /code_snippets/Groupings_and_backreferences.js: -------------------------------------------------------------------------------- 1 | // 
Backreferences 2 | 3 | '[52] apples [and] [31] mangoes'.replace(/\[(\d+)\]/g, '$1') 4 | 5 | '_apple_ __123__ _banana_'.replace(/(_)?_/g, '$1') 6 | 7 | 'good,bad 42,24 x,y'.replace(/(\w+),(\w+)/g, '$2,$1') 8 | 9 | '52 apples and 31 mangoes'.replace(/\d+/g, '($&)') 10 | 11 | 'Hello world'.replace(/.*/, 'Hi. $&. Have a nice day') 12 | 13 | 'fork,42,nice,3.14'.replace(/,.+/, '$&,$`') 14 | 15 | let words = ['moon', 'mono', 'excellent', 'POLL', 'a22b'] 16 | 17 | words.filter(w => /(\w)\1/.test(w)) 18 | 19 | 'aa a a a 42 f_1 f_1 f_13.14'.replace(/\b(\w+)( \1)+\b/g, '$1') 20 | 21 | // Backreference oddities 22 | 23 | 'cat'.replace(/a/, '{$1}') 24 | 25 | 'cat'.replace(/(a)/, '{\$1}') 26 | 27 | 'cat'.replace(/(a)/, '{$$1}') 28 | 29 | '[52] apples and [31] mangoes'.replace(/\[(\d+)\]/g, '($15)') 30 | 31 | '[52] apples and [31] mangoes'.replace(/\[(\d+)\]/g, '$3') 32 | 33 | '[52] apples and [31] mangoes'.replace(/\[\d+\]/g, '$1') 34 | 35 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$11') 36 | 37 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$1\x31') 38 | 39 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$011') 40 | 41 | 'abcdefghijklmna1d'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*\1\x31/, 'X') 42 | 43 | // Non-capturing groups 44 | 45 | 'Sample123string42with777numbers'.split(/\d+/) 46 | 47 | 'Sample123string42with777numbers'.split(/(\d+)/) 48 | 49 | 'effort flee facade oddball rat tool'.match(/\b\w*(\w)\1\w*\b/g) 50 | 51 | 'hi 123123123 bye 456123456'.match(/(123)+/g) 52 | 53 | '123hand42handy777handful500'.split(/hand(y|ful)?/) 54 | 55 | '123hand42handy777handful500'.split(/hand(?:y|ful)?/) 56 | 57 | '1,2,3,4,5,6,7'.replace(/^(([^,]+,){3})([^,]+)/, '$1($3)') 58 | 59 | '1,2,3,4,5,6,7'.replace(/^((?:[^,]+,){3})([^,]+)/, '$1($2)') 60 | 61 | 'so:cat:rest:in:put:to'.replace(/^([^:]+:){4}/, '($1)') 62 | 63 | 'so:cat:rest:in:put:to'.replace(/^((?:[^:]+:){4})/, '($1)') 64 | 65 | // Named capture groups 66 | 67 | 
let row = 'today,2008-03-24,food,2008-03-24,nice,2018-10-25,5632' 68 | 69 | row.match(/(?\d{4}-\d{2}-\d{2}).*\k/)[0] 70 | 71 | 'good,bad 42,24 x,y'.replace(/(?\w+),(?\w+)/g, '$,$') 72 | 73 | let m = '2018-10-25,car,2346'.match(/(?[^,]+),(?[^,]+)/) 74 | 75 | m.groups 76 | 77 | m.groups.date 78 | 79 | m.groups.product 80 | 81 | -------------------------------------------------------------------------------- /code_snippets/Interlude_Common_tasks.js: -------------------------------------------------------------------------------- 1 | // CommonRegexJS 2 | 3 | let data = 'hello 255.21.255.22 okay' 4 | 5 | const comm = new CommonRegex(data) 6 | 7 | comm.IPv4 8 | 9 | let new_data = '23.14.2.4.2 255.21.255.22 567.12.2.1' 10 | 11 | const ip = new CommonRegex(new_data) 12 | 13 | ip.IPv4 14 | 15 | -------------------------------------------------------------------------------- /code_snippets/Lookarounds.js: -------------------------------------------------------------------------------- 1 | // Conditional expressions 2 | 3 | let items = ['1,2,3,4', 'a,b,c,d', '#apple 123'] 4 | 5 | items.filter(s => /\d/.test(s) && s.includes('#')) 6 | 7 | items.filter(s => s[0] != '#').map(s => s.replace(/,.+,/, ' ')) 8 | 9 | // Negative lookarounds 10 | 11 | 'hey cats! cat42 cat_5 catcat'.replace(/cat(?!\d)/g, 'dog') 12 | 13 | 'cat _cat 42catcat'.replace(/(? /(?=.*b)(?=.*e).*t/.test(w)) 60 | 61 | words.filter(w => /(?=.*a)(?=.*e)(?=.*i)(?=.*o).*u/.test(w)) 62 | 63 | words.filter(w => /(?!.*n$)(?=.*a[bt]).*q/.test(w)) 64 | 65 | // Variable length lookbehind 66 | 67 | '=314not :,2irk ,:3cool =42,error'.match(/(?<=[:=]\d+)[a-z]+/g) 68 | 69 | 'cat scatter cater scat'.replace(/(?<=(cat.*?){2})cat/, 'X') 70 | 71 | /(? 
/tt/.test(w)) 26 | 27 | words.every(w => /at/.test(w)) 28 | 29 | words.some(w => /stat/.test(w)) 30 | 31 | // Flags 32 | 33 | /cat/.test('CaT') 34 | 35 | /cat/i.test('CaT') 36 | 37 | ['Cat', 'cot', 'CATER', 'SCat', 'ScUtTLe'].filter(w => /cat/i.test(w)) 38 | 39 | // RegExp constructor and reuse 40 | 41 | const pet = /dog/ 42 | 43 | pet.test('They bought a dog') 44 | 45 | pet.test('A cat crossed their path') 46 | 47 | const pat = new RegExp('dog') 48 | 49 | pat 50 | 51 | new RegExp('dog', 'i') 52 | 53 | let greeting = 'hi' 54 | 55 | const pat1 = new RegExp(`${greeting} there`) 56 | 57 | pat1 58 | 59 | new RegExp(`${greeting.toUpperCase()} there`) 60 | 61 | // replace() method 62 | 63 | '1,2,3,4'.replace(/,/, '-') 64 | 65 | '1,2,3,4'.replace(/,/g, '-') 66 | 67 | 'cArT PART tart mArt'.replace(/art/ig, '2') 68 | 69 | let word = 'cater' 70 | 71 | word.replace(/cat/, 'hack') 72 | 73 | word 74 | 75 | word = word.replace(/cat/, 'hack') 76 | 77 | word 78 | 79 | -------------------------------------------------------------------------------- /code_snippets/Unicode.js: -------------------------------------------------------------------------------- 1 | // Unicode character sets and the u flag 2 | 3 | 'fox:αλεπού,eagle:αετός'.match(/\p{L}+/gu) 4 | 5 | 'fox:αλεπού,eagle:αετός'.match(/\p{sc=Greek}+/gu) 6 | 7 | 'φοο12,βτ_4,fig'.replace(/\P{L}+/gu, '') 8 | 9 | 'tie. ink east;'.match(/(? 
c.codePointAt().toString(16)) 26 | 27 | '\u{3b1}' 28 | 29 | 'fox:αλεπού,eagle:αετός'.match(/[\u{61}-\u{7a}]+/gu) 30 | 31 | -------------------------------------------------------------------------------- /code_snippets/Working_with_matched_portions.js: -------------------------------------------------------------------------------- 1 | // match() method 2 | 3 | 'too soon a song snatch'.match(/so+n/) 4 | 5 | 'too soon a song snatch'.match(/so+n/)[0] 6 | 7 | 'too soon a song snatch'.match('so+n') 8 | 9 | let s1 = 'cat and dog' 10 | 11 | s1.match(/dog/).index 12 | 13 | s1.match(/dog/).input 14 | 15 | s1.match(/xyz/) 16 | 17 | // search() method 18 | 19 | 'cat and dog'.search(/dog/) 20 | 21 | 'cat and dog'.search(/xyz/) 22 | 23 | // Capture groups 24 | 25 | let motivation = 'improve yourself.' 26 | 27 | motivation.match(/pr.*our/) 28 | 29 | motivation.match(/pr.*our/)[0] 30 | 31 | let purchase = 'coffee:100g tea:250g sugar:75g chocolate:50g' 32 | 33 | let m = purchase.match(/:(.*?)g.*?:(.*?)g.*?chocolate:(.*?)g/) 34 | 35 | m 36 | 37 | m[1] 38 | 39 | m[3] 40 | 41 | // d flag 42 | 43 | 'awesome'.match(/so/d) 44 | 45 | 'awesome'.match(/so/d).indices[0] 46 | 47 | 'coffee:100g tea:250g'.match(/:(.*?)g/d) 48 | 49 | 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[0] 50 | 51 | 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[1] 52 | 53 | // Getting all the matched portions 54 | 55 | 'too soon a song snatch'.match(/so*n/g) 56 | 57 | 'too soon a song snatch'.match(/so+n/g) 58 | 59 | 'PAR spar apparent SpArE part pare'.match(/\bs?pare?\b/ig) 60 | 61 | 'par spar apparent spare part'.match(/\bs?par(e|t)\b/g) 62 | 63 | 'green:3.14:teal::brown:oh!:blue'.match(/:.*:/g) 64 | 65 | 'green:3.14:teal::brown:oh!:blue'.match(/:.*?:/g) 66 | 67 | // matchAll() method 68 | 69 | 'song too soon snatch'.matchAll(/so*n/g) 70 | 71 | let arr = [...'song too soon snatch'.matchAll(/so*n/g)] 72 | 73 | arr 74 | 75 | arr[0] 76 | 77 | arr[1].index 78 | 79 | Array.from('song too soon 
snatch'.matchAll(/so*n/g), m => m[0]) 80 | 81 | Array.from('song too soon snatch'.matchAll(/so*n/g), m => m.index) 82 | 83 | Array.from('2023/04,1986/Mar,'.matchAll(/(.*?)\/(.*?),/g), m => m.slice(1)) 84 | 85 | // split() with capture groups 86 | 87 | '31111111111251111426'.split(/1*4?2/) 88 | 89 | '31111111111251111426'.split(/(1*4?2)/) 90 | 91 | '31111111111251111426'.split(/(1*)4?2/) 92 | 93 | '3.14aabccc42'.split(/(a+)b+(c+)/) 94 | 95 | '31111111111251111426'.split(/(1*)(4)?2/) 96 | 97 | '3.14aabccc42abc88'.split(/(a+b+c+)(.*)/, 3) 98 | 99 | // Using functions in the replacement section 100 | 101 | function titleCase(m) { 102 | return m[0].toUpperCase() + m.substr(1).toLowerCase() 103 | } 104 | 105 | 'aBc ac ADC aBbBC'.replace(/a.*?c/ig, titleCase) 106 | 107 | 'abc ac adc abbbc'.replace(/ab*c/g, m => m.toUpperCase()) 108 | 109 | '1 42 317'.replace(/\d+/g, m => m*2) 110 | 111 | function titleCase(m, g1, g2) { 112 | return g1.toUpperCase() + g2.toLowerCase() 113 | } 114 | 115 | 'aBc ac ADC aBbBC'.replace(/(a)(.*?c)/ig, titleCase) 116 | 117 | // Using dictionary in the replacement section 118 | 119 | let h = { '1': 'one', '2': 'two', '4': 'four' } 120 | 121 | '9234012'.replace(/1|2|4/g, k => h[k]) 122 | 123 | '9234012'.replace(/\d/g, k => k in h ? 
h[k] : 'X') 124 | 125 | let swap = { 'cat': 'tiger', 'tiger': 'cat' } 126 | 127 | 'cat tiger dog tiger cat'.replace(/cat|tiger/g, k => swap[k]) 128 | 129 | let d = { 'hand': 1, 'handy': 2, 'handful': 3, 'a^b': 4 } 130 | 131 | const p = unionRegExp(Object.keys(d).sort((a, b) => b.length - a.length)) 132 | 133 | console.log(p) 134 | 135 | 'handful hand pin handy (a^b)'.replace(new RegExp(p, 'g'), k => d[k]) 136 | 137 | -------------------------------------------------------------------------------- /images/backslash_in_RegExp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/backslash_in_RegExp.png -------------------------------------------------------------------------------- /images/find_replace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/find_replace.png -------------------------------------------------------------------------------- /images/info.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/js_regexp_ls.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/js_regexp_ls.png -------------------------------------------------------------------------------- /images/password_check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/password_check.png -------------------------------------------------------------------------------- /images/regex101.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/regex101.png -------------------------------------------------------------------------------- /images/regulex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/regulex.png -------------------------------------------------------------------------------- /images/v_flag_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/v_flag_examples.png -------------------------------------------------------------------------------- /images/warning.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sample_chapters/js_regexp_sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/sample_chapters/js_regexp_sample.pdf --------------------------------------------------------------------------------