├── Exercise_solutions.md
├── Exercises.md
├── LICENSE
├── README.md
├── Version_changes.md
├── code_snippets
│   ├── Alternation_and_Grouping.js
│   ├── Anchors.js
│   ├── Character_class.js
│   ├── Dot_metacharacter_and_Quantifiers.js
│   ├── Escaping_metacharacters.js
│   ├── Groupings_and_backreferences.js
│   ├── Interlude_Common_tasks.js
│   ├── Lookarounds.js
│   ├── RegExp_introduction.js
│   ├── Unicode.js
│   └── Working_with_matched_portions.js
├── images
│   ├── backslash_in_RegExp.png
│   ├── find_replace.png
│   ├── info.svg
│   ├── js_regexp_ls.png
│   ├── password_check.png
│   ├── regex101.png
│   ├── regulex.png
│   ├── v_flag_examples.png
│   └── warning.svg
├── js_regexp.md
└── sample_chapters
    └── js_regexp_sample.pdf
/Exercise_solutions.md:
--------------------------------------------------------------------------------
1 | # Exercise solutions
2 |
3 | > Try to solve the exercises in every chapter using only the features discussed until that chapter. Some of the exercises will be easier to solve with techniques presented in the later chapters, but the aim of these exercises is to explore the features presented so far.
4 |
5 |
6 |
7 | # RegExp introduction
8 |
9 | **1)** Check if the given input strings contain `two` irrespective of case.
10 |
11 | ```js
12 | > let s1 = 'Their artwork is exceptional'
13 | > let s2 = 'one plus tw0 is not three'
14 | > let s3 = 'TRUSTWORTHY'
15 |
16 | > const pat1 = /two/i
17 |
18 | > pat1.test(s1)
19 | < true
20 | > pat1.test(s2)
21 | < false
22 | > pat1.test(s3)
23 | < true
24 | ```
25 |
26 | **2)** For the given array, filter all elements that do *not* contain `e`.
27 |
28 | ```js
29 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
30 |
31 | > items.filter(w => !/e/.test(w))
32 | < ['goal', 'sit']
33 | ```
34 |
35 | **3)** Replace only the first occurrence of `5` with `five` for the given string.
36 |
37 | ```js
38 | > let ip = 'They ate 5 apples and 5 oranges'
39 |
40 | > ip.replace(/5/, 'five')
41 | < 'They ate five apples and 5 oranges'
42 | ```
43 |
44 | **4)** Replace all occurrences of `5` with `five` for the given string.
45 |
46 | ```js
47 | > let ip = 'They ate 5 apples and 5 oranges'
48 |
49 | > ip.replace(/5/g, 'five')
50 | < 'They ate five apples and five oranges'
51 | ```
52 |
53 | **5)** Replace all occurrences of `note` irrespective of case with `X`.
54 |
55 | ```js
56 | > let ip = 'This note should not be NoTeD'
57 |
58 | > ip.replace(/note/ig, 'X')
59 | < 'This X should not be XD'
60 | ```
61 |
62 | **6)** For the given multiline input string, filter all lines NOT containing the string `2`.
63 |
64 | ```js
65 | > let purchases = `items qty
66 | apple 24
67 | mango 50
68 | guava 42
69 | onion 31
70 | water 10`
71 |
72 | > const num = /2/
73 |
74 | > console.log(purchases.split('\n')
75 | .filter(e => !num.test(e))
76 | .join('\n'))
77 | < items qty
78 | mango 50
79 | onion 31
80 | water 10
81 | ```
82 |
83 | > You'd be able to solve this using just the `replace()` method by the end of the [Dot metacharacter and Quantifiers](#dot-metacharacter-and-quantifiers) chapter.
84 |
85 | **7)** For the given array, filter all elements that contain either `a` or `w`.
86 |
87 | ```js
88 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
89 |
90 | > items.filter(w => /a/.test(w) || /w/.test(w))
91 | < ['goal', 'new', 'eat']
92 | ```
93 |
94 | **8)** For the given array, filter all elements that contain both `e` and `n`.
95 |
96 | ```js
97 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
98 |
99 | > items.filter(w => /e/.test(w) && /n/.test(w))
100 | < ['new', 'dinner']
101 | ```
102 |
103 | **9)** For the given string, replace `0xA0` with `0x7F` and `0xC0` with `0x1F`.
104 |
105 | ```js
106 | > let ip = 'start address: 0xA0, func1 address: 0xC0'
107 |
108 | > ip.replace(/0xA0/, '0x7F').replace(/0xC0/, '0x1F')
109 | < 'start address: 0x7F, func1 address: 0x1F'
110 | ```
111 |
112 |
113 |
114 | # Anchors
115 |
116 | **1)** Check if the given input strings contain `is` or `the` as whole words.
117 |
118 | ```js
119 | > let str1 = 'is; (this)'
120 | > let str2 = "The food isn't good"
121 | > let str3 = 'the2 cats'
122 | > let str4 = 'switch on the light'
123 |
124 | > const pat1 = /\bis\b/
125 | > const pat2 = /\bthe\b/
126 |
127 | > pat1.test(str1) || pat2.test(str1)
128 | < true
129 | > pat1.test(str2) || pat2.test(str2)
130 | < false
131 | > pat1.test(str3) || pat2.test(str3)
132 | < false
133 | > pat1.test(str4) || pat2.test(str4)
134 | < true
135 | ```
136 |
137 | **2)** For the given input string, change only the whole word `red` to `brown`.
138 |
139 | ```js
140 | > let ip = 'bred red spread credible red;'
141 |
142 | > ip.replace(/\bred\b/g, 'brown')
143 | < 'bred brown spread credible brown;'
144 | ```
145 |
146 | **3)** For the given array, filter all elements that contain `42` surrounded by word characters.
147 |
148 | ```js
149 | > let items = ['hi42bye', 'nice1423', 'bad42', 'cool_42a', 'fake4b']
150 |
151 | > items.filter(e => /\B42\B/.test(e))
152 | < ['hi42bye', 'nice1423', 'cool_42a']
153 | ```
154 |
155 | **4)** For the given input array, filter all elements that start with `den` or end with `ly`.
156 |
157 | ```js
158 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent']
159 |
160 | > items.filter(e => /^den/.test(e) || /ly$/.test(e))
161 | < ['lovely', '2 lonely', 'dent']
162 | ```
163 |
164 | **5)** For the given input string, change whole word `mall` to `1234` only if it is at the start of a line.
165 |
166 | ```js
167 | > let para = `(mall) call ball pall
168 | ball fall wall tall
169 | mall call ball pall
170 | wall mall ball fall
171 | mallet wallet malls
172 | mall:call:ball:pall`
173 |
174 | > console.log(para.replace(/^mall\b/gm, '1234'))
175 | < (mall) call ball pall
176 | ball fall wall tall
177 | 1234 call ball pall
178 | wall mall ball fall
179 | mallet wallet malls
180 | 1234:call:ball:pall
181 | ```
182 |
183 | **6)** For the given array, filter all elements having a line starting with `den` or ending with `ly`.
184 |
185 | ```js
186 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent']
187 |
188 | > items.filter(e => /^den/m.test(e) || /ly$/m.test(e))
189 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent']
190 | ```
191 |
192 | **7)** For the given input array, filter all whole elements `12\nthree` irrespective of case.
193 |
194 | ```js
195 | > let items = ['12\nthree\n', '12\nThree', '12\nthree\n4', '12\nthree']
196 |
197 | > items.filter(e => /^12\nthree$/i.test(e))
198 | < ['12\nThree', '12\nthree']
199 | ```
200 |
201 | **8)** For the given input array, replace `hand` with `X` for all elements that start with `hand` followed by at least one word character.
202 |
203 | ```js
204 | > let items = ['handed', 'hand', 'handy', 'un-handed', 'handle', 'hand-2']
205 |
206 | > items.map(w => w.replace(/^hand\B/, 'X'))
207 | < ['Xed', 'hand', 'Xy', 'un-handed', 'Xle', 'hand-2']
208 | ```
209 |
210 | **9)** For the given input array, filter all elements starting with `h`. Additionally, replace `e` with `X` for these filtered elements.
211 |
212 | ```js
213 | > let items = ['handed', 'hand', 'handy', 'unhanded', 'handle', 'hand-2']
214 |
215 | > items.filter(w => /^h/.test(w)).map(w => w.replace(/e/g, 'X'))
216 | < ['handXd', 'hand', 'handy', 'handlX', 'hand-2']
217 | ```
218 |
219 | **10)** Why does the following code show `false` instead of `true`?
220 |
221 | Because `$` matches only the end of string. You'll have to use the `m` flag to enable matching at the end of line separators. Some regular expression engines do allow `$` to match just before `\n` if it is the last character in the string, but not JavaScript.
222 |
223 | ```js
224 | > /end$/.test('bend it\nand send\n')
225 | < false
226 |
227 | > /end$/m.test('bend it\nand send\n')
228 | < true
229 | ```
230 |
231 |
232 |
233 | # Alternation and Grouping
234 |
235 | **1)** For the given input array, filter all elements that start with `den` or end with `ly`.
236 |
237 | ```js
238 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent']
239 |
240 | > items.filter(e => /^den|ly$/.test(e))
241 | < ['lovely', '2 lonely', 'dent']
242 | ```
243 |
244 | **2)** For the given array, filter all elements having a line starting with `den` or ending with `ly`.
245 |
246 | ```js
247 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent']
248 |
249 | > items.filter(e => /^den|ly$/m.test(e))
250 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent']
251 | ```
252 |
253 | **3)** For the given input strings, replace all occurrences of `removed` or `reed` or `received` or `refused` with `X`.
254 |
255 | ```js
256 | > let s1 = 'creed refuse removed read'
257 | > let s2 = 'refused reed redo received'
258 |
259 | > const pat1 = /re(mov|ceiv|fus|)ed/g
260 |
261 | > s1.replace(pat1, 'X')
262 | < 'cX refuse X read'
263 | > s2.replace(pat1, 'X')
264 | < 'X X redo X'
265 | ```
266 |
267 | **4)** For the given input strings, replace `late` or `later` or `slated` with `A`.
268 |
269 | ```js
270 | > let str1 = 'plate full of slate'
271 | > let str2 = "slated for later, don't be late"
272 |
273 | > const pat2 = /slated|late(r|)/g
274 |
275 | > str1.replace(pat2, 'A')
276 | < 'pA full of sA'
277 | > str2.replace(pat2, 'A')
278 | < "A for A, don't be A"
279 | ```
280 |
281 |
282 |
283 | # Escaping metacharacters
284 |
285 | **1)** Transform the given input strings to the expected output using the same logic on both strings.
286 |
287 | ```js
288 | > let str1 = '(9-2)*5+qty/3-(9-2)*7'
289 | > let str2 = '(qty+4)/2-(9-2)*5+pq/4'
290 |
291 | > const pat1 = /\(9-2\)\*5/g
292 | > str1.replace(pat1, '35')
293 | < '35+qty/3-(9-2)*7'
294 | > str2.replace(pat1, '35')
295 | < '(qty+4)/2-35+pq/4'
296 | ```
297 |
298 | **2)** Replace `(4)\|` with `2` only at the start or end of the given input strings.
299 |
300 | ```js
301 | > let s1 = '2.3/(4)\\|6 fig 5.3-(4)\\|'
302 | > let s2 = '(4)\\|42 - (4)\\|3'
303 | > let s3 = 'two - (4)\\|\n'
304 |
305 | > const pat2 = /^\(4\)\\\||\(4\)\\\|$/g
306 |
307 | > s1.replace(pat2, '2')
308 | < '2.3/(4)\\|6 fig 5.3-2'
309 | > s2.replace(pat2, '2')
310 | < '242 - (4)\\|3'
311 | > s3.replace(pat2, '2')
312 | < 'two - (4)\\|\n'
313 | ```
314 |
315 | **3)** Replace any matching element from the array `items` with `X` for the given input strings. Match the elements from `items` literally. Assume no two elements of `items` will result in any matching conflict.
316 |
317 | ```js
318 | > let items = ['a.b', '3+n', 'x\\y\\z', 'qty||price', '{n}']
319 |
320 | > function escapeRegExp(string) {
321 | return string.replace(/[.*+\-?^${}()|[\]\\]/g, '\\$&')
322 | }
323 |
324 | > function unionRegExp(arr) {
325 | return arr.map(w => escapeRegExp(w)).join('|')
326 | }
327 |
328 | > const pat3 = new RegExp(unionRegExp(items), 'g')
329 |
330 | > '0a.bcd'.replace(pat3, 'X')
331 | < '0Xcd'
332 | > 'E{n}AMPLE'.replace(pat3, 'X')
333 | < 'EXAMPLE'
334 | > '43+n2 ax\\y\\ze'.replace(pat3, 'X')
335 | < '4X2 aXe'
336 | ```
337 |
338 | **4)** Replace the backspace character `\b` with a single space character for the given input string.
339 |
340 | ```js
341 | > let ip = '123\b456'
342 |
343 | > ip.replace(/\x08/, ' ')
344 | < '123 456'
345 | ```
346 |
347 | **5)** Replace all occurrences of `\e` with `e`.
348 |
349 | ```js
350 | > let ip = 'th\\er\\e ar\\e common asp\\ects among th\\e alt\\ernations'
351 |
352 | > ip.replace(/\\e/g, 'e')
353 | < 'there are common aspects among the alternations'
354 | ```
355 |
356 | **6)** Replace any matching item from the array `eqns` with `X` for the given string `ip`. Match the items from `eqns` literally.
357 |
358 | ```js
359 | > let ip = '3-(a^b)+2*(a^b)-(a/b)+3'
360 | > let eqns = ['(a^b)', '(a/b)', '(a^b)+2']
361 |
362 | // note that '/' is also escaped here
363 | > function escapeRegExp(string) {
364 | return string.replace(/[.*+\-?^${}()|[\]\\\/]/g, '\\$&')
365 | }
366 |
367 | > function unionRegExp(arr) {
368 | return arr.map(w => escapeRegExp(w)).join('|')
369 | }
370 |
371 | > eqns.sort((a, b) => b.length - a.length)
372 | < ['(a^b)+2', '(a^b)', '(a/b)']
373 |
374 | > const pat4 = new RegExp(unionRegExp(eqns), 'g')
375 | > pat4
376 | < /\(a\^b\)\+2|\(a\^b\)|\(a\/b\)/g
377 |
378 | > ip.replace(pat4, 'X')
379 | < '3-X*X-X+3'
380 | ```
381 |
382 |
383 |
384 | # Dot metacharacter and Quantifiers
385 |
386 | > Use `s` flag for these exercises depending upon the contents of the input strings.
387 |
388 | **1)** Replace `42//5` or `42/5` with `8` for the given input.
389 |
390 | ```js
391 | > let ip = 'a+42//5-c pressure*3+42/5-14256'
392 |
393 | > ip.replace(/42\/\/?5/g, '8')
394 | < 'a+8-c pressure*3+8-14256'
395 | ```
396 |
397 | **2)** For the array `items`, filter all elements starting with `hand` and ending immediately with at most one more character or `le`.
398 |
399 | ```js
400 | > let items = ['handed', 'hand', 'handled', 'handy', 'unhand', 'hands', 'handle']
401 |
402 | > items.filter(w => /^hand(.|le)?$/.test(w))
403 | < ['hand', 'handy', 'hands', 'handle']
404 | ```
405 |
406 | **3)** Use the `split()` method to get the output as shown for the given input strings.
407 |
408 | ```js
409 | > let eqn1 = 'a+42//5-c'
410 | > let eqn2 = 'pressure*3+42/5-14256'
411 | > let eqn3 = 'r*42-5/3+42///5-42/53+a'
412 |
413 | > const pat1 = new RegExp(`42//?5`)
414 |
415 | > eqn1.split(pat1)
416 | < ['a+', '-c']
417 | > eqn2.split(pat1)
418 | < ['pressure*3+', '-14256']
419 | > eqn3.split(pat1)
420 | < ['r*42-5/3+42///5-', '3+a']
421 | ```
422 |
423 | **4)** For the given input strings, remove everything from the first occurrence of `i` till the end of the string.
424 |
425 | ```js
426 | > let s1 = 'remove the special meaning of such constructs'
427 | > let s2 = 'characters while constructing'
428 | > let s3 = 'input output'
429 |
430 | > const pat2 = /i.*/
431 |
432 | > s1.replace(pat2, '')
433 | < 'remove the spec'
434 | > s2.replace(pat2, '')
435 | < 'characters wh'
436 | > s3.replace(pat2, '')
437 | < ''
438 | ```
439 |
440 | **5)** For the given strings, construct a regexp to get the output as shown.
441 |
442 | ```js
443 | > let str1 = 'a+b(addition)'
444 | > let str2 = 'a/b(division) + c%d(#modulo)'
445 | > let str3 = 'Hi there(greeting). Nice day(a(b)'
446 |
447 | > const remove_parentheses = /\(.*?\)/g
448 |
449 | > str1.replace(remove_parentheses, '')
450 | < 'a+b'
451 | > str2.replace(remove_parentheses, '')
452 | < 'a/b + c%d'
453 | > str3.replace(remove_parentheses, '')
454 | < 'Hi there. Nice day'
455 | ```
456 |
457 | **6)** Correct the given regexp to get the expected output.
458 |
459 | ```js
460 | > let words = 'plink incoming tint winter in caution sentient'
461 |
462 | // wrong output
463 | > const w1 = /int|in|ion|ing|inco|inter|ink/g
464 | > words.replace(w1, 'X')
465 | "plXk XcomXg tX wXer X cautX sentient"
466 |
467 | // expected output
468 | > const w2 = /in(ter|co|g|k|t)?|ion/g
469 | > words.replace(w2, 'X')
470 | "plX XmX tX wX X cautX sentient"
471 | ```
472 |
473 | **7)** For the given greedy quantifiers, what would be the equivalent form using the `{m,n}` representation?
474 |
475 | * `?` is same as `{0,1}`
476 | * `*` is same as `{0,}`
477 | * `+` is same as `{1,}`
478 |
479 | **8)** `(a*|b*)` is same as `(a|b)*` — true or false?
480 |
481 | False. Because `(a*|b*)` will match only sequences like `a`, `aaa`, `bb`, `bbbbbbbb`. But `(a|b)*` can match mixed sequences like `ababbba` too.
482 |
483 | **9)** For the given input strings, remove everything from the first occurrence of `test` (irrespective of case) till the end of the string, provided `test` isn't at the end of the string.
484 |
485 | ```js
486 | > let s1 = 'this is a Test'
487 | > let s2 = 'always test your regexp for corner\ncases'
488 | > let s3 = 'a TEST of skill tests?'
489 |
490 | > let pat3 = /test.+/is
491 |
492 | > s1.replace(pat3, '')
493 | < 'this is a Test'
494 | > s2.replace(pat3, '')
495 | < 'always '
496 | > s3.replace(pat3, '')
497 | < 'a '
498 | ```
499 |
500 | **10)** For the input array `words`, filter all elements starting with `s` and containing `e` and `t` in any order.
501 |
502 | ```js
503 | > let words = ['sequoia', 'subtle', 'exhibit', 'a set', 'sets', 'tests', 'site']
504 |
505 | > words.filter(w => /^s.*(e.*t|t.*e)/.test(w))
506 | < ['subtle', 'sets', 'site']
507 | ```
508 |
509 | **11)** For the input array `words`, remove all elements having less than `6` characters.
510 |
511 | ```js
512 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 'tests', 'site']
513 |
514 | > words.filter(w => /.{6,}/.test(w))
515 | < ['sequoia', 'subtle', 'exhibit']
516 | ```
517 |
518 | **12)** For the input array `words`, filter all elements starting with `s` or `t` and having a maximum of `6` characters.
519 |
520 | ```js
521 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 't set', 'site']
522 |
523 | > words.filter(w => /^(s|t).{0,5}$/.test(w))
524 | < ['subtle', 'sets', 't set', 'site']
525 | ```
526 |
527 | **13)** Delete from the string `start` if it is at the beginning of a line up to the next occurrence of the string `end` at the end of a line. Match these keywords irrespective of case.
528 |
529 | ```js
530 | > let para = `good start
531 | start working on that
532 | project you always wanted
533 | to, do not let it end
534 | hi there
535 | start and end the end
536 | 42
537 | Start and try to
538 | finish the End
539 | bye`
540 |
541 | > const mpat = /^start.*?end$/igms
542 | > console.log(para.replace(mpat, ''))
543 | < good start
544 |
545 | hi there
546 |
547 | 42
548 |
549 | bye
550 | ```
551 |
552 | **14)** Can you reason out why this code results in the output shown? The aim was to remove all `<characters>` patterns but not the `<>` ones. The expected result was `'a 1<> b 2<> c'`.
553 |
554 | The use of `.+` quantifier after `<` means that `<>` cannot be a possible match to satisfy `<.+?>`. So, after matching `<` (which occurs after `1` and `2` in the given input string) the regular expression engine will look for next occurrence of `>` character to satisfy the given pattern. To solve such cases, you need to use character classes (discussed in a later chapter) to specify which particular set of characters should be matched by the `+` quantifier (instead of the `.` metacharacter).
555 |
556 | ```js
557 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
558 | > ip.replace(/<.+?>/g, '')
559 | < 'a 1 2'
560 | ```
561 |
562 | **15)** Use the `split()` method to get the output as shown below for the given input strings.
563 |
564 | ```js
565 | > let s1 = 'go there :: this :: that'
566 | > let s2 = 'a::b :: c::d e::f :: 4::5'
567 | > let s3 = '42:: hi::bye::see :: carefully'
568 |
569 | > const pat4 = / +:: +(.+)/
570 |
571 | > s1.split(pat4, 2)
572 | < ['go there', 'this :: that']
573 | > s2.split(pat4, 2)
574 | < ['a::b', 'c::d e::f :: 4::5']
575 | > s3.split(pat4, 2)
576 | < ['42:: hi::bye::see', 'carefully']
577 | ```
578 |
579 |
580 |
581 | # Working with matched portions
582 |
583 | **1)** For the given strings, extract the matching portion from the first `is` to the last `t`.
584 |
585 | ```js
586 | > let str1 = 'What is the biggest fruit you have seen?'
587 | > let str2 = 'Your mission is to read and practice consistently'
588 |
589 | > const pat1 = /is.*t/
590 |
591 | > str1.match(pat1)[0]
592 | < 'is the biggest fruit'
593 | > str2.match(pat1)[0]
594 | < 'ission is to read and practice consistent'
595 | ```
596 |
597 | **2)** Find the starting index of the first occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string.
598 |
599 | ```js
600 | > let s1 = 'match after the last newline character'
601 | > let s2 = 'and then you want to test'
602 | > let s3 = 'this is good bye then'
603 | > let s4 = 'who was there to see?'
604 |
605 | > const pat2 = /is|the|was|to/
606 |
607 | > s1.search(pat2)
608 | < 12
609 | > s2.search(pat2)
610 | < 4
611 | > s3.search(pat2)
612 | < 2
613 | > s4.search(pat2)
614 | < 4
615 | ```
616 |
617 | **3)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string.
618 |
619 | ```js
620 | > let s1 = 'match after the last newline character'
621 | > let s2 = 'and then you want to test'
622 | > let s3 = 'this is good bye then'
623 | > let s4 = 'who was there to see?'
624 |
625 | > const pat3 = /.*(is|the|was|to)/d
626 |
627 | > s1.match(pat3).indices[1][0]
628 | < 12
629 | > s2.match(pat3).indices[1][0]
630 | < 18
631 | > s3.match(pat3).indices[1][0]
632 | < 17
633 | > s4.match(pat3).indices[1][0]
634 | < 14
635 | ```
636 |
637 | **4)** The given input string contains `:` exactly once. Extract all characters after the `:` as output.
638 |
639 | ```js
640 | > let ip = 'fruits:apple, mango, guava, blueberry'
641 |
642 | > ip.match(/:(.*)/)[1]
643 | < 'apple, mango, guava, blueberry'
644 | ```
645 |
646 | **5)** Extract all words between `(` and `)` from the given input string as an array (including the parentheses). Assume that the input will not contain any broken parentheses.
647 |
648 | ```js
649 | > let ip = 'another (way) to reuse (portion) matched (by) capture groups'
650 |
651 | > ip.match(/\(.*?\)/g)
652 | < ['(way)', '(portion)', '(by)']
653 | ```
654 |
655 | **6)** Extract all occurrences of `<` up to the next occurrence of `>`, provided there is at least one character in between `<` and `>`.
656 |
657 | ```js
658 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
659 |
660 | > ip.match(/<.+?>/g)
661 | < ['<apple>', '<> b<bye>', '<> c<cat>']
662 | ```
663 |
664 | **7)** Use `matchAll()` to get the output as shown below for the given input strings. Note the characters used in the input strings carefully.
665 |
666 | ```js
667 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 '
668 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 '
669 |
670 | > const pat4 = /(.+?),(.+?) /g
671 |
672 | > Array.from(row1.matchAll(pat4), m => [m[1], m[2]])
673 | < (4) [Array(2), Array(2), Array(2), Array(2)]
674 | 0: (2) ['-2', '5']
675 | 1: (2) ['4', '+3']
676 | 2: (2) ['+42', '-53']
677 | 3: (2) ['4356246', '-357532354']
678 | length: 4
679 | [[Prototype]]: Array(0)
680 |
681 | > Array.from(row2.matchAll(pat4), m => [m[1], m[2]])
682 | < (3) [Array(2), Array(2), Array(2)]
683 | 0: (2) ['1.32', '-3.14']
684 | 1: (2) ['634', '5.63']
685 | 2: (2) ['63.3e3', '9907809345343.235']
686 | length: 3
687 | [[Prototype]]: Array(0)
688 | ```
689 |
690 | **8)** This is an extension to the previous question. Sum each pair of numbers that are separated by a comma.
691 |
692 | * For `row1`, find the sum of integers. For example, sum of `-2` and `5` is `3`.
693 | * For `row2`, find the sum of floating-point numbers. For example, sum of `1.32` and `-3.14` is `-1.82`.
694 |
695 | ```js
696 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 '
697 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 '
698 |
699 | // should be same as the previous question
700 | > const pat5 = /(.+?),(.+?) /g
701 |
702 | > Array.from(row1.matchAll(pat5), m => +m[1] + +m[2])
703 | < [3, 7, -11, -353176108]
704 |
705 | > Array.from(row2.matchAll(pat5), m => +m[1] + +m[2])
706 | < [-1.82, 639.63, 9907809408643.234]
707 | ```
708 |
709 | **9)** Use the `split()` method to get the output as shown below.
710 |
711 | ```js
712 | > let ip = '42:no-output;1000:car-tr:u-ck;SQEX49801'
713 |
714 | > ip.split(/:.+?-(.+?);/)
715 | < ['42', 'output', '1000', 'tr:u-ck', 'SQEX49801']
716 | ```
717 |
718 | **10)** Write a string function that changes the given input to alternate case. The first letter should be changed to lowercase, the next one to uppercase and then lowercase and so on. Characters other than letters should be left alone and not affect case changing.
719 |
720 | ```js
721 | > function aLtErNaTeCaSe(ip) {
722 | let b = true
723 | return ip.replace(/[a-z]/ig, m => (b = !b) ? m.toUpperCase() : m.toLowerCase())
724 | }
725 |
726 | > aLtErNaTeCaSe('HI THERE!')
727 | < 'hI tHeRe!'
728 | > aLtErNaTeCaSe('good morning')
729 | < 'gOoD mOrNiNg'
730 | > aLtErNaTeCaSe('Sample123string42with777numbers')
731 | < 'sAmPlE123sTrInG42wItH777nUmBeRs'
732 | ```
733 |
734 | **11)** Replace all occurrences of `par` with `spar`, `spare` with `extra` and `park` with `garden`.
735 |
736 | ```js
737 | > let s1 = 'apartment has a park'
738 | > let s2 = 'do you have a spare cable'
739 | > let s3 = 'write a parser'
740 |
741 | > let d1 = {'par': 'spar', 'spare': 'extra', 'park': 'garden'}
742 | > const pat6 = /spare|park?/g
743 |
744 | > s1.replace(pat6, k => d1[k])
745 | < 'aspartment has a garden'
746 | > s2.replace(pat6, k => d1[k])
747 | < 'do you have a extra cable'
748 | > s3.replace(pat6, k => d1[k])
749 | < 'write a sparser'
750 | ```
751 |
752 | **12)** Name the flag and property you can use with the `match()` method to get both the starting and ending locations of the matched portions.
753 |
754 | The `d` flag and `indices` property can be used to get both the starting and ending locations of the matched portions. Here's an example:
755 |
756 | ```js
757 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d)
758 | < [':100g', '100', index: 6, input: 'coffee:100g tea:250g',
759 | groups: undefined, indices: Array(2)]
760 |
761 | // locations for the entire match
762 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[0]
763 | < [6, 11]
764 |
765 | // locations for the first capture group
766 | > 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[1]
767 | < [7, 10]
768 | ```
769 |
770 |
771 |
772 | # Character class
773 |
774 | **1)** For the array `items`, filter all elements starting with `hand` and ending with `s` or `y` or `le`. No other character in between, for example, `hands` should match but not `hand-has`.
775 |
776 | ```js
777 | > let items = ['-handy', 'hand', 'handy', 'unhand', 'hands', 'hand-icy', 'handle']
778 |
779 | > items.filter(w => /^hand([sy]|le)$/.test(w))
780 | < ['handy', 'hands', 'handle']
781 | ```
782 |
783 | **2)** Replace all whole words `reed` or `read` or `red` with `X`.
784 |
785 | ```js
786 | > let ip = 'redo red credible :read: rod reed bred'
787 |
788 | > ip.replace(/\bre[ae]?d\b/g, 'X')
789 | < 'redo X credible :X: rod X bred'
790 | ```
791 |
792 | **3)** For the array `words`, filter all elements containing `e` or `i` followed by `l` or `n`. Note that the order mentioned should be followed.
793 |
794 | ```js
795 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest']
796 |
797 | > words.filter(w => /[ei].*[ln]/.test(w))
798 | < ['surrender', 'unicorn', 'eel']
799 | ```
800 |
801 | **4)** For the array `words`, filter all elements containing `e` or `i` and `l` or `n` in any order.
802 |
803 | ```js
804 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest']
805 |
806 | > words.filter(w => /[ei].*[ln]|[ln].*[ei]/.test(w))
807 | < ['surrender', 'unicorn', 'newer', 'eel']
808 | ```
809 |
810 | **5)** Extract all hex character sequences, with `0x` optional prefix. Match the characters case insensitively, and the sequences shouldn't be surrounded by other word characters.
811 |
812 | ```js
813 | > let str1 = '128A foo 0xfe32 34 0xbar'
814 | > let str2 = '0XDEADBEEF place 0x0ff1ce bad'
815 |
816 | > const hex_seq = /\b(0x)?[\da-f]+\b/ig
817 |
818 | > str1.match(hex_seq)
819 | < ['128A', '0xfe32', '34']
820 | > str2.match(hex_seq)
821 | < ['0XDEADBEEF', '0x0ff1ce', 'bad']
822 | ```
823 |
824 | **6)** Delete from `(` to the next occurrence of `)` unless they contain parentheses characters in between.
825 |
826 | ```js
827 | > let str1 = 'def factorial()'
828 | > let str2 = 'a/b(division) + c%d(#modulo) - (e+(j/k-3)*4)'
829 | > let str3 = 'Hi there(greeting). Nice day(a(b)'
830 |
831 | > const remove_parentheses = /\([^()]*\)/g
832 |
833 | > str1.replace(remove_parentheses, '')
834 | < 'def factorial'
835 | > str2.replace(remove_parentheses, '')
836 | < 'a/b + c%d - (e+*4)'
837 | > str3.replace(remove_parentheses, '')
838 | < 'Hi there. Nice day(a'
839 | ```
840 |
841 | **7)** For the array `words`, filter all elements not starting with `e` or `p` or `u`.
842 |
843 | ```js
844 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', '(pest)']
845 |
846 | > words.filter(w => /^[^epu]/.test(w))
847 | < ['surrender', 'newer', 'door', '(pest)']
848 | ```
849 |
850 | **8)** For the array `words`, filter all elements not containing `u` or `w` or `ee` or `-`.
851 |
852 | ```js
853 | > let words = ['p-t', 'you', 'tea', 'heel', 'owe', 'new', 'reed', 'ear']
854 |
855 | > words.filter(w => !/[uw-]|ee/.test(w))
856 | < ['tea', 'ear']
857 | ```
858 |
859 | **9)** The given input strings contain fields separated by `,` and fields can be empty too. Replace the last three fields with `WHTSZ323`.
860 |
861 | ```js
862 | > let row1 = '(2),kite,12,,D,C,,'
863 | > let row2 = 'hi,bye,sun,moon'
864 |
865 | > const pat1 = /(,[^,]*){3}$/g
866 |
867 | > row1.replace(pat1, ',WHTSZ323')
868 | < '(2),kite,12,,D,WHTSZ323'
869 | > row2.replace(pat1, ',WHTSZ323')
870 | < 'hi,WHTSZ323'
871 | ```
872 |
873 | **10)** Split the given strings based on consecutive sequence of digit or whitespace characters.
874 |
875 | ```js
876 | > let s1 = 'lion \t Ink32onion Nice'
877 | > let s2 = '**1\f2\n3star\t7 77\r**'
878 |
879 | > const pat2 = /[\d\s]+/
880 |
881 | > s1.split(pat2)
882 | < ['lion', 'Ink', 'onion', 'Nice']
883 | > s2.split(pat2)
884 | < ['**', 'star', '**']
885 | ```
886 |
887 | **11)** Delete all occurrences of the sequence `<characters>` where `characters` is one or more non `>` characters and cannot be empty.
888 |
889 | ```js
890 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
891 |
892 | > ip.replace(/<[^>]+>/g, '')
893 | < 'a 1<> b 2<> c'
894 | ```
895 |
896 | **12)** `\b[a-z](on|no)[a-z]\b` is same as `\b[a-z][on]{2}[a-z]\b`. True or False? Sample input lines shown below might help to understand the differences, if any.
897 |
898 | False. `[on]{2}` will also match `oo` and `nn`.
899 |
900 | ```js
901 | > console.log('known\nmood\nknow\npony\ninns')
902 | known
903 | mood
904 | know
905 | pony
906 | inns
907 | ```
908 |
909 | **13)** For the given array, filter elements containing any number sequence greater than `624`.
910 |
911 | ```js
912 | > let items = ['hi0000432abcd', 'car00625', '42_624 0512', '3.14 96 2 foo1234baz']
913 |
914 | > items.filter(e => e.match(/\d+/g).some(m => m > 624))
915 | < ['car00625', '3.14 96 2 foo1234baz']
916 | ```
917 |
918 | **14)** Convert the given input string to two different arrays as shown below.
919 |
920 | ```js
921 | > let ip = 'price_42 roast^\t\n^-ice==cat\neast'
922 |
923 | > ip.split(/\W+/)
924 | < ['price_42', 'roast', 'ice', 'cat', 'east']
925 |
926 | > ip.split(/(\W+)/)
927 | < ['price_42', ' ', 'roast', '^\t\n^-', 'ice', '==', 'cat', '\n', 'east']
928 | ```
929 |
930 | **15)** Filter all elements whose first non-whitespace character is not a `#` character. Any element made up of only whitespace characters should be ignored as well.
931 |
932 | ```js
933 | > let items = [' #comment', '\t\napple #42', '#oops', 'sure', 'no#1', '\t\r\f']
934 |
935 | > items.filter(e => /^\s*[^#\s]/.test(e))
936 | < ['\t\napple #42', 'sure', 'no#1']
937 | ```
938 |
939 | **16)** For the given string, surround all whole words with `{}` except `par` and `cat`.
940 |
941 | ```js
942 | > let ip = 'part; cat {super} rest_42 par scatter'
943 |
944 | > ip.replace(/\w+/g, w => /\b(par|cat)\b/.test(w) ? w : `{${w}}`)
945 | < '{part}; cat {{super}} {rest_42} par {scatter}'
946 | ```
947 |
948 |
949 |
950 | # Groupings and backreferences
951 |
952 | **1)** Replace the space character that occurs after a word ending with `a` or `r` with a newline character.
953 |
954 | ```js
955 | > let ip = 'area not a _a2_ roar took 22'
956 |
957 | > console.log(ip.replace(/([ar]) /g, '$1\n'))
958 | area
959 | not a
960 | _a2_ roar
961 | took 22
962 | ```
963 |
964 | **2)** Add `[]` around words starting with `s` and containing `e` and `t` in any order.
965 |
966 | ```js
967 | > let ip = 'sequoia subtle exhibit asset sets2 tests si_te'
968 |
969 | > ip.replace(/\bs\w*(t\w*e|e\w*t)\w*/g, '[$&]')
970 | < 'sequoia [subtle] exhibit asset [sets2] tests [si_te]'
971 | ```
972 |
973 | **3)** Replace all whole words with `X` that start and end with the same word character (irrespective of case). Single character word should get replaced with `X` too, as it satisfies the stated condition.
974 |
975 | ```js
976 | > let ip = 'oreo not a _a2_ Roar took 22'
977 |
978 | // can also use: ip.replace(/\b(\w|(\w)\w*\2)\b/ig, 'X')
979 | > ip.replace(/\b(\w)(\w*\1)?\b/ig, 'X')
980 | < 'X not X X X took X'
981 | ```
982 |
983 | **4)** Convert the given *markdown* headers to corresponding *anchor* tags. Consider the input to start with one or more `#` characters followed by space and word characters. The `name` attribute is constructed by converting the header to lowercase and replacing spaces with hyphens. Can you do it without using a capture group?
984 |
985 | ```js
986 | > let header1 = '# Regular Expressions'
987 | > let header2 = '## Named capture groups'
988 |
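989 | > function hyphenify(m) {
990 | return `<a name='${m.toLowerCase().replace(/ /g, '-')}'></a>${m}`
991 | }
992 |
993 | > header1.replace(/\w.*/, hyphenify)
994 | < "# <a name='regular-expressions'></a>Regular Expressions"
995 | > header2.replace(/\w.*/, hyphenify)
996 | < "## <a name='named-capture-groups'></a>Named capture groups"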
997 | ```
998 |
999 | **5)** Convert the given *markdown* anchors to corresponding *hyperlinks*.
1000 |
1001 | ```js
1002 | > let anchor1 = "# <a name='regular-expressions'></a>Regular Expressions"
1003 | > let anchor2 = "## <a name='subexpression-calls'></a>Subexpression calls"
1004 |
1005 | > const hyperlink = /[^']+'([^']+)'><\/a>(.+)/
1006 |
1007 | > anchor1.replace(hyperlink, '[$2](#$1)')
1008 | < '[Regular Expressions](#regular-expressions)'
1009 | > anchor2.replace(hyperlink, '[$2](#$1)')
1010 | < '[Subexpression calls](#subexpression-calls)'
1011 | ```
1012 |
1013 | **6)** Check if the given input strings have words with at least two consecutive repeated alphabets irrespective of case. For example, words like `stillnesS` and `Committee` should return `true` but words like `root` or `readable` or `rotational` should return `false`. Consider word to be as defined in regular expression parlance.
1014 |
1015 | ```js
1016 | > let s1 = 'readable COMMItTEe'
1017 | > let s2 = 'rotational sti1lness _foot_'
1018 | > let s3 = 'needed repeated'
1019 | > let s4 = 'offsh00t'
1020 |
1021 | > const pat1 = /(?:(\w)\1\w*){2}/i
1022 |
1023 | > pat1.test(s1)
1024 | < true
1025 | > pat1.test(s2)
1026 | < false
1027 | > pat1.test(s3)
1028 | < false
1029 | > pat1.test(s4)
1030 | < true
1031 | ```
1032 |
1033 | **7)** For the given input string, replace all occurrences of digit sequences with only the unique non-repeating sequence. For example, `232323` should be changed to `23` and `897897` should be changed to `897`. If there are no repeats (for example `1234`) or if the repeats end prematurely (for example `12121`), it should not be changed.
1034 |
1035 | ```js
1036 | > let ip = '1234 2323 453545354535 9339 11 60260260'
1037 |
1038 | > ip.replace(/\b(\d+)\1+\b/g, '$1')
1039 | < '1234 23 4535 9339 1 60260260'
1040 | ```
1041 |
1042 | **8)** Replace sequences made up of words separated by `:` or `.` by the first word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character.
1043 |
1044 | ```js
1045 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.'
1046 |
1047 | > ip.replace(/([:.]\w*)+/g, '')
1048 | < 'wow hi-2 bye kite'
1049 | ```
1050 |
1051 | **9)** Replace sequences made up of words separated by `:` or `.` by the last word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character.
1052 |
1053 | ```js
1054 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.'
1055 |
1056 | > ip.replace(/((\w+)[:.])+/g, '$2')
1057 | < 'five hi-2 bye water'
1058 | ```
1059 |
1060 | **10)** Split the given input string on one or more repeated sequence of `cat`.
1061 |
1062 | ```js
1063 | > let ip = 'firecatlioncatcatcatbearcatcatparrot'
1064 |
1065 | > ip.split(/(?:cat)+/)
1066 | < ['fire', 'lion', 'bear', 'parrot']
1067 | ```
1068 |
1069 | **11)** For the given input string, find all occurrences of digit sequences with at least one repeating sequence. For example, `232323` and `897897`. If the repeats end prematurely, for example `12121`, it should not be matched.
1070 |
1071 | ```js
1072 | > let ip = '1234 2323 453545354535 9339 11 60260260'
1073 |
1074 | > const pat2 = /\b(\d+)\1+\b/g
1075 |
1076 | // entire sequences in the output
1077 | > ip.match(pat2)
1078 | < ['2323', '453545354535', '11']
1079 |
1080 | // only the unique sequence in the output
1081 | > Array.from(ip.matchAll(pat2), m => m[1])
1082 | < ['23', '4535', '1']
1083 | ```
1084 |
1085 | **12)** Convert the comma separated strings to corresponding key-value pair mapping as shown below. The keys are `name`, `maths` and `phy` for the three fields in the input strings.
1086 |
1087 | ```js
1088 | > let row1 = 'rohan,75,89'
1089 | > let row2 = 'rose,88,92'
1090 |
1091 | > const pat3 = /(?<name>[^,]+),(?<maths>[^,]+),(?<phy>[^,]+)/
1092 |
1093 | > row1.match(pat3).groups
1094 | < {name: 'rohan', maths: '75', phy: '89'}
1095 |
1096 | > row2.match(pat3).groups
1097 | < {name: 'rose', maths: '88', phy: '92'}
1098 | ```
1099 |
1100 | **13)** Surround all whole words with `()`. Additionally, if the whole word is `imp` or `ant`, delete them. Can you do it with just a single substitution?
1101 |
1102 | ```js
1103 | > let ip = 'tiger imp goat eagle ant important'
1104 |
1105 | > ip.replace(/\b(?:imp|ant|(\w+))\b/g, '($1)')
1106 | < '(tiger) () (goat) (eagle) () (important)'
1107 | ```
1108 |
1109 |
1110 |
1111 | # Lookarounds
1112 |
1113 | > Use lookarounds for solving the following exercises even if they are not required.
1114 |
1115 | **1)** Replace all whole words with `X` unless it is preceded by a `(` character.
1116 |
1117 | ```js
1118 | > let ip = '(apple) guava berry) apple (mango) (grape'
1119 |
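1120 | > ip.replace(/(?<!\()\b\w+/g, 'X')
1121 | < '(apple) X X) X (mango) (grape'
1122 | ```
1123 |
1124 | **2)** Replace all whole words with `X` unless it is followed by a `)` character.
1125 |
1126 | ```js
1127 | > let ip = '(apple) guava berry) apple (mango) (grape'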
1128 |
1129 | > ip.replace(/\w+\b(?!\))/g, 'X')
1130 | < '(apple) X berry) X (mango) (X'
1131 | ```
1132 |
1133 | **3)** Replace all whole words with `X` unless it is preceded by `(` or followed by `)` characters.
1134 |
1135 | ```js
1136 | > let ip = '(apple) guava berry) apple (mango) (grape'
1137 |
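1138 | > ip.replace(/(?<!\()\b\w+\b(?!\))/g, 'X')
1139 | < '(apple) X berry) X (mango) (grape'
1140 | ```
1141 |
1142 | **4)** Extract all whole words that do not end with `e` or `n`.
1143 |
1144 | ```js
1145 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u'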
1146 |
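1147 | > ip.match(/\b\w+\b(?<![en])/g)
1148 | < ['a_t', 'row', 'Dust', 'end', 'a2', 'u']
1149 | ```
1150 |
1151 | **5)** Extract all whole words that do not start with `a` or `d` or `n`.
1152 |
1153 | ```js
1154 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u'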
1155 |
1156 | > ip.match(/(?![adn])\b\w+/g)
1157 | < ['row', 'on', 'Urn', 'e', 'Dust', 'end', 'e', 'u']
1158 | ```
1159 |
1160 | **6)** Extract all whole words only if they are followed by `:` or `,` or `-`.
1161 |
1162 | ```js
1163 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1164 |
1165 | > ip.match(/\w+(?=[:,-])/g)
1166 | < ['Poke', 'so_good', 'ever2']
1167 | ```
1168 |
1169 | **7)** Extract all whole words only if they are preceded by `=` or `/` or `-`.
1170 |
1171 | ```js
1172 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1173 |
1174 | > ip.match(/(?<=[=\/-])\w+/g)
1175 | < ['so_good', 'is', 'sit']
1176 | ```
1177 |
1178 | **8)** Extract all whole words only if they are preceded by `=` or `:` and followed by `:` or `.`.
1179 |
1180 | ```js
1181 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1182 |
1183 | > ip.match(/(?<=[=:])\w+(?=[:.])/g)
1184 | < ['so_good', 'ink']
1185 | ```
1186 |
1187 | **9)** Extract all whole words only if they are preceded by `=` or `:` or `.` or `(` or `-` and not followed by `.` or `/`.
1188 |
1189 | ```js
1190 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1191 |
1192 | > ip.match(/(?<=[=:.(-])\w+\b(?![\/.])/g)
1193 | < ['so_good', 'vast', 'sit']
1194 | ```
1195 |
1196 | **10)** Remove the leading and trailing whitespaces from all the individual fields where `,` is the field separator.
1197 |
1198 | ```js
1199 | > let csv1 = ' comma ,separated ,values \t\r '
1200 | > let csv2 = 'good bad,nice ice , 42 , , stall small'
1201 |
1202 | > const trim_whitespace = /(?<=^|,)\s+|\s+(?=,|$)/g
1203 |
1204 | > csv1.replace(trim_whitespace, '')
1205 | < 'comma,separated,values'
1206 | > csv2.replace(trim_whitespace, '')
1207 | < 'good bad,nice ice,42,,stall small'
1208 | ```
1209 |
1210 | **11)** Filter elements that satisfy all of these rules:
1211 |
1212 | * should have at least two alphabets
1213 | * should have at least three digits
1214 | * should have at least one special character among `%` or `*` or `#` or `$`
1215 | * should not end with a whitespace character
1216 |
1217 | ```js
1218 | > let pwds = ['hunter2', 'F2h3u%9', '*X3Yz3.14\t', 'r2_d2_42', 'A $B C1234']
1219 |
1220 | > pwds.filter(p => /(?!.*\s$)(?=(.*[a-z]){2})(?=(.*\d){3}).*[%*#$]/i.test(p))
1221 | < ['F2h3u%9', 'A $B C1234']
1222 | ```
1223 |
1224 | **12)** For the given string, surround all whole words with `{}` except for whole words `par` and `cat` and `apple`.
1225 |
1226 | ```js
1227 | > let ip = 'part; cat {super} rest_42 par scatter apple spar'
1228 |
1229 | > ip.replace(/\b(?!(?:par|cat|apple)\b)\w+/g, '{$&}')
1230 | < '{part}; cat {{super}} {rest_42} par {scatter} apple {spar}'
1231 | ```
1232 |
1233 | **13)** Extract the integer portion of floating-point numbers for the given string. A number ending with `.` and no further digits should not be considered.
1234 |
1235 | ```js
1236 | > let ip = '12 ab32.4 go 5 2. 46.42 5'
1237 |
1238 | > ip.match(/\d+(?=\.\d)/g)
1239 | < ['32', '46']
1240 | ```
1241 |
1242 | **14)** For the given input strings, extract all overlapping two character sequences.
1243 |
1244 | ```js
1245 | > let s1 = 'apple'
1246 | > let s2 = '1.2-3:4'
1247 |
1248 | > const pat1 = /.(?=(.))/g
1249 |
1250 | > Array.from(s1.matchAll(pat1), m => m[0] + m[1])
1251 | < ['ap', 'pp', 'pl', 'le']
1252 | > Array.from(s2.matchAll(pat1), m => m[0] + m[1])
1253 | < ['1.', '.2', '2-', '-3', '3:', ':4']
1254 | ```
1255 |
1256 | **15)** The given input strings contain fields separated by the `:` character. Delete `:` and the last field if there is a digit character anywhere before the last field.
1257 |
1258 | ```js
1259 | > let s1 = '42:cat'
1260 | > let s2 = 'twelve:a2b'
1261 | > let s3 = 'we:be:he:0:a:b:bother'
1262 | > let s4 = 'apple:banana-42:cherry:'
1263 | > let s5 = 'dragon:unicorn:centaur'
1264 |
1265 | > const pat2 = /(?<=\d.*):[^:]*$/
1266 |
1267 | > s1.replace(pat2, '')
1268 | < '42'
1269 | > s2.replace(pat2, '')
1270 | < 'twelve:a2b'
1271 | > s3.replace(pat2, '')
1272 | < 'we:be:he:0:a:b'
1273 | > s4.replace(pat2, '')
1274 | < 'apple:banana-42:cherry'
1275 | > s5.replace(pat2, '')
1276 | < 'dragon:unicorn:centaur'
1277 | ```
1278 |
1279 | **16)** Extract all whole words unless they are preceded by `:` or `<=>` or `----` or `#`.
1280 |
1281 | ```js
1282 | > let ip = '::very--at<=>row|in.a_b#b2c=>lion----east'
1283 |
1284 | > ip.match(/(?<![:#]|<=>|-{4})\b\w+/g)
1285 | < ['at', 'in', 'a_b', 'lion']
1286 | ```
1287 |
1288 | **17)** Match strings if it contains `qty` followed by `price` but not if there is any whitespace character or the string `error` between them.
1289 |
1290 | ```js
1291 | > let str1 = '23,qty,price,42'
1292 | > let str2 = 'qty price,oh'
1293 | > let str3 = '3.14,qty,6,errors,9,price,3'
1294 | > let str4 = '42\nqty-6,apple-56,price-234,error'
1295 | > let str5 = '4,price,3.14,qty,4'
1296 | > let str6 = '(qtyprice) (hi-there)'
1297 |
1298 | > const neg = /qty((?!\s|error).)*price/
1299 |
1300 | > neg.test(str1)
1301 | < true
1302 | > neg.test(str2)
1303 | < false
1304 | > neg.test(str3)
1305 | < false
1306 | > neg.test(str4)
1307 | < true
1308 | > neg.test(str5)
1309 | < false
1310 | > neg.test(str6)
1311 | < true
1312 | ```
1313 |
1314 | **18)** Can you reason out why the following regular expressions behave differently?
1315 |
1316 | `\b` matches both the start and end of word locations. In the below example, `\b..\b` doesn't necessarily mean that the first `\b` will match only the start of word location and the second `\b` will match only the end of word location. They can be any combination! For example, `I` followed by space in the input string here is using the start of word location for both the conditions. Similarly, space followed by `2` is using the end of word location for both the conditions.
1317 |
1318 | In contrast, the negative lookarounds version ensures that there are no word characters around any two characters. Also, such assertions will always be satisfied at the start of string and the end of string respectively. But `\b` depends on the presence of word characters. For example, `!` at the end of the input string here matches the lookaround assertion but not word boundary.
1319 |
1320 | ```js
1321 | > let ip = 'I have 12, he has 2!'
1322 |
1323 | > ip.replace(/\b..\b/g, '{$&}')
1324 | < '{I }have {12}{, }{he} has{ 2}!'
1325 |
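1326 | > ip.replace(/(?<!\w)..(?!\w)/g, '{$&}')
1327 | < 'I have {12}, {he} has {2!}'
1328 | ```
1329 |
1330 | **19)** Simulate string partitioning to get an array of three elements — string before the separator, portion matched by the separator and string after the separator. For the first case, split the given input string on the first occurrence of digits. For the second case, split based on the last occurrence of digits.
1331 |
1332 | ```js
1333 | > let w2 = 'Sample123string42with777numbers'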
1334 |
1335 | > w2.split(/(?<!\d.*)(\d+)/)
1336 | < ['Sample', '123', 'string42with777numbers']
1337 |
1338 | > w2.split(/(\d+)(?!.*\d)/)
1339 | < ['Sample123string42with', '777', 'numbers']
1340 | ```
1341 |
1342 | **20)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings using the `search()` method. Assume that there will be at least one match for each input string.
1343 |
1344 | ```js
1345 | > let s1 = 'match after the last newline character'
1346 | > let s2 = 'and then you want to test'
1347 | > let s3 = 'this is good bye then'
1348 | > let s4 = 'who was there to see?'
1349 |
1350 | > const pat3 = /(is|the|was|to)(?!.*(is|the|was|to))/
1351 |
1352 | > s1.search(pat3)
1353 | < 12
1354 | > s2.search(pat3)
1355 | < 18
1356 | > s3.search(pat3)
1357 | < 17
1358 | > s4.search(pat3)
1359 | < 14
1360 | ```
1361 |
1362 |
1363 |
1364 | # Unicode
1365 |
1366 | **1)** Check if the given input strings are made up of ASCII characters only. Consider the input to be non-empty strings and any character that isn't part of the 7-bit ASCII set should result in `false`.
1367 |
1368 | ```js
1369 | > let str1 = '123 × 456'
1370 | > let str2 = 'good fοοd'
1371 | > let str3 = 'happy learning!'
1372 |
1373 | // can also use: const pat1 = /^[\x00-\x7f]+$/
1374 | > const pat1 = /^\p{ASCII}+$/u
1375 |
1376 | > pat1.test(str1)
1377 | < false
1378 | > pat1.test(str2)
1379 | < false
1380 | > pat1.test(str3)
1381 | < true
1382 | ```
1383 |
1384 | **2)** Retain only the punctuation characters for the given string.
1385 |
1386 | ```js
1387 | > let ip = '❨a❩❪1❫❬b❭❮2❯❰c❱❲3❳❴xyz❵⟅123⟆⟦⟧⟨like⟩⟪3.14⟫'
1388 |
1389 | > ip.replace(/\P{P}+/gu, '')
1390 | < '❨❩❪❫❬❭❮❯❰❱❲❳❴❵⟅⟆⟦⟧⟨⟩⟪.⟫'
1391 | ```
1392 |
1393 | **3)** Is the following code snippet showing the correct output?
1394 |
1395 | Yes. Some regular expression engines allow escape sequences like `\d`, `\b`, `\s`, `\w`, etc to be Unicode aware, but not JavaScript.
1396 |
1397 | ```js
1398 | > 'fox:αλεπού'.match(/\w+/g)
1399 | < ['fox']
1400 | ```
1401 |
1402 | **4)** Name the set operations enabled by the `v` flag.
1403 |
1404 | The following set operations are enabled by the `v` flag inside character classes:
1405 |
1406 | * `&&` intersection
1407 | * `--` difference
1408 |
1409 | To aid in such definitions, you can use `[]` in nested fashion.
1410 |
1411 | **5)** Extract all whole words from the given strings. However, do not match words if they contain any character present in the `ignore` variable.
1412 |
1413 | ```js
1414 | > let s1 = 'match after the last new_line character A2'
1415 | > let s2 = 'and then you want to test'
1416 |
1417 | > let ignore = 'aty'
1418 | > const ign1 = new RegExp(`\\b[\\w--[${ignore}]]+\\b`, 'gv')
1419 | > ign1
1420 | < /\b[\w--[aty]]+\b/gv
1421 | > s1.match(ign1)
1422 | < ['new_line', 'A2']
1423 | > s2.match(ign1)
1424 | < null
1425 |
1426 | > let ignore = 'esw'
1427 | // should be the same solution used above
1428 | > const ign2 = new RegExp(`\\b[\\w--[${ignore}]]+\\b`, 'gv')
1429 | > ign2
1430 | < /\b[\w--[esw]]+\b/gv
1431 | > s1.match(ign2)
1432 | < ['match', 'A2']
1433 | > s2.match(ign2)
1434 | < ['and', 'you', 'to']
1435 | ```
1436 |
1437 |
--------------------------------------------------------------------------------
/Exercises.md:
--------------------------------------------------------------------------------
1 | # Exercises
2 |
3 | > Try to solve the exercises in every chapter using only the features discussed until that chapter. Some of the exercises will be easier to solve with techniques presented in the later chapters, but the aim of these exercises is to explore the features presented so far.
4 |
5 | > For solutions, see [Exercise_solutions.md](https://github.com/learnbyexample/learn_js_regexp/blob/master/Exercise_solutions.md).
6 |
7 |
8 |
9 | # RegExp introduction
10 |
11 | **1)** Check if the given input strings contain `two` irrespective of case.
12 |
13 | ```js
14 | > let s1 = 'Their artwork is exceptional'
15 | > let s2 = 'one plus tw0 is not three'
16 | > let s3 = 'TRUSTWORTHY'
17 |
18 | > const pat1 = // add your solution here
19 |
20 | > pat1.test(s1)
21 | < true
22 | > pat1.test(s2)
23 | < false
24 | > pat1.test(s3)
25 | < true
26 | ```
27 |
28 | **2)** For the given array, filter all elements that do *not* contain `e`.
29 |
30 | ```js
31 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
32 |
33 | > items.filter(w => test(w)) // add your solution here
34 | < ['goal', 'sit']
35 | ```
36 |
37 | **3)** Replace only the first occurrence of `5` with `five` for the given string.
38 |
39 | ```js
40 | > let ip = 'They ate 5 apples and 5 oranges'
41 |
42 | > ip.replace() // add your solution here
43 | < 'They ate five apples and 5 oranges'
44 | ```
45 |
46 | **4)** Replace all occurrences of `5` with `five` for the given string.
47 |
48 | ```js
49 | > let ip = 'They ate 5 apples and 5 oranges'
50 |
51 | > ip.replace() // add your solution here
52 | < 'They ate five apples and five oranges'
53 | ```
54 |
55 | **5)** Replace all occurrences of `note` irrespective of case with `X`.
56 |
57 | ```js
58 | > let ip = 'This note should not be NoTeD'
59 |
60 | > ip.replace() // add your solution here
61 | < 'This X should not be XD'
62 | ```
63 |
64 | **6)** For the given multiline input string, filter all lines NOT containing the string `2`.
65 |
66 | ```js
67 | > let purchases = `items qty
68 | apple 24
69 | mango 50
70 | guava 42
71 | onion 31
72 | water 10`
73 |
74 | > const num = // add your solution here
75 |
76 | > console.log(purchases.split('\n')
77 | .filter(e => test(e)) // add your solution here
78 | .join('\n'))
79 | < items qty
80 | mango 50
81 | onion 31
82 | water 10
83 | ```
84 |
85 | > You'd be able to solve this using just the `replace()` method by the end of the [Dot metacharacter and Quantifiers](#dot-metacharacter-and-quantifiers) chapter.
86 |
87 | **7)** For the given array, filter all elements that contain either `a` or `w`.
88 |
89 | ```js
90 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
91 |
92 | > items.filter(w => test(w) || test(w)) // add your solution here
93 | < ['goal', 'new', 'eat']
94 | ```
95 |
96 | **8)** For the given array, filter all elements that contain both `e` and `n`.
97 |
98 | ```js
99 | > let items = ['goal', 'new', 'user', 'sit', 'eat', 'dinner']
100 |
101 | > items.filter(w => test(w) && test(w)) // add your solution here
102 | < ['new', 'dinner']
103 | ```
104 |
105 | **9)** For the given string, replace `0xA0` with `0x7F` and `0xC0` with `0x1F`.
106 |
107 | ```js
108 | > let ip = 'start address: 0xA0, func1 address: 0xC0'
109 |
110 | > ip.replace() // add your solution here
111 | < 'start address: 0x7F, func1 address: 0x1F'
112 | ```
113 |
114 |
115 |
116 | # Anchors
117 |
118 | **1)** Check if the given input strings contain `is` or `the` as whole words.
119 |
120 | ```js
121 | > let str1 = 'is; (this)'
122 | > let str2 = "The food isn't good"
123 | > let str3 = 'the2 cats'
124 | > let str4 = 'switch on the light'
125 |
126 | > const pat1 = // add your solution here
127 | > const pat2 = // add your solution here
128 |
129 | > pat1.test(str1) || pat2.test(str1)
130 | < true
131 | > pat1.test(str2) || pat2.test(str2)
132 | < false
133 | > pat1.test(str3) || pat2.test(str3)
134 | < false
135 | > pat1.test(str4) || pat2.test(str4)
136 | < true
137 | ```
138 |
139 | **2)** For the given input string, change only the whole word `red` to `brown`.
140 |
141 | ```js
142 | > let ip = 'bred red spread credible red;'
143 |
144 | > ip.replace() // add your solution here
145 | < 'bred brown spread credible brown;'
146 | ```
147 |
148 | **3)** For the given array, filter all elements that contain `42` surrounded by word characters.
149 |
150 | ```js
151 | > let items = ['hi42bye', 'nice1423', 'bad42', 'cool_42a', 'fake4b']
152 |
153 | > items.filter(e => test(e)) // add your solution here
154 | < ['hi42bye', 'nice1423', 'cool_42a']
155 | ```
156 |
157 | **4)** For the given input array, filter all elements that start with `den` or end with `ly`.
158 |
159 | ```js
160 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent']
161 |
162 | > items.filter(e => test(e) || test(e)) // add your solution here
163 | < ['lovely', '2 lonely', 'dent']
164 | ```
165 |
166 | **5)** For the given input string, change whole word `mall` to `1234` only if it is at the start of a line.
167 |
168 | ```js
169 | > let para = `(mall) call ball pall
170 | ball fall wall tall
171 | mall call ball pall
172 | wall mall ball fall
173 | mallet wallet malls
174 | mall:call:ball:pall`
175 |
176 | > console.log(para.replace()) // add your solution here
177 | < (mall) call ball pall
178 | ball fall wall tall
179 | 1234 call ball pall
180 | wall mall ball fall
181 | mallet wallet malls
182 | 1234:call:ball:pall
183 | ```
184 |
185 | **6)** For the given array, filter all elements having a line starting with `den` or ending with `ly`.
186 |
187 | ```js
188 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent']
189 |
190 | > items.filter(e => test(e) || test(e)) // add your solution here
191 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent']
192 | ```
193 |
194 | **7)** For the given input array, filter all whole elements `12\nthree` irrespective of case.
195 |
196 | ```js
197 | > let items = ['12\nthree\n', '12\nThree', '12\nthree\n4', '12\nthree']
198 |
199 | > items.filter(e => test(e)) // add your solution here
200 | < ['12\nThree', '12\nthree']
201 | ```
202 |
203 | **8)** For the given input array, replace `hand` with `X` for all elements that start with `hand` followed by at least one word character.
204 |
205 | ```js
206 | > let items = ['handed', 'hand', 'handy', 'un-handed', 'handle', 'hand-2']
207 |
208 | > items.map(w => w.replace()) // add your solution here
209 | < ['Xed', 'hand', 'Xy', 'un-handed', 'Xle', 'hand-2']
210 | ```
211 |
212 | **9)** For the given input array, filter all elements starting with `h`. Additionally, replace `e` with `X` for these filtered elements.
213 |
214 | ```js
215 | > let items = ['handed', 'hand', 'handy', 'unhanded', 'handle', 'hand-2']
216 |
217 | > items.filter(w => test(w)).map(w => w.replace()) // add your solution here
218 | < ['handXd', 'hand', 'handy', 'handlX', 'hand-2']
219 | ```
220 |
221 | **10)** Why does the following code show `false` instead of `true`?
222 |
223 | ```js
224 | > /end$/.test('bend it\nand send\n')
225 | < false
226 | ```
227 |
228 |
229 |
230 | # Alternation and Grouping
231 |
232 | **1)** For the given input array, filter all elements that start with `den` or end with `ly`.
233 |
234 | ```js
235 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\n', 'dent']
236 |
237 | > items.filter() // add your solution here
238 | < ['lovely', '2 lonely', 'dent']
239 | ```
240 |
241 | **2)** For the given array, filter all elements having a line starting with `den` or ending with `ly`.
242 |
243 | ```js
244 | > let items = ['lovely', '1\ndentist', '2 lonely', 'eden', 'fly\nfar', 'dent']
245 |
246 | > items.filter() // add your solution here
247 | < ['lovely', '1\ndentist', '2 lonely', 'fly\nfar', 'dent']
248 | ```
249 |
250 | **3)** For the given input strings, replace all occurrences of `removed` or `reed` or `received` or `refused` with `X`.
251 |
252 | ```js
253 | > let s1 = 'creed refuse removed read'
254 | > let s2 = 'refused reed redo received'
255 |
256 | > const pat1 = // add your solution here
257 |
258 | > s1.replace(pat1, 'X')
259 | < 'cX refuse X read'
260 | > s2.replace(pat1, 'X')
261 | < 'X X redo X'
262 | ```
263 |
264 | **4)** For the given input strings, replace `late` or `later` or `slated` with `A`.
265 |
266 | ```js
267 | > let str1 = 'plate full of slate'
268 | > let str2 = "slated for later, don't be late"
269 |
270 | > const pat2 = // add your solution here
271 |
272 | > str1.replace(pat2, 'A')
273 | < 'pA full of sA'
274 | > str2.replace(pat2, 'A')
275 | < "A for A, don't be A"
276 | ```
277 |
278 |
279 |
280 | # Escaping metacharacters
281 |
282 | **1)** Transform the given input strings to the expected output using the same logic on both strings.
283 |
284 | ```js
285 | > let str1 = '(9-2)*5+qty/3-(9-2)*7'
286 | > let str2 = '(qty+4)/2-(9-2)*5+pq/4'
287 |
288 | > const pat1 = // add your solution here
289 | > str1.replace() // add your solution here
290 | < '35+qty/3-(9-2)*7'
291 | > str2.replace() // add your solution here
292 | < '(qty+4)/2-35+pq/4'
293 | ```
294 |
295 | **2)** Replace `(4)\|` with `2` only at the start or end of the given input strings.
296 |
297 | ```js
298 | > let s1 = '2.3/(4)\\|6 fig 5.3-(4)\\|'
299 | > let s2 = '(4)\\|42 - (4)\\|3'
300 | > let s3 = 'two - (4)\\|\n'
301 |
302 | > const pat2 = // add your solution here
303 |
304 | > s1.replace() // add your solution here
305 | < '2.3/(4)\\|6 fig 5.3-2'
306 | > s2.replace() // add your solution here
307 | < '242 - (4)\\|3'
308 | > s3.replace() // add your solution here
309 | < 'two - (4)\\|\n'
310 | ```
311 |
312 | **3)** Replace any matching element from the array `items` with `X` for the given input strings. Match the elements from `items` literally. Assume no two elements of `items` will result in any matching conflict.
313 |
314 | ```js
315 | > let items = ['a.b', '3+n', 'x\\y\\z', 'qty||price', '{n}']
316 |
317 | // add your solution here
318 | > const pat3 = // add your solution here
319 |
320 | > '0a.bcd'.replace(pat3, 'X')
321 | < '0Xcd'
322 | > 'E{n}AMPLE'.replace(pat3, 'X')
323 | < 'EXAMPLE'
324 | > '43+n2 ax\\y\\ze'.replace(pat3, 'X')
325 | < '4X2 aXe'
326 | ```
327 |
328 | **4)** Replace the backspace character `\b` with a single space character for the given input string.
329 |
330 | ```js
331 | > let ip = '123\b456'
332 |
333 | > ip.replace() // add your solution here
334 | < '123 456'
335 | ```
336 |
337 | **5)** Replace all occurrences of `\e` with `e`.
338 |
339 | ```js
340 | > let ip = 'th\\er\\e ar\\e common asp\\ects among th\\e alt\\ernations'
341 |
342 | > ip.replace() // add your solution here
343 | < 'there are common aspects among the alternations'
344 | ```
345 |
346 | **6)** Replace any matching item from the array `eqns` with `X` for the given string `ip`. Match the items from `eqns` literally.
347 |
348 | ```js
349 | > let ip = '3-(a^b)+2*(a^b)-(a/b)+3'
350 | > let eqns = ['(a^b)', '(a/b)', '(a^b)+2']
351 |
352 | // add your solution here
353 | > const pat4 = // add your solution here
354 |
355 | > ip.replace(pat4, 'X')
356 | < '3-X*X-X+3'
357 | ```
358 |
359 |
360 |
361 | # Dot metacharacter and Quantifiers
362 |
363 | > Use `s` flag for these exercises depending upon the contents of the input strings.
364 |
365 | **1)** Replace `42//5` or `42/5` with `8` for the given input.
366 |
367 | ```js
368 | > let ip = 'a+42//5-c pressure*3+42/5-14256'
369 |
370 | // add your solution here
371 | < 'a+8-c pressure*3+8-14256'
372 | ```
373 |
374 | **2)** For the array `items`, filter all elements starting with `hand` and ending immediately with at most one more character or `le`.
375 |
376 | ```js
377 | > let items = ['handed', 'hand', 'handled', 'handy', 'unhand', 'hands', 'handle']
378 |
379 | // add your solution here
380 | < ['hand', 'handy', 'hands', 'handle']
381 | ```
382 |
383 | **3)** Use the `split()` method to get the output as shown for the given input strings.
384 |
385 | ```js
386 | > let eqn1 = 'a+42//5-c'
387 | > let eqn2 = 'pressure*3+42/5-14256'
388 | > let eqn3 = 'r*42-5/3+42///5-42/53+a'
389 |
390 | > const pat1 = // add your solution here
391 |
392 | > eqn1.split(pat1)
393 | < ['a+', '-c']
394 | > eqn2.split(pat1)
395 | < ['pressure*3+', '-14256']
396 | > eqn3.split(pat1)
397 | < ['r*42-5/3+42///5-', '3+a']
398 | ```
399 |
400 | **4)** For the given input strings, remove everything from the first occurrence of `i` till the end of the string.
401 |
402 | ```js
403 | > let s1 = 'remove the special meaning of such constructs'
404 | > let s2 = 'characters while constructing'
405 | > let s3 = 'input output'
406 |
407 | > const pat2 = // add your solution here
408 |
409 | > s1.replace(pat2, '')
410 | < 'remove the spec'
411 | > s2.replace(pat2, '')
412 | < 'characters wh'
413 | > s3.replace(pat2, '')
414 | < ''
415 | ```
416 |
417 | **5)** For the given strings, construct a regexp to get the output as shown.
418 |
419 | ```js
420 | > let str1 = 'a+b(addition)'
421 | > let str2 = 'a/b(division) + c%d(#modulo)'
422 | > let str3 = 'Hi there(greeting). Nice day(a(b)'
423 |
424 | > const remove_parentheses = // add your solution here
425 |
426 | > str1.replace(remove_parentheses, '')
427 | < 'a+b'
428 | > str2.replace(remove_parentheses, '')
429 | < 'a/b + c%d'
430 | > str3.replace(remove_parentheses, '')
431 | < 'Hi there. Nice day'
432 | ```
433 |
434 | **6)** Correct the given regexp to get the expected output.
435 |
436 | ```js
437 | > let words = 'plink incoming tint winter in caution sentient'
438 |
439 | // wrong output
440 | > const w1 = /int|in|ion|ing|inco|inter|ink/g
441 | > words.replace(w1, 'X')
442 | < "plXk XcomXg tX wXer X cautX sentient"
443 |
444 | // expected output
445 | > const w2 = // add your solution here
446 | > words.replace(w2, 'X')
447 | < "plX XmX tX wX X cautX sentient"
448 | ```
449 |
450 | **7)** For the given greedy quantifiers, what would be the equivalent form using the `{m,n}` representation?
451 |
452 | * `?` is same as
453 | * `*` is same as
454 | * `+` is same as
455 |
456 | **8)** `(a*|b*)` is same as `(a|b)*` — true or false?
457 |
458 | **9)** For the given input strings, remove everything from the first occurrence of `test` (irrespective of case) till the end of the string, provided `test` isn't at the end of the string.
459 |
460 | ```js
461 | > let s1 = 'this is a Test'
462 | > let s2 = 'always test your regexp for corner\ncases'
463 | > let s3 = 'a TEST of skill tests?'
464 |
465 | > let pat3 = // add your solution here
466 |
467 | > s1.replace(pat3, '')
468 | < 'this is a Test'
469 | > s2.replace(pat3, '')
470 | < 'always '
471 | > s3.replace(pat3, '')
472 | < 'a '
473 | ```
474 |
475 | **10)** For the input array `words`, filter all elements starting with `s` and containing `e` and `t` in any order.
476 |
477 | ```js
478 | > let words = ['sequoia', 'subtle', 'exhibit', 'a set', 'sets', 'tests', 'site']
479 |
480 | // add your solution here
481 | < ['subtle', 'sets', 'site']
482 | ```
483 |
484 | **11)** For the input array `words`, remove all elements having less than `6` characters.
485 |
486 | ```js
487 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 'tests', 'site']
488 |
489 | // add your solution here
490 | < ['sequoia', 'subtle', 'exhibit']
491 | ```
492 |
493 | **12)** For the input array `words`, filter all elements starting with `s` or `t` and having a maximum of `6` characters.
494 |
495 | ```js
496 | > let words = ['sequoia', 'subtle', 'exhibit', 'asset', 'sets', 't set', 'site']
497 |
498 | // add your solution here
499 | < ['subtle', 'sets', 't set', 'site']
500 | ```
501 |
502 | **13)** Delete from the string `start` if it is at the beginning of a line up to the next occurrence of the string `end` at the end of a line. Match these keywords irrespective of case.
503 |
504 | ```js
505 | > let para = `good start
506 | start working on that
507 | project you always wanted
508 | to, do not let it end
509 | hi there
510 | start and end the end
511 | 42
512 | Start and try to
513 | finish the End
514 | bye`
515 |
516 | > const mpat = // add your solution here
517 | > console.log(para.replace(mpat, ''))
518 | < good start
519 |
520 | hi there
521 |
522 | 42
523 |
524 | bye
525 | ```
526 |
527 | **14)** Can you reason out why this code results in the output shown? The aim was to remove all `<characters>` patterns but not the `<>` ones. The expected result was `'a 1<> b 2<> c'`.
528 |
529 | ```js
530 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
531 | > ip.replace(/<.+?>/g, '')
532 | < 'a 1 2'
533 | ```
534 |
535 | **15)** Use the `split()` method to get the output as shown below for the given input strings.
536 |
537 | ```js
538 | > let s1 = 'go there :: this :: that'
539 | > let s2 = 'a::b :: c::d e::f :: 4::5'
540 | > let s3 = '42:: hi::bye::see :: carefully'
541 |
542 | > const pat4 = // add your solution here
543 |
544 | > s1.split() // add your solution here
545 | < ['go there', 'this :: that']
546 | > s2.split() // add your solution here
547 | < ['a::b', 'c::d e::f :: 4::5']
548 | > s3.split() // add your solution here
549 | < ['42:: hi::bye::see', 'carefully']
550 | ```
551 |
552 |
553 |
554 | # Working with matched portions
555 |
556 | **1)** For the given strings, extract the matching portion from the first `is` to the last `t`.
557 |
558 | ```js
559 | > let str1 = 'What is the biggest fruit you have seen?'
560 | > let str2 = 'Your mission is to read and practice consistently'
561 |
562 | > const pat1 = // add your solution here
563 |
564 | // add your solution here for str1
565 | < 'is the biggest fruit'
566 | // add your solution here for str2
567 | < 'ission is to read and practice consistent'
568 | ```
569 |
570 | **2)** Find the starting index of the first occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string.
571 |
572 | ```js
573 | > let s1 = 'match after the last newline character'
574 | > let s2 = 'and then you want to test'
575 | > let s3 = 'this is good bye then'
576 | > let s4 = 'who was there to see?'
577 |
578 | > const pat2 = // add your solution here
579 |
580 | // add your solution here for s1
581 | < 12
582 | // add your solution here for s2
583 | < 4
584 | // add your solution here for s3
585 | < 2
586 | // add your solution here for s4
587 | < 4
588 | ```
589 |
590 | **3)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings. Assume that there will be at least one match for each input string.
591 |
592 | ```js
593 | > let s1 = 'match after the last newline character'
594 | > let s2 = 'and then you want to test'
595 | > let s3 = 'this is good bye then'
596 | > let s4 = 'who was there to see?'
597 |
598 | > const pat3 = // add your solution here
599 |
600 | // add your solution here for s1
601 | < 12
602 | // add your solution here for s2
603 | < 18
604 | // add your solution here for s3
605 | < 17
606 | // add your solution here for s4
607 | < 14
608 | ```
609 |
610 | **4)** The given input string contains `:` exactly once. Extract all characters after the `:` as output.
611 |
612 | ```js
613 | > let ip = 'fruits:apple, mango, guava, blueberry'
614 |
615 | // add your solution here
616 | < 'apple, mango, guava, blueberry'
617 | ```
618 |
619 | **5)** Extract all words between `(` and `)` from the given input string as an array (including the parentheses). Assume that the input will not contain any broken parentheses.
620 |
621 | ```js
622 | > let ip = 'another (way) to reuse (portion) matched (by) capture groups'
623 |
624 | // add your solution here
625 | < ['(way)', '(portion)', '(by)']
626 | ```
627 |
628 | **6)** Extract all occurrences of `<` up to the next occurrence of `>`, provided there is at least one character in between `<` and `>`.
629 |
630 | ```js
631 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
632 |
633 | // add your solution here
634 | < ['<apple>', '<> b<bye>', '<> c<cat>']
635 | ```
636 |
637 | **7)** Use `matchAll()` to get the output as shown below for the given input strings. Note the characters used in the input strings carefully.
638 |
639 | ```js
640 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 '
641 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 '
642 |
643 | > const pat4 = // add your solution here
644 |
645 | // add your solution here for row1
646 | < (4) [Array(2), Array(2), Array(2), Array(2)]
647 | 0: (2) ['-2', '5']
648 | 1: (2) ['4', '+3']
649 | 2: (2) ['+42', '-53']
650 | 3: (2) ['4356246', '-357532354']
651 | length: 4
652 | [[Prototype]]: Array(0)
653 |
654 | // add your solution here for row2
655 | < (3) [Array(2), Array(2), Array(2)]
656 | 0: (2) ['1.32', '-3.14']
657 | 1: (2) ['634', '5.63']
658 | 2: (2) ['63.3e3', '9907809345343.235']
659 | length: 3
660 | [[Prototype]]: Array(0)
661 | ```
662 |
663 | **8)** This is an extension to the previous question. Sum each pair of numbers that are separated by a comma.
664 |
665 | * For `row1`, find the sum of integers. For example, sum of `-2` and `5` is `3`.
666 | * For `row2`, find the sum of floating-point numbers. For example, sum of `1.32` and `-3.14` is `-1.82`.
667 |
668 | ```js
669 | > let row1 = '-2,5 4,+3 +42,-53 4356246,-357532354 '
670 | > let row2 = '1.32,-3.14 634,5.63 63.3e3,9907809345343.235 '
671 |
672 | // should be same as the previous question
673 | > const pat5 = // add your solution here
674 |
675 | // add your solution here for row1
676 | < [3, 7, -11, -353176108]
677 |
678 | // add your solution here for row2
679 | < [-1.82, 639.63, 9907809408643.234]
680 | ```
681 |
682 | **9)** Use the `split()` method to get the output as shown below.
683 |
684 | ```js
685 | > let ip = '42:no-output;1000:car-tr:u-ck;SQEX49801'
686 |
687 | // add your solution here
688 | < ['42', 'output', '1000', 'tr:u-ck', 'SQEX49801']
689 | ```
690 |
691 | **10)** Write a string function that changes the given input to alternate case. The first alphabet should be changed to lowercase, the next one to uppercase and then lowercase and so on. Characters other than alphabets should be left alone and not affect case changing.
692 |
693 | ```js
694 | > function aLtErNaTeCaSe(ip) {
695 | // add your solution here
696 | }
697 |
698 | > aLtErNaTeCaSe('HI THERE!')
699 | < 'hI tHeRe!'
700 | > aLtErNaTeCaSe('good morning')
701 | < 'gOoD mOrNiNg'
702 | > aLtErNaTeCaSe('Sample123string42with777numbers')
703 | < 'sAmPlE123sTrInG42wItH777nUmBeRs'
704 | ```
705 |
706 | **11)** Replace all occurrences of `par` with `spar`, `spare` with `extra` and `park` with `garden`.
707 |
708 | ```js
709 | > let s1 = 'apartment has a park'
710 | > let s2 = 'do you have a spare cable'
711 | > let s3 = 'write a parser'
712 |
713 | > let d1 = // add your solution here
714 | > const pat6 = // add your solution here
715 |
716 | > s1.replace(pat6, k => d1[k])
717 | < 'aspartment has a garden'
718 | > s2.replace(pat6, k => d1[k])
719 | < 'do you have a extra cable'
720 | > s3.replace(pat6, k => d1[k])
721 | < 'write a sparser'
722 | ```
723 |
724 | **12)** Name the flag and property you can use with the `match()` method to get both the starting and ending locations of the matched portions.
725 |
726 |
727 |
728 | # Character class
729 |
730 | **1)** For the array `items`, filter all elements starting with `hand` and ending with `s` or `y` or `le`. No other character in between, for example, `hands` should match but not `hand-has`.
731 |
732 | ```js
733 | > let items = ['-handy', 'hand', 'handy', 'unhand', 'hands', 'hand-icy', 'handle']
734 |
735 | // add your solution here
736 | < ['handy', 'hands', 'handle']
737 | ```
738 |
739 | **2)** Replace all whole words `reed` or `read` or `red` with `X`.
740 |
741 | ```js
742 | > let ip = 'redo red credible :read: rod reed bred'
743 |
744 | // add your solution here
745 | < 'redo X credible :X: rod X bred'
746 | ```
747 |
748 | **3)** For the array `words`, filter all elements containing `e` or `i` followed by `l` or `n`. Note that the order mentioned should be followed.
749 |
750 | ```js
751 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest']
752 |
753 | // add your solution here
754 | < ['surrender', 'unicorn', 'eel']
755 | ```
756 |
757 | **4)** For the array `words`, filter all elements containing `e` or `i` and `l` or `n` in any order.
758 |
759 | ```js
760 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest']
761 |
762 | // add your solution here
763 | < ['surrender', 'unicorn', 'newer', 'eel']
764 | ```
765 |
766 | **5)** Extract all hex character sequences, with `0x` optional prefix. Match the characters case insensitively, and the sequences shouldn't be surrounded by other word characters.
767 |
768 | ```js
769 | > let str1 = '128A foo 0xfe32 34 0xbar'
770 | > let str2 = '0XDEADBEEF place 0x0ff1ce bad'
771 |
772 | > const hex_seq = // add your solution here
773 |
774 | > str1.match(hex_seq)
775 | < ['128A', '0xfe32', '34']
776 | > str2.match(hex_seq)
777 | < ['0XDEADBEEF', '0x0ff1ce', 'bad']
778 | ```
779 |
780 | **6)** Delete from `(` to the next occurrence of `)` unless they contain parentheses characters in between.
781 |
782 | ```js
783 | > let str1 = 'def factorial()'
784 | > let str2 = 'a/b(division) + c%d(#modulo) - (e+(j/k-3)*4)'
785 | > let str3 = 'Hi there(greeting). Nice day(a(b)'
786 |
787 | > const remove_parentheses = // add your solution here
788 |
789 | > str1.replace(remove_parentheses, '')
790 | < 'def factorial'
791 | > str2.replace(remove_parentheses, '')
792 | < 'a/b + c%d - (e+*4)'
793 | > str3.replace(remove_parentheses, '')
794 | < 'Hi there. Nice day(a'
795 | ```
796 |
797 | **7)** For the array `words`, filter all elements not starting with `e` or `p` or `u`.
798 |
799 | ```js
800 | > let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', '(pest)']
801 |
802 | // add your solution here
803 | < ['surrender', 'newer', 'door', '(pest)']
804 | ```
805 |
806 | **8)** For the array `words`, filter all elements not containing `u` or `w` or `ee` or `-`.
807 |
808 | ```js
809 | > let words = ['p-t', 'you', 'tea', 'heel', 'owe', 'new', 'reed', 'ear']
810 |
811 | // add your solution here
812 | < ['tea', 'ear']
813 | ```
814 |
815 | **9)** The given input strings contain fields separated by `,` and fields can be empty too. Replace the last three fields with `WHTSZ323`.
816 |
817 | ```js
818 | > let row1 = '(2),kite,12,,D,C,,'
819 | > let row2 = 'hi,bye,sun,moon'
820 |
821 | > const pat1 = // add your solution here
822 |
823 | // add your solution here for row1
824 | < '(2),kite,12,,D,WHTSZ323'
825 | // add your solution here for row2
826 | < 'hi,WHTSZ323'
827 | ```
828 |
829 | **10)** Split the given strings based on consecutive sequence of digit or whitespace characters.
830 |
831 | ```js
832 | > let s1 = 'lion \t Ink32onion Nice'
833 | > let s2 = '**1\f2\n3star\t7 77\r**'
834 |
835 | > const pat2 = // add your solution here
836 |
837 | > s1.split(pat2)
838 | < ['lion', 'Ink', 'onion', 'Nice']
839 | > s2.split(pat2)
840 | < ['**', 'star', '**']
841 | ```
842 |
843 | **11)** Delete all occurrences of the sequence `<characters>` where `characters` is one or more non `>` characters and cannot be empty.
844 |
845 | ```js
846 | > let ip = 'a<apple> 1<> b<bye> 2<> c<cat>'
847 |
848 | // add your solution here
849 | < 'a 1<> b 2<> c'
850 | ```
851 |
852 | **12)** `\b[a-z](on|no)[a-z]\b` is same as `\b[a-z][on]{2}[a-z]\b`. True or False? Sample input lines shown below might help to understand the differences, if any.
853 |
854 | ```js
855 | > console.log('known\nmood\nknow\npony\ninns')
856 | known
857 | mood
858 | know
859 | pony
860 | inns
861 | ```
862 |
863 | **13)** For the given array, filter elements containing any number sequence greater than `624`.
864 |
865 | ```js
866 | > let items = ['hi0000432abcd', 'car00625', '42_624 0512', '3.14 96 2 foo1234baz']
867 |
868 | // add your solution here
869 | < ['car00625', '3.14 96 2 foo1234baz']
870 | ```
871 |
872 | **14)** Convert the given input string to two different arrays as shown below.
873 |
874 | ```js
875 | > let ip = 'price_42 roast^\t\n^-ice==cat\neast'
876 |
877 | // add your solution here
878 | < ['price_42', 'roast', 'ice', 'cat', 'east']
879 |
880 | // add your solution here
881 | < ['price_42', ' ', 'roast', '^\t\n^-', 'ice', '==', 'cat', '\n', 'east']
882 | ```
883 |
884 | **15)** Filter all elements whose first non-whitespace character is not a `#` character. Any element made up of only whitespace characters should be ignored as well.
885 |
886 | ```js
887 | > let items = [' #comment', '\t\napple #42', '#oops', 'sure', 'no#1', '\t\r\f']
888 |
889 | // add your solution here
890 | < ['\t\napple #42', 'sure', 'no#1']
891 | ```
892 |
893 | **16)** For the given string, surround all whole words with `{}` except `par` and `cat`.
894 |
895 | ```js
896 | > let ip = 'part; cat {super} rest_42 par scatter'
897 |
898 | // add your solution here
899 | < '{part}; cat {{super}} {rest_42} par {scatter}'
900 | ```
901 |
902 |
903 |
904 | # Groupings and backreferences
905 |
906 | **1)** Replace the space character that occurs after a word ending with `a` or `r` with a newline character.
907 |
908 | ```js
909 | > let ip = 'area not a _a2_ roar took 22'
910 |
911 | > console.log() // add your solution here
912 | area
913 | not a
914 | _a2_ roar
915 | took 22
916 | ```
917 |
918 | **2)** Add `[]` around words starting with `s` and containing `e` and `t` in any order.
919 |
920 | ```js
921 | > let ip = 'sequoia subtle exhibit asset sets2 tests si_te'
922 |
923 | // add your solution here
924 | < 'sequoia [subtle] exhibit asset [sets2] tests [si_te]'
925 | ```
926 |
927 | **3)** Replace all whole words with `X` that start and end with the same word character (irrespective of case). Single character word should get replaced with `X` too, as it satisfies the stated condition.
928 |
929 | ```js
930 | > let ip = 'oreo not a _a2_ Roar took 22'
931 |
932 | // add your solution here
933 | < 'X not X X X took X'
934 | ```
935 |
936 | **4)** Convert the given *markdown* headers to corresponding *anchor* tags. Consider the input to start with one or more `#` characters followed by space and word characters. The `name` attribute is constructed by converting the header to lowercase and replacing spaces with hyphens. Can you do it without using a capture group?
937 |
938 | ```js
939 | > let header1 = '# Regular Expressions'
940 | > let header2 = '## Named capture groups'
941 |
942 | > function hyphenify(m) {
943 | // add your solution here
944 | }
945 |
946 | > header1.replace() // add your solution here
947 | < "# <a name='regular-expressions'></a>Regular Expressions"
948 | > header2.replace() // add your solution here
949 | < "## <a name='named-capture-groups'></a>Named capture groups"
950 | ```
951 |
952 | **5)** Convert the given *markdown* anchors to corresponding *hyperlinks*.
953 |
954 | ```js
955 | > let anchor1 = "# <a name='regular-expressions'></a>Regular Expressions"
956 | > let anchor2 = "## <a name='subexpression-calls'></a>Subexpression calls"
957 |
958 | > const hyperlink = // add your solution here
959 |
960 | > anchor1.replace() // add your solution here
961 | < '[Regular Expressions](#regular-expressions)'
962 | > anchor2.replace() // add your solution here
963 | < '[Subexpression calls](#subexpression-calls)'
964 | ```
965 |
966 | **6)** Check if the given input strings have words with at least two consecutive repeated alphabets irrespective of case. For example, words like `stillnesS` and `Committee` should return `true` but words like `root` or `readable` or `rotational` should return `false`. Consider word to be as defined in regular expression parlance.
967 |
968 | ```js
969 | > let s1 = 'readable COMMItTEe'
970 | > let s2 = 'rotational sti1lness _foot_'
971 | > let s3 = 'needed repeated'
972 | > let s4 = 'offsh00t'
973 |
974 | > const pat1 = // add your solution here
975 |
976 | > pat1.test(s1)
977 | < true
978 | > pat1.test(s2)
979 | < false
980 | > pat1.test(s3)
981 | < false
982 | > pat1.test(s4)
983 | < true
984 | ```
985 |
986 | **7)** For the given input string, replace all occurrences of digit sequences with only the unique non-repeating sequence. For example, `232323` should be changed to `23` and `897897` should be changed to `897`. If there are no repeats (for example `1234`) or if the repeats end prematurely (for example `12121`), it should not be changed.
987 |
988 | ```js
989 | > let ip = '1234 2323 453545354535 9339 11 60260260'
990 |
991 | // add your solution here
992 | < '1234 23 4535 9339 1 60260260'
993 | ```
994 |
995 | **8)** Replace sequences made up of words separated by `:` or `.` by the first word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character.
996 |
997 | ```js
998 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.'
999 |
1000 | // add your solution here
1001 | < 'wow hi-2 bye kite'
1002 | ```
1003 |
1004 | **9)** Replace sequences made up of words separated by `:` or `.` by the last word of the sequence. Such sequences will end when `:` or `.` is not followed by a word character.
1005 |
1006 | ```js
1007 | > let ip = 'wow:Good:2_two.five: hi-2 bye kite.777:water.'
1008 |
1009 | // add your solution here
1010 | < 'five hi-2 bye water'
1011 | ```
1012 |
1013 | **10)** Split the given input string on one or more repeated sequences of `cat`.
1014 |
1015 | ```js
1016 | > let ip = 'firecatlioncatcatcatbearcatcatparrot'
1017 |
1018 | // add your solution here
1019 | < ['fire', 'lion', 'bear', 'parrot']
1020 | ```
1021 |
1022 | **11)** For the given input string, find all occurrences of digit sequences with at least one repeating sequence. For example, `232323` and `897897`. If the repeats end prematurely, for example `12121`, it should not be matched.
1023 |
1024 | ```js
1025 | > let ip = '1234 2323 453545354535 9339 11 60260260'
1026 |
1027 | > const pat2 = // add your solution here
1028 |
1029 | // entire sequences in the output
1030 | // add your solution here
1031 | < ['2323', '453545354535', '11']
1032 |
1033 | // only the unique sequence in the output
1034 | // add your solution here
1035 | < ['23', '4535', '1']
1036 | ```
1037 |
1038 | **12)** Convert the comma separated strings to corresponding key-value pair mapping as shown below. The keys are `name`, `maths` and `phy` for the three fields in the input strings.
1039 |
1040 | ```js
1041 | > let row1 = 'rohan,75,89'
1042 | > let row2 = 'rose,88,92'
1043 |
1044 | > const pat3 = // add your solution here
1045 |
1046 | // add your solution here for row1
1047 | < {name: 'rohan', maths: '75', phy: '89'}
1048 |
1049 | // add your solution here for row2
1050 | < {name: 'rose', maths: '88', phy: '92'}
1051 | ```
1052 |
1053 | **13)** Surround all whole words with `()`. Additionally, if the whole word is `imp` or `ant`, delete them. Can you do it with just a single substitution?
1054 |
1055 | ```js
1056 | > let ip = 'tiger imp goat eagle ant important'
1057 |
1058 | // add your solution here
1059 | < '(tiger) () (goat) (eagle) () (important)'
1060 | ```
1061 |
1062 |
1063 |
1064 | # Lookarounds
1065 |
1066 | > Use lookarounds for solving the following exercises even if they are not required.
1067 |
1068 | **1)** Replace all whole words with `X` unless they are preceded by a `(` character.
1069 |
1070 | ```js
1071 | > let ip = '(apple) guava berry) apple (mango) (grape'
1072 |
1073 | // add your solution here
1074 | < '(apple) X X) X (mango) (grape'
1075 | ```
1076 |
1077 | **2)** Replace all whole words with `X` unless they are followed by a `)` character.
1078 |
1079 | ```js
1080 | > let ip = '(apple) guava berry) apple (mango) (grape'
1081 |
1082 | // add your solution here
1083 | < '(apple) X berry) X (mango) (X'
1084 | ```
1085 |
1086 | **3)** Replace all whole words with `X` unless they are preceded by `(` or followed by `)` characters.
1087 |
1088 | ```js
1089 | > let ip = '(apple) guava berry) apple (mango) (grape'
1090 |
1091 | // add your solution here
1092 | < '(apple) X berry) X (mango) (grape'
1093 | ```
1094 |
1095 | **4)** Extract all whole words that do not end with `e` or `n`.
1096 |
1097 | ```js
1098 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u'
1099 |
1100 | // add your solution here
1101 | < ['a_t', 'row', 'Dust', 'end', 'a2', 'u']
1102 | ```
1103 |
1104 | **5)** Extract all whole words that do not start with `a` or `d` or `n`.
1105 |
1106 | ```js
1107 | > let ip = 'a_t row on Urn e note Dust n end a2-e|u'
1108 |
1109 | // add your solution here
1110 | < ['row', 'on', 'Urn', 'e', 'Dust', 'end', 'e', 'u']
1111 | ```
1112 |
1113 | **6)** Extract all whole words only if they are followed by `:` or `,` or `-`.
1114 |
1115 | ```js
1116 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1117 |
1118 | // add your solution here
1119 | < ['Poke', 'so_good', 'ever2']
1120 | ```
1121 |
1122 | **7)** Extract all whole words only if they are preceded by `=` or `/` or `-`.
1123 |
1124 | ```js
1125 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1126 |
1127 | // add your solution here
1128 | < ['so_good', 'is', 'sit']
1129 | ```
1130 |
1131 | **8)** Extract all whole words only if they are preceded by `=` or `:` and followed by `:` or `.`.
1132 |
1133 | ```js
1134 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1135 |
1136 | // add your solution here
1137 | < ['so_good', 'ink']
1138 | ```
1139 |
1140 | **9)** Extract all whole words only if they are preceded by `=` or `:` or `.` or `(` or `-` and not followed by `.` or `/`.
1141 |
1142 | ```js
1143 | > let ip = 'Poke,on=-=so_good:ink.to/is(vast)ever2-sit'
1144 |
1145 | // add your solution here
1146 | < ['so_good', 'vast', 'sit']
1147 | ```
1148 |
1149 | **10)** Remove the leading and trailing whitespaces from all the individual fields where `,` is the field separator.
1150 |
1151 | ```js
1152 | > let csv1 = ' comma ,separated ,values \t\r '
1153 | > let csv2 = 'good bad,nice ice , 42 , , stall small'
1154 |
1155 | > const trim_whitespace = // add your solution here
1156 |
1157 | > csv1.replace(trim_whitespace, '')
1158 | < 'comma,separated,values'
1159 | > csv2.replace(trim_whitespace, '')
1160 | < 'good bad,nice ice,42,,stall small'
1161 | ```
1162 |
1163 | **11)** Filter elements that satisfy all of these rules:
1164 |
1165 | * should have at least two alphabets
1166 | * should have at least three digits
1167 | * should have at least one special character among `%` or `*` or `#` or `$`
1168 | * should not end with a whitespace character
1169 |
1170 | ```js
1171 | > let pwds = ['hunter2', 'F2h3u%9', '*X3Yz3.14\t', 'r2_d2_42', 'A $B C1234']
1172 |
1173 | // add your solution here
1174 | < ['F2h3u%9', 'A $B C1234']
1175 | ```
1176 |
1177 | **12)** For the given string, surround all whole words with `{}` except for whole words `par` and `cat` and `apple`.
1178 |
1179 | ```js
1180 | > let ip = 'part; cat {super} rest_42 par scatter apple spar'
1181 |
1182 | // add your solution here
1183 | < '{part}; cat {{super}} {rest_42} par {scatter} apple {spar}'
1184 | ```
1185 |
1186 | **13)** Extract the integer portion of floating-point numbers for the given string. A number ending with `.` and no further digits should not be considered.
1187 |
1188 | ```js
1189 | > let ip = '12 ab32.4 go 5 2. 46.42 5'
1190 |
1191 | // add your solution here
1192 | < ['32', '46']
1193 | ```
1194 |
1195 | **14)** For the given input strings, extract all overlapping two character sequences.
1196 |
1197 | ```js
1198 | > let s1 = 'apple'
1199 | > let s2 = '1.2-3:4'
1200 |
1201 | > const pat1 = // add your solution here
1202 |
1203 | // add your solution here for s1
1204 | < ['ap', 'pp', 'pl', 'le']
1205 | // add your solution here for s2
1206 | < ['1.', '.2', '2-', '-3', '3:', ':4']
1207 | ```
1208 |
1209 | **15)** The given input strings contain fields separated by the `:` character. Delete `:` and the last field if there is a digit character anywhere before the last field.
1210 |
1211 | ```js
1212 | > let s1 = '42:cat'
1213 | > let s2 = 'twelve:a2b'
1214 | > let s3 = 'we:be:he:0:a:b:bother'
1215 | > let s4 = 'apple:banana-42:cherry:'
1216 | > let s5 = 'dragon:unicorn:centaur'
1217 |
1218 | > const pat2 = // add your solution here
1219 |
1220 | > s1.replace(pat2, '')
1221 | < '42'
1222 | > s2.replace(pat2, '')
1223 | < 'twelve:a2b'
1224 | > s3.replace(pat2, '')
1225 | < 'we:be:he:0:a:b'
1226 | > s4.replace(pat2, '')
1227 | < 'apple:banana-42:cherry'
1228 | > s5.replace(pat2, '')
1229 | < 'dragon:unicorn:centaur'
1230 | ```
1231 |
1232 | **16)** Extract all whole words unless they are preceded by `:` or `<=>` or `----` or `#`.
1233 |
1234 | ```js
1235 | > let ip = '::very--at<=>row|in.a_b#b2c=>lion----east'
1236 |
1237 | // add your solution here
1238 | < ['at', 'in', 'a_b', 'lion']
1239 | ```
1240 |
1241 | **17)** Match a string if it contains `qty` followed by `price` but not if there is any whitespace character or the string `error` between them.
1242 |
1243 | ```js
1244 | > let str1 = '23,qty,price,42'
1245 | > let str2 = 'qty price,oh'
1246 | > let str3 = '3.14,qty,6,errors,9,price,3'
1247 | > let str4 = '42\nqty-6,apple-56,price-234,error'
1248 | > let str5 = '4,price,3.14,qty,4'
1249 | > let str6 = '(qtyprice) (hi-there)'
1250 |
1251 | > const neg = // add your solution here
1252 |
1253 | > neg.test(str1)
1254 | < true
1255 | > neg.test(str2)
1256 | < false
1257 | > neg.test(str3)
1258 | < false
1259 | > neg.test(str4)
1260 | < true
1261 | > neg.test(str5)
1262 | < false
1263 | > neg.test(str6)
1264 | < true
1265 | ```
1266 |
1267 | **18)** Can you reason out why the following regular expressions behave differently?
1268 |
1269 | ```js
1270 | > let ip = 'I have 12, he has 2!'
1271 |
1272 | > ip.replace(/\b..\b/g, '{$&}')
1273 | < '{I }have {12}{, }{he} has{ 2}!'
1274 |
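1275 | > ip.replace(/(?<!\w)..(?!\w)/g, '{$&}')
1276 | < 'I have {12}, {he} has {2!}'
1277 | ```
1278 |
1279 | **19)** Simulate string partitioning to get an array of three elements — string before the separator, portion matched by the separator and string after the separator. For the first case, split the given input string on the first occurrence of digits. For the second case, split based on the last occurrence of digits.
1280 |
1281 | ```js
1282 | > let w2 = 'Sample123string42with777numbers'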
1283 |
1284 | // add your solution here for splitting based on the first occurrence
1285 | < ['Sample', '123', 'string42with777numbers']
1286 |
1287 | // add your solution here for splitting based on the last occurrence
1288 | < ['Sample123string42with', '777', 'numbers']
1289 | ```
1290 |
1291 | **20)** Find the starting index of the last occurrence of `is` or `the` or `was` or `to` for the given input strings using the `search()` method. Assume that there will be at least one match for each input string.
1292 |
1293 | ```js
1294 | > let s1 = 'match after the last newline character'
1295 | > let s2 = 'and then you want to test'
1296 | > let s3 = 'this is good bye then'
1297 | > let s4 = 'who was there to see?'
1298 |
1299 | > const pat3 = // add your solution here
1300 |
1301 | > s1.search(pat3)
1302 | < 12
1303 | > s2.search(pat3)
1304 | < 18
1305 | > s3.search(pat3)
1306 | < 17
1307 | > s4.search(pat3)
1308 | < 14
1309 | ```
1310 |
1311 |
1312 |
1313 | # Unicode
1314 |
1315 | **1)** Check if the given input strings are made up of ASCII characters only. Consider the input to be non-empty strings and any character that isn't part of the 7-bit ASCII set should result in `false`.
1316 |
1317 | ```js
1318 | > let str1 = '123 × 456'
1319 | > let str2 = 'good fοοd'
1320 | > let str3 = 'happy learning!'
1321 |
1322 | > const pat1 = // add your solution here
1323 |
1324 | > pat1.test(str1)
1325 | < false
1326 | > pat1.test(str2)
1327 | < false
1328 | > pat1.test(str3)
1329 | < true
1330 | ```
1331 |
1332 | **2)** Retain only the punctuation characters for the given string.
1333 |
1334 | ```js
1335 | > let ip = '❨a❩❪1❫❬b❭❮2❯❰c❱❲3❳❴xyz❵⟅123⟆⟦⟧⟨like⟩⟪3.14⟫'
1336 |
1337 | // add your solution here
1338 | < '❨❩❪❫❬❭❮❯❰❱❲❳❴❵⟅⟆⟦⟧⟨⟩⟪.⟫'
1339 | ```
1340 |
1341 | **3)** Is the following code snippet showing the correct output?
1342 |
1343 | ```js
1344 | > 'fox:αλεπού'.match(/\w+/g)
1345 | < ['fox']
1346 | ```
1347 |
1348 | **4)** Name the set operations enabled by the `v` flag.
1349 |
1350 | **5)** Extract all whole words from the given strings. However, do not match words if they contain any character present in the `ignore` variable.
1351 |
1352 | ```js
1353 | > let s1 = 'match after the last new_line character A2'
1354 | > let s2 = 'and then you want to test'
1355 |
1356 | > let ignore = 'aty'
1357 | > const ign1 = // add your solution here
1358 | > s1.match(ign1)
1359 | < ['new_line', 'A2']
1360 | > s2.match(ign1)
1361 | < null
1362 |
1363 | > let ignore = 'esw'
1364 | // should be the same solution used above
1365 | > const ign2 = // add your solution here
1366 | > s1.match(ign2)
1367 | < ['match', 'A2']
1368 | > s2.match(ign2)
1369 | < ['and', 'you', 'to']
1370 | ```
1371 |
1372 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Sundeep Agarwal
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Understanding JavaScript RegExp
2 |
3 | Learn JavaScript Regular Expressions step-by-step from beginner to advanced levels with hundreds of examples and exercises. Visit https://youtu.be/8X-hUel3GxM for a short video about the book.
4 |
5 |
6 |
7 | The book also includes exercises to test your understanding, which are presented together as a single file in this repo — [Exercises.md](./Exercises.md).
8 |
9 | For solutions to the exercises, see [Exercise_solutions.md](./Exercise_solutions.md).
10 |
11 | See [Version_changes.md](./Version_changes.md) to keep track of changes made to the book.
12 |
13 |
14 |
15 | # E-book
16 |
17 | * You can purchase the pdf/epub versions of the book using these links:
18 | * https://learnbyexample.gumroad.com/l/js_regexp
19 | * https://leanpub.com/js_regexp
20 | * You can also get the book as part of these bundles:
21 | * **All books bundle** from https://learnbyexample.gumroad.com/l/all-books
22 | * Includes all my programming books
23 | * **Awesome Regex** bundle from https://learnbyexample.gumroad.com/l/regex or https://leanpub.com/b/regex
24 | * See https://learnbyexample.github.io/books/ for a list of other books
25 |
26 | For a preview of the book, see [sample chapters](./sample_chapters/js_regexp_sample.pdf).
27 |
28 | The book can also be [viewed as a single markdown file in this repo](./js_regexp.md). See my blogpost on [generating pdfs from markdown using pandoc](https://learnbyexample.github.io/customizing-pandoc/) if you are interested in the ebook creation process.
29 |
30 | For the web version of the book, visit https://learnbyexample.github.io/learn_js_regexp/
31 |
32 |
33 |
34 | # Testimonials
35 |
36 | >Literally was having a mini-breakdown about not understanding Regex in algorithm solutions the other day and now I'm feeling so much better, so thank YOU! I genuinely feel like I'm developing the skill for spotting when and where to use them after so much practice!
37 | >
38 | > — [feedback on twitter](https://twitter.com/codingwithlucy/status/1450668315635036160)
39 |
40 |
41 |
42 | # Feedback
43 |
44 | ⚠️ ⚠️ Please DO NOT submit pull requests. Main reason being any modification requires changes in multiple places.
45 |
46 | I would highly appreciate it if you'd let me know how you felt about this book. It could be anything from a simple thank you, pointing out a typo, mistakes in code snippets, which aspects of the book worked for you (or didn't!) and so on. Reader feedback is essential and especially so for self-published authors.
47 |
48 | You can reach me via:
49 |
50 | * Issue Manager: [https://github.com/learnbyexample/learn_js_regexp/issues](https://github.com/learnbyexample/learn_js_regexp/issues)
51 | * E-mail: `echo 'bGVhcm5ieWV4YW1wbGUubmV0QGdtYWlsLmNvbQo=' | base64 --decode`
52 | * Twitter: [https://twitter.com/learn_byexample](https://twitter.com/learn_byexample)
53 |
54 |
55 |
56 | # Table of Contents
57 |
58 | 1. Preface
59 | 2. Why is it needed?
60 | 3. RegExp introduction
61 | 4. Anchors
62 | 5. Alternation and Grouping
63 | 6. Escaping metacharacters
64 | 7. Dot metacharacter and Quantifiers
65 | 8. Interlude: Tools for debugging and visualization
66 | 9. Working with matched portions
67 | 10. Character class
68 | 11. Groupings and backreferences
69 | 12. Interlude: Common tasks
70 | 13. Lookarounds
71 | 14. Unicode
72 | 15. Further Reading
73 |
74 |
75 |
76 | # Acknowledgements
77 |
78 | * [MDN: Regular Expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions) — documentation and examples
79 | * [/r/learnjavascript/](https://old.reddit.com/r/learnjavascript/) and [/r/regex/](https://old.reddit.com/r/regex/) — helpful forums for beginners and experienced programmers alike
80 | * [stackoverflow](https://stackoverflow.com/) — for getting answers to pertinent questions on JavaScript and regular expressions
81 | * [tex.stackexchange](https://tex.stackexchange.com/) — for help on [pandoc](https://github.com/jgm/pandoc/) and `tex` related questions
82 | * [canva](https://www.canva.com/) — cover image
83 | * [Warning](https://commons.wikimedia.org/wiki/File:Warning_icon.svg) and [Info](https://commons.wikimedia.org/wiki/File:Info_icon_002.svg) icons by [Amada44](https://commons.wikimedia.org/wiki/User:Amada44) under public domain
84 | * [oxipng](https://github.com/shssoichiro/oxipng), [pngquant](https://pngquant.org/) and [svgcleaner](https://github.com/RazrFalcon/svgcleaner) — optimizing images
85 | * [mdBook](https://github.com/rust-lang/mdBook) — for web version of the book
86 | * [mdBook-pagetoc](https://github.com/JorelAli/mdBook-pagetoc) — for adding table of contents for each chapter
87 | * [minify-html](https://github.com/wilsonzlin/minify-html) — for minifying html files
88 |
89 |
90 |
91 | # License
92 |
93 | The book is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/).
94 |
95 | The code snippets are licensed under MIT, see [LICENSE](./LICENSE) file.
96 |
97 |
--------------------------------------------------------------------------------
/Version_changes.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ### 2.0
4 |
5 | * Examples and exercises added for `d` and `v` flags
6 | * Strings in code snippets changed to be uniformly represented in single quotes
7 | * In general, many of the examples, exercises, solutions, descriptions and external links were updated/corrected
8 | * Updated Acknowledgements section
9 | * Code snippets related to info/warning sections will now appear as a single block
10 | * Book title changed to **Understanding JavaScript RegExp**
11 | * New cover image
12 | * Images centered for EPUB format
13 |
14 |
15 |
16 | ### 1.6
17 |
18 | * Code snippets checked to work with Chrome/Chromium console version 89+
19 | * Updated `escapeRegExp` function as per [MDN: Regular Expressions doc](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions)
20 | * Typo and miscellaneous changes
21 |
22 |
23 |
24 | ### 1.5
25 |
26 | * Added **epub** version of the book
27 | * Added plenty of new exercises, perhaps too many
28 | * Added solutions for the exercises to the repo
29 | * Added two interlude chapters to highlight external resources
30 | * Added separate section about escape sequences
31 | * Updated and clarified descriptions for many concepts, typo corrections and miscellaneous changes, too many changes to list individually
32 |
33 |
34 |
35 | ### 1.0
36 |
37 | * First version
38 |
39 |
--------------------------------------------------------------------------------
/code_snippets/Alternation_and_Grouping.js:
--------------------------------------------------------------------------------
1 | // Alternation
2 |
3 | const pets = /cat|dog/
4 |
5 | pets.test('I like cats')
6 |
7 | pets.test('I like dogs')
8 |
9 | pets.test('I like parrots')
10 |
11 | 'catapults concatenate cat scat cater'.replace(/^cat|cat\b/g, 'X')
12 |
13 | 'cat dog bee parrot fox'.replace(/cat|dog|fox/g, 'mammal')
14 |
15 | // Grouping
16 |
17 | 'red reform read arrest'.replace(/reform|rest/g, 'X')
18 |
19 | 'red reform read arrest'.replace(/re(form|st)/g, 'X')
20 |
21 | 'par spare part party'.replace(/\bpar\b|\bpart\b/g, 'X')
22 |
23 | 'par spare part party'.replace(/\b(par|part)\b/g, 'X')
24 |
25 | 'par spare part party'.replace(/\bpar(|t)\b/g, 'X')
26 |
27 | // Precedence rules
28 |
29 | let words = 'lion elephant are rope not'
30 |
31 | words.replace(/on|ant/, 'X')
32 |
33 | words.replace(/ant|on/, 'X')
34 |
35 | let mood = 'best years'
36 |
37 | mood.replace(/year|years/, 'X')
38 |
39 | mood.replace(/years|year/, 'X')
40 |
41 | let sample = 'ear xerox at mare part learn eye'
42 |
43 | sample.replace(/ar|are|art/g, 'X')
44 |
45 | sample.replace(/are|ar|art/g, 'X')
46 |
47 | sample.replace(/are|art|ar/g, 'X')
48 |
49 |
--------------------------------------------------------------------------------
/code_snippets/Anchors.js:
--------------------------------------------------------------------------------
1 | // String anchors
2 |
3 | /^cat/.test('cater')
4 |
5 | /^cat/.test('concatenation')
6 |
7 | /^hi/.test('hi hello\ntop spot')
8 |
9 | /^top/.test('hi hello\ntop spot')
10 |
11 | /are$/.test('spare')
12 |
13 | /are$/.test('nearest')
14 |
15 | let words = ['surrender', 'unicorn', 'newer', 'door', 'empty', 'eel', 'pest']
16 |
17 | words.filter(w => /er$/.test(w))
18 |
19 | words.filter(w => /t$/.test(w))
20 |
21 | /^cat$/.test('cat')
22 |
23 | /^cat$/.test('cater')
24 |
25 | 'live'.replace(/^/, 're')
26 |
27 | 'send'.replace(/^/, 're')
28 |
29 | 'cat'.replace(/$/, 'er')
30 |
31 | 'hack'.replace(/$/, 'er')
32 |
33 | // Line anchors
34 |
35 | /^top/m.test('hi hello\ntop spot')
36 |
37 | /er$/m.test('spare\npar\nera\ndare')
38 |
39 | let elements = ['spare\ntool', 'par\n', 'dare', 'spared']
40 |
41 | elements.filter(e => /are$/m.test(e))
42 |
43 | /^par$/m.test('spare\npar\nera\ndare')
44 |
45 | let items = 'catapults\nconcatenate\ncat'
46 |
47 | console.log(items.replace(/^/gm, '* '))
48 |
49 | console.log(items.replace(/$/gm, '.'))
50 |
51 | console.log('1\n2\n'.replace(/^/mg, 'fig '))
52 |
53 | console.log('1\n2\n'.replace(/$/mg, ' apple'))
54 |
55 | // Word anchors
56 |
57 | let words = 'par spar apparent spare part'
58 |
59 | words.replace(/par/g, 'X')
60 |
61 | words.replace(/\bpar/g, 'X')
62 |
63 | words.replace(/par\b/g, 'X')
64 |
65 | words.replace(/\bpar\b/g, 'X')
66 |
67 | let words = 'par spar apparent spare part'
68 |
69 | console.log(words.replace(/\b/g, '"').replace(/ /g, ','))
70 |
71 | 'output=num1+35*42/num2'.replace(/\b/g, ' ')
72 |
73 | 'output=num1+35*42/num2'.replace(/\b/g, ' ').trim()
74 |
75 | // Opposite Word Anchor
76 |
77 | let words = 'par spar apparent spare part'
78 |
79 | words.replace(/\Bpar/g, 'X')
80 |
81 | words.replace(/\Bpar\b/g, 'X')
82 |
83 | words.replace(/par\B/g, 'X')
84 |
85 | words.replace(/\Bpar\B/g, 'X')
86 |
87 | 'copper'.replace(/\b/g, ':')
88 |
89 | 'copper'.replace(/\B/g, ':')
90 |
91 | '-----hello-----'.replace(/\b/g, ' ')
92 |
93 | '-----hello-----'.replace(/\B/g, ' ')
94 |
95 |
--------------------------------------------------------------------------------
/code_snippets/Character_class.js:
--------------------------------------------------------------------------------
1 | // Custom character sets
2 |
3 | ['cute', 'cat', 'cot', 'coat', 'cost', 'scuttle'].filter(w => /c[ou]t/.test(w))
4 |
5 | 'meeting cute boat site foot'.replace(/[aeo]+t/g, 'X')
6 |
7 | // Range of characters
8 |
9 | 'Sample123string42with777numbers'.match(/[0-9]+/g)
10 |
11 | 'coat Bin food tar12 best Apple fig_42'.match(/\b[a-z0-9]+\b/g)
12 |
13 | 'coat tin food put stoop best fig_42 Pet'.match(/\b[p-z][a-z]*\b/g)
14 |
15 | 'coat tin food put stoop best fig_42 Pet'.match(/\b[a-fp-t]+\b/g)
16 |
17 | // Negating character sets
18 |
19 | 'Sample123string42with777numbers'.match(/[^0-9]+/g)
20 |
21 | 'apple:123:banana:cherry'.replace(/^([^:]+:){2}/, '')
22 |
23 | 'apple=42; cherry=123'.replace(/=[^=]+$/, '')
24 |
25 | let words = ['tryst', 'fun', 'glyph', 'pity', 'why']
26 |
27 | words.filter(w => /^[^aeiou]+$/.test(w))
28 |
29 | words.filter(w => !/[aeiou]/.test(w))
30 |
31 | // Matching metacharacters literally
32 |
33 | 'ab-cd gh-c 12-423'.match(/\b[a-z-]{2,}\b/g)
34 |
35 | 'ab-cd gh-c 12-423'.match(/\b[a-z\-0-9]{2,}\b/g)
36 |
37 | 'f*(a^b) - 3*(a+b)'.match(/a[+^]b/g)
38 |
39 | 'f*(a^b) - 3*(a+b)'.match(/a[\^+]b/g)
40 |
41 | 'words[5] = tea'.match(/[a-z[\]0-9]+/)[0]
42 |
43 | console.log('5ba\\babc2'.match(/[a\\b]+/)[0])
44 |
45 | // Escape sequence sets
46 |
47 | 'Sample123string42with777numbers'.split(/\d+/)
48 |
49 | 'sea eat car rat eel tea'.match(/\b\w/g).join('')
50 |
51 | 'tea sea-Pit Sit;(lean_2\tbean_3)'.match(/[\w\s]+/g)
52 |
53 | 'Sample123string42with777numbers'.replace(/\D+/g, '-')
54 |
55 | ' 1..3 \v\f fig_tea 42\tzzz \r\n1-2-3 '.match(/\S+/g)
56 |
57 | // Numeric ranges
58 |
59 | '23 154 12 26 98234'.match(/\b[12]\d\b/g)
60 |
61 | '23 154 12 26 98234'.match(/\b\d{3,}\b/g)
62 |
63 | '0501 035 154 12 26 98234'.match(/\b0*[1-9]\d{2,}\b/g)
64 |
65 | '45 349 651 593 4 204'.match(/\d+/g).filter(n => n < 350)
66 |
67 | '45 349 651 593 4 204'.replace(/\d+/g, m => m < 350 ? 0 : 1)
68 |
69 | '45 349 651 593 4 204'.match(/\d+/g).filter(n => n >= 200 && n <= 650)
70 |
71 |
--------------------------------------------------------------------------------
/code_snippets/Dot_metacharacter_and_Quantifiers.js:
--------------------------------------------------------------------------------
1 | // Dot metacharacter
2 |
3 | 'tac tin c.t abc;tuv acute'.replace(/c.t/g, 'X')
4 |
5 | 'breadth markedly reported overrides'.replace(/r..d/g, 'X')
6 |
7 | '42\t35'.replace(/2.3/, '8')
8 |
9 | 'cag̈ed'.replace(/a.e/, 'o')
10 |
11 | 'cag̈ed'.replace(/a..e/, 'o')
12 |
13 | // split() method
14 |
15 | 'apple-85-mango-70'.split(/-/)
16 |
17 | 'apple-85-mango-70'.split(/-/, 2)
18 |
19 | 'bus:3:car:-:van'.split(/:.:/)
20 |
21 | // Greedy quantifiers
22 |
23 | 'far feat flare fear'.replace(/e?ar/g, 'X')
24 |
25 | 'par spare part party'.replace(/\bpart?\b/g, 'X')
26 |
27 | ['red', 'ready', 're;d', 'redo', 'reed'].filter(w => /\bre.?d\b/.test(w))
28 |
29 | 'par part parrot parent'.replace(/par(ro)?t/g, 'X')
30 |
31 | 'par part parrot parent'.replace(/par(en|ro)?t/g, 'X')
32 |
33 | 'tr tear tare steer sitaara'.replace(/ta*r/g, 'X')
34 |
35 | 'tr tear tare steer sitaara'.replace(/t(e|a)*r/g, 'X')
36 |
37 | '3111111111125111142'.replace(/1*2/g, 'X')
38 |
39 | '3111111111125111142'.split(/1*2/)
40 |
41 | '3111111111125111142'.split(/1*/)
42 |
43 | 'tr tear tare steer sitaara'.replace(/ta+r/g, 'X')
44 |
45 | 'tr tear tare steer sitaara'.replace(/t(e|a)+r/g, 'X')
46 |
47 | '3111111111125111142'.replace(/1+2/g, 'X')
48 |
49 | '3111111111125111142'.split(/1+/)
50 |
51 | let repeats = ['abc', 'ac', 'abbc', 'xabbbcz', 'bc', 'abbbbbc']
52 |
53 | repeats.filter(w => /ab{1,4}c/.test(w))
54 |
55 | repeats.filter(w => /ab{0,2}c/.test(w))
56 |
57 | repeats.filter(w => /ab{3,}c/.test(w))
58 |
59 | repeats.filter(w => /ab{3}c/.test(w))
60 |
61 | 'a{5} = 10'.replace(/a\{5}/g, 'a{6}')
62 |
63 | 'report_{a,b}.txt'.replace(/_{a,b}/g, '-{c,d}')
64 |
65 | // AND Conditional
66 |
67 | /Error.*valid/.test('Error: not a valid input')
68 |
69 | /Error.*valid/.test('Error: key not found')
70 |
71 | /cat.*dog|dog.*cat/.test('cat and dog')
72 |
73 | /cat.*dog|dog.*cat/.test('dog and cat')
74 |
75 | let patterns = [/cat/, /dog/]
76 |
77 | patterns.every(p => p.test('cat and dog'))
78 |
79 | patterns.every(p => p.test('dog and cat'))
80 |
81 | // What does greedy mean?
82 |
83 | 'foot'.replace(/f.?o/, 'X')
84 |
85 | console.log('table < fig \\< bat < cake'.replace(/\\? escapeRegExp(w)).join('|')
33 | }
34 |
35 | let w1 = ['c^t', 'dog$', 'f|x']
36 |
37 | const p1 = new RegExp(unionRegExp(w1), 'g')
38 |
39 | p1
40 |
41 | 'c^t dog$ bee parrot f|x'.replace(p1, 'mammal')
42 |
43 | let w2 = ['hand', 'handy', 'handful']
44 |
45 | w2.sort((a, b) => b.length - a.length)
46 |
47 | const p2 = new RegExp(`\\b(${unionRegExp(w2)})\\b`, 'g')
48 |
49 | p2
50 |
51 | 'handful handed handy hands hand'.replace(p2, 'X')
52 |
53 | // source and flags properties
54 |
55 | const p3 = /\bpar\b/
56 |
57 | const p4 = new RegExp(p3.source + '|cat', 'g')
58 |
59 | p4
60 |
61 | console.log(p4.source)
62 |
63 | p4.flags
64 |
65 | 'cater cat concatenate par spare'.replace(p4, 'X')
66 |
67 | // Escaping the delimiter
68 |
69 | let path = '/home/joe/report/sales/ip.txt'
70 |
71 | path.replace(/^\/home\/joe\//, '~/')
72 |
73 | path.replace(new RegExp(`^/home/joe/`), '~/')
74 |
75 | // Escape sequences
76 |
77 | 'a\tb\tc'.replace(/\t/g, ':')
78 |
79 | '1\n2\n3'.replace(/\n/g, ' ')
80 |
81 | new RegExp('123\tabc')
82 |
83 | new RegExp('123\\tabc')
84 |
85 | new RegExp('car\b')
86 |
87 | new RegExp('car\\b')
88 |
89 | /\e/.test('hello')
90 |
91 | 'h e l l o'.replace(/\x20/g, '')
92 |
93 | '12|30'.replace(/2\x7c3/g, '5')
94 |
95 | '12|30'.replace(/2|3/g, '5')
96 |
97 |
--------------------------------------------------------------------------------
/code_snippets/Groupings_and_backreferences.js:
--------------------------------------------------------------------------------
1 | // Backreferences
2 |
3 | '[52] apples [and] [31] mangoes'.replace(/\[(\d+)\]/g, '$1')
4 |
5 | '_apple_ __123__ _banana_'.replace(/(_)?_/g, '$1')
6 |
7 | 'good,bad 42,24 x,y'.replace(/(\w+),(\w+)/g, '$2,$1')
8 |
9 | '52 apples and 31 mangoes'.replace(/\d+/g, '($&)')
10 |
11 | 'Hello world'.replace(/.*/, 'Hi. $&. Have a nice day')
12 |
13 | 'fork,42,nice,3.14'.replace(/,.+/, '$&,$`')
14 |
15 | let words = ['moon', 'mono', 'excellent', 'POLL', 'a22b']
16 |
17 | words.filter(w => /(\w)\1/.test(w))
18 |
19 | 'aa a a a 42 f_1 f_1 f_13.14'.replace(/\b(\w+)( \1)+\b/g, '$1')
20 |
21 | // Backreference oddities
22 |
23 | 'cat'.replace(/a/, '{$1}')
24 |
25 | 'cat'.replace(/(a)/, '{\$1}')
26 |
27 | 'cat'.replace(/(a)/, '{$$1}')
28 |
29 | '[52] apples and [31] mangoes'.replace(/\[(\d+)\]/g, '($15)')
30 |
31 | '[52] apples and [31] mangoes'.replace(/\[(\d+)\]/g, '$3')
32 |
33 | '[52] apples and [31] mangoes'.replace(/\[\d+\]/g, '$1')
34 |
35 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$11')
36 |
37 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$1\x31')
38 |
39 | 'abcdefghijklmn'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)/, '$011')
40 |
41 | 'abcdefghijklmna1d'.replace(/(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.).*\1\x31/, 'X')
42 |
43 | // Non-capturing groups
44 |
45 | 'Sample123string42with777numbers'.split(/\d+/)
46 |
47 | 'Sample123string42with777numbers'.split(/(\d+)/)
48 |
49 | 'effort flee facade oddball rat tool'.match(/\b\w*(\w)\1\w*\b/g)
50 |
51 | 'hi 123123123 bye 456123456'.match(/(123)+/g)
52 |
53 | '123hand42handy777handful500'.split(/hand(y|ful)?/)
54 |
55 | '123hand42handy777handful500'.split(/hand(?:y|ful)?/)
56 |
57 | '1,2,3,4,5,6,7'.replace(/^(([^,]+,){3})([^,]+)/, '$1($3)')
58 |
59 | '1,2,3,4,5,6,7'.replace(/^((?:[^,]+,){3})([^,]+)/, '$1($2)')
60 |
61 | 'so:cat:rest:in:put:to'.replace(/^([^:]+:){4}/, '($1)')
62 |
63 | 'so:cat:rest:in:put:to'.replace(/^((?:[^:]+:){4})/, '($1)')
64 |
65 | // Named capture groups
66 |
67 | let row = 'today,2008-03-24,food,2008-03-24,nice,2018-10-25,5632'
68 |
69 | row.match(/(?<date>\d{4}-\d{2}-\d{2}).*\k<date>/)[0]
70 |
71 | 'good,bad 42,24 x,y'.replace(/(?<fw>\w+),(?<sw>\w+)/g, '$<sw>,$<fw>')
72 |
73 | let m = '2018-10-25,car,2346'.match(/(?<date>[^,]+),(?<product>[^,]+)/)
74 |
75 | m.groups
76 |
77 | m.groups.date
78 |
79 | m.groups.product
80 |
81 |
--------------------------------------------------------------------------------
/code_snippets/Interlude_Common_tasks.js:
--------------------------------------------------------------------------------
1 | // CommonRegexJS
2 |
3 | let data = 'hello 255.21.255.22 okay'
4 |
5 | const comm = new CommonRegex(data)
6 |
7 | comm.IPv4
8 |
9 | let new_data = '23.14.2.4.2 255.21.255.22 567.12.2.1'
10 |
11 | const ip = new CommonRegex(new_data)
12 |
13 | ip.IPv4
14 |
15 |
--------------------------------------------------------------------------------
/code_snippets/Lookarounds.js:
--------------------------------------------------------------------------------
1 | // Conditional expressions
2 |
3 | let items = ['1,2,3,4', 'a,b,c,d', '#apple 123']
4 |
5 | items.filter(s => /\d/.test(s) && s.includes('#'))
6 |
7 | items.filter(s => s[0] != '#').map(s => s.replace(/,.+,/, ' '))
8 |
9 | // Negative lookarounds
10 |
11 | 'hey cats! cat42 cat_5 catcat'.replace(/cat(?!\d)/g, 'dog')
12 |
13 | 'cat _cat 42catcat'.replace(/(? /(?=.*b)(?=.*e).*t/.test(w))
60 |
61 | words.filter(w => /(?=.*a)(?=.*e)(?=.*i)(?=.*o).*u/.test(w))
62 |
63 | words.filter(w => /(?!.*n$)(?=.*a[bt]).*q/.test(w))
64 |
65 | // Variable length lookbehind
66 |
67 | '=314not :,2irk ,:3cool =42,error'.match(/(?<=[:=]\d+)[a-z]+/g)
68 |
69 | 'cat scatter cater scat'.replace(/(?<=(cat.*?){2})cat/, 'X')
70 |
71 | /(? /tt/.test(w))
26 |
27 | words.every(w => /at/.test(w))
28 |
29 | words.some(w => /stat/.test(w))
30 |
31 | // Flags
32 |
33 | /cat/.test('CaT')
34 |
35 | /cat/i.test('CaT')
36 |
37 | ['Cat', 'cot', 'CATER', 'SCat', 'ScUtTLe'].filter(w => /cat/i.test(w))
38 |
39 | // RegExp constructor and reuse
40 |
41 | const pet = /dog/
42 |
43 | pet.test('They bought a dog')
44 |
45 | pet.test('A cat crossed their path')
46 |
47 | const pat = new RegExp('dog')
48 |
49 | pat
50 |
51 | new RegExp('dog', 'i')
52 |
53 | let greeting = 'hi'
54 |
55 | const pat1 = new RegExp(`${greeting} there`)
56 |
57 | pat1
58 |
59 | new RegExp(`${greeting.toUpperCase()} there`)
60 |
61 | // replace() method
62 |
63 | '1,2,3,4'.replace(/,/, '-')
64 |
65 | '1,2,3,4'.replace(/,/g, '-')
66 |
67 | 'cArT PART tart mArt'.replace(/art/ig, '2')
68 |
69 | let word = 'cater'
70 |
71 | word.replace(/cat/, 'hack')
72 |
73 | word
74 |
75 | word = word.replace(/cat/, 'hack')
76 |
77 | word
78 |
79 |
--------------------------------------------------------------------------------
/code_snippets/Unicode.js:
--------------------------------------------------------------------------------
1 | // Unicode character sets and the u flag
2 |
3 | 'fox:αλεπού,eagle:αετός'.match(/\p{L}+/gu)
4 |
5 | 'fox:αλεπού,eagle:αετός'.match(/\p{sc=Greek}+/gu)
6 |
7 | 'φοο12,βτ_4,fig'.replace(/\P{L}+/gu, '')
8 |
9 | 'tie. ink east;'.match(/(? c.codePointAt().toString(16))
26 |
27 | '\u{3b1}'
28 |
29 | 'fox:αλεπού,eagle:αετός'.match(/[\u{61}-\u{7a}]+/gu)
30 |
31 |
--------------------------------------------------------------------------------
/code_snippets/Working_with_matched_portions.js:
--------------------------------------------------------------------------------
1 | // match() method
2 |
3 | 'too soon a song snatch'.match(/so+n/)
4 |
5 | 'too soon a song snatch'.match(/so+n/)[0]
6 |
7 | 'too soon a song snatch'.match('so+n')
8 |
9 | let s1 = 'cat and dog'
10 |
11 | s1.match(/dog/).index
12 |
13 | s1.match(/dog/).input
14 |
15 | s1.match(/xyz/)
16 |
17 | // search() method
18 |
19 | 'cat and dog'.search(/dog/)
20 |
21 | 'cat and dog'.search(/xyz/)
22 |
23 | // Capture groups
24 |
25 | let motivation = 'improve yourself.'
26 |
27 | motivation.match(/pr.*our/)
28 |
29 | motivation.match(/pr.*our/)[0]
30 |
31 | let purchase = 'coffee:100g tea:250g sugar:75g chocolate:50g'
32 |
33 | let m = purchase.match(/:(.*?)g.*?:(.*?)g.*?chocolate:(.*?)g/)
34 |
35 | m
36 |
37 | m[1]
38 |
39 | m[3]
40 |
41 | // d flag
42 |
43 | 'awesome'.match(/so/d)
44 |
45 | 'awesome'.match(/so/d).indices[0]
46 |
47 | 'coffee:100g tea:250g'.match(/:(.*?)g/d)
48 |
49 | 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[0]
50 |
51 | 'coffee:100g tea:250g'.match(/:(.*?)g/d).indices[1]
52 |
53 | // Getting all the matched portions
54 |
55 | 'too soon a song snatch'.match(/so*n/g)
56 |
57 | 'too soon a song snatch'.match(/so+n/g)
58 |
59 | 'PAR spar apparent SpArE part pare'.match(/\bs?pare?\b/ig)
60 |
61 | 'par spar apparent spare part'.match(/\bs?par(e|t)\b/g)
62 |
63 | 'green:3.14:teal::brown:oh!:blue'.match(/:.*:/g)
64 |
65 | 'green:3.14:teal::brown:oh!:blue'.match(/:.*?:/g)
66 |
67 | // matchAll() method
68 |
69 | 'song too soon snatch'.matchAll(/so*n/g)
70 |
71 | let arr = [...'song too soon snatch'.matchAll(/so*n/g)]
72 |
73 | arr
74 |
75 | arr[0]
76 |
77 | arr[1].index
78 |
79 | Array.from('song too soon snatch'.matchAll(/so*n/g), m => m[0])
80 |
81 | Array.from('song too soon snatch'.matchAll(/so*n/g), m => m.index)
82 |
83 | Array.from('2023/04,1986/Mar,'.matchAll(/(.*?)\/(.*?),/g), m => m.slice(1))
84 |
85 | // split() with capture groups
86 |
87 | '31111111111251111426'.split(/1*4?2/)
88 |
89 | '31111111111251111426'.split(/(1*4?2)/)
90 |
91 | '31111111111251111426'.split(/(1*)4?2/)
92 |
93 | '3.14aabccc42'.split(/(a+)b+(c+)/)
94 |
95 | '31111111111251111426'.split(/(1*)(4)?2/)
96 |
97 | '3.14aabccc42abc88'.split(/(a+b+c+)(.*)/, 3)
98 |
99 | // Using functions in the replacement section
100 |
101 | function titleCase(m) {
102 | return m[0].toUpperCase() + m.substr(1).toLowerCase()
103 | }
104 |
105 | 'aBc ac ADC aBbBC'.replace(/a.*?c/ig, titleCase)
106 |
107 | 'abc ac adc abbbc'.replace(/ab*c/g, m => m.toUpperCase())
108 |
109 | '1 42 317'.replace(/\d+/g, m => m*2)
110 |
111 | function titleCase(m, g1, g2) {
112 | return g1.toUpperCase() + g2.toLowerCase()
113 | }
114 |
115 | 'aBc ac ADC aBbBC'.replace(/(a)(.*?c)/ig, titleCase)
116 |
117 | // Using dictionary in the replacement section
118 |
119 | let h = { '1': 'one', '2': 'two', '4': 'four' }
120 |
121 | '9234012'.replace(/1|2|4/g, k => h[k])
122 |
123 | '9234012'.replace(/\d/g, k => k in h ? h[k] : 'X')
124 |
125 | let swap = { 'cat': 'tiger', 'tiger': 'cat' }
126 |
127 | 'cat tiger dog tiger cat'.replace(/cat|tiger/g, k => swap[k])
128 |
129 | let d = { 'hand': 1, 'handy': 2, 'handful': 3, 'a^b': 4 }
130 |
131 | const p = unionRegExp(Object.keys(d).sort((a, b) => b.length - a.length))
132 |
133 | console.log(p)
134 |
135 | 'handful hand pin handy (a^b)'.replace(new RegExp(p, 'g'), k => d[k])
136 |
137 |
--------------------------------------------------------------------------------
/images/backslash_in_RegExp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/backslash_in_RegExp.png
--------------------------------------------------------------------------------
/images/find_replace.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/find_replace.png
--------------------------------------------------------------------------------
/images/info.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/images/js_regexp_ls.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/js_regexp_ls.png
--------------------------------------------------------------------------------
/images/password_check.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/password_check.png
--------------------------------------------------------------------------------
/images/regex101.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/regex101.png
--------------------------------------------------------------------------------
/images/regulex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/regulex.png
--------------------------------------------------------------------------------
/images/v_flag_examples.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/images/v_flag_examples.png
--------------------------------------------------------------------------------
/images/warning.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sample_chapters/js_regexp_sample.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/learnbyexample/learn_js_regexp/946a981d5c556d1aa4b89118ce4c133a8b118e3f/sample_chapters/js_regexp_sample.pdf
--------------------------------------------------------------------------------