├── .gitignore ├── jest.answers.json ├── answers ├── 03_Or.js ├── 01_Basics.js ├── 02_BeginningToEnd.js ├── 04_CharacterSets.js ├── 06_Modifiers.js ├── 08_GreedyVsMinimal.js ├── 05_CharacterSetShortcuts.js ├── 09_Grouping.js ├── 12_RegexReplace.js ├── 07_RepeatingCharacters.js ├── 10_CapturingGroups.js ├── 11_Intermission.js └── 13_LookingAhead.js ├── __tests__ ├── 03_Or.js ├── 01_Basics.js ├── 02_BeginningToEnd.js ├── 04_CharacterSets.js ├── 06_Modifiers.js ├── 08_GreedyVsMinimal.js ├── 05_CharacterSetShortcuts.js ├── 09_Grouping.js ├── 12_RegexReplace.js ├── 07_RepeatingCharacters.js ├── 10_CapturingGroups.js ├── 13_LookingAhead.js └── 11_Intermission.js ├── package.json ├── MIT.LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /jest.answers.json: -------------------------------------------------------------------------------- 1 | { 2 | "testMatch": [ "**/answers/*.js" ] 3 | } 4 | -------------------------------------------------------------------------------- /answers/03_Or.js: -------------------------------------------------------------------------------- 1 | describe("Or", function() { 2 | 3 | it('pipe character ( | ) is a regex OR', function() { 4 | const thisPatternWorks = /^dog|cat$/; 5 | const fixThisPattern = /^hearts|clubs$/; 6 | 7 | expect( 'dog' ).toMatch(thisPatternWorks); 8 | expect( 'cat' ).toMatch(thisPatternWorks); 9 | 10 | expect( 'hearts' ).toMatch(fixThisPattern); 11 | expect( 'clubs' ).toMatch(fixThisPattern); 12 | }); 13 | 14 | }); 15 | -------------------------------------------------------------------------------- /__tests__/03_Or.js: -------------------------------------------------------------------------------- 1 | describe("Or", function() { 2 | 3 | it('pipe character ( | ) is a regex OR', function() { 4 | const thisPatternWorks = /^dog|cat$/; 5 | const fixThisPattern = 
/^___$/; 6 | 7 | expect( 'dog' ).toMatch(thisPatternWorks); 8 | expect( 'cat' ).toMatch(thisPatternWorks); 9 | 10 | expect( 'hearts' ).toMatch(fixThisPattern); 11 | expect( 'clubs' ).toMatch(fixThisPattern); 12 | }); 13 | 14 | }); 15 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "regex-koans", 3 | "version": "2.0.0", 4 | "description": "Learn regular expressions in javascript through unit tests", 5 | "main": "index.js", 6 | "directories": { 7 | "lib": "lib" 8 | }, 9 | "scripts": { 10 | "test": "jest", 11 | "watch": "jest --watch", 12 | "answers": "jest --config jest.answers.json" 13 | }, 14 | "keywords": [ 15 | "regex", 16 | "regular", 17 | "expression", 18 | "koan" 19 | ], 20 | "author": "Aaron Brown", 21 | "license": "MIT", 22 | "devDependencies": { 23 | "jest": "^29.5.0" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /answers/01_Basics.js: -------------------------------------------------------------------------------- 1 | describe("Basics", function() { 2 | 3 | it('replace /^a$/ with /^___$/ to make RegEx match', function() { 4 | expect('a').toMatch(/^a$/); 5 | }); 6 | 7 | it('a RegEx that is the same as the string will match', function() { 8 | expect('abcd').toMatch(/^abcd$/); 9 | }); 10 | 11 | it('RegEx is case sensitive', function() { 12 | expect('AbCd').toMatch(/^AbCd$/); 13 | }); 14 | 15 | it('whitespace matters', function() { 16 | expect('ab cd').toMatch(/^ab cd$/); 17 | }); 18 | 19 | it('testing for does-not-match is often useful, too', function() { 20 | expect('abcd').not.toMatch(/^bcd$/); 21 | }); 22 | 23 | }); 24 | -------------------------------------------------------------------------------- /__tests__/01_Basics.js: -------------------------------------------------------------------------------- 1 | describe("Basics", function() { 2 | 3 | 
it('replace /^___$/ with /^a$/ to make RegEx match. We\'ll explain ^ and $ later.', function() { 4 | expect('a').toMatch(/^___$/); 5 | }); 6 | 7 | it('a RegEx that is the same as the string will match', function() { 8 | expect('abcd').toMatch(/^___$/); 9 | }); 10 | 11 | it('RegEx is case sensitive', function() { 12 | expect('AbCd').toMatch(/^___$/); 13 | }); 14 | 15 | it('whitespace matters', function() { 16 | expect('ab cd').toMatch(/^___$/); 17 | }); 18 | 19 | it('testing for does-not-match is often useful, too', function() { 20 | expect('abcd').not.toMatch(/^abcd$/); 21 | }); 22 | 23 | }); 24 | -------------------------------------------------------------------------------- /answers/02_BeginningToEnd.js: -------------------------------------------------------------------------------- 1 | describe("Beginning To End", function() { 2 | 3 | it('^ anchors RegEx to beginning of the string', function() { 4 | expect( 'dog cat pony' ).not.toMatch(/^cat/); // This one already works 5 | 6 | expect( 'dog cat pony' ).toMatch(/^dog/); // Fix this one so it passes 7 | }); 8 | 9 | it('$ anchors RegEx to the end of the string', function() { 10 | expect( 'dog cat pony' ).not.toMatch(/cat$/); // This one already works 11 | 12 | expect( 'dog cat pony' ).toMatch(/pony$/); // Fix this one so it passes 13 | }); 14 | 15 | it('use both to match the entire input', function() { 16 | expect( 'dog cat pony' ).not.toMatch(/^cat$/); // This one already works 17 | 18 | expect( 'dog cat pony' ).toMatch(/^dog cat pony$/); // Fix this one so it passes 19 | }); 20 | 21 | it('a RegEx without any anchors will match a substring anywhere in the input', function() { 22 | // try using "bc" in the RegEx 23 | 24 | expect( 'abcd' ).toMatch(/bc/); 25 | }); 26 | 27 | }); 28 | -------------------------------------------------------------------------------- /__tests__/02_BeginningToEnd.js: -------------------------------------------------------------------------------- 1 | describe("Beginning To End", 
function() { 2 | 3 | it('^ anchors RegEx to beginning of the string', function() { 4 | expect( 'dog cat pony' ).not.toMatch(/^cat/); // This one already works 5 | 6 | expect( 'dog cat pony' ).toMatch(/^___/); // Fix this one so it passes 7 | }); 8 | 9 | it('$ anchors RegEx to the end of the string', function() { 10 | expect( 'dog cat pony' ).not.toMatch(/cat$/); // This one already works 11 | 12 | expect( 'dog cat pony' ).toMatch(/___$/); // Fix this one so it passes 13 | }); 14 | 15 | it('use both to match the entire input', function() { 16 | expect( 'dog cat pony' ).not.toMatch(/^cat$/); // This one already works 17 | 18 | expect( 'dog cat pony' ).toMatch(/^___y$/); // Fix this one so it passes 19 | }); 20 | 21 | it('a RegEx without any anchors will match a substring anywhere in the input', function() { 22 | // try using "bc" in the RegEx 23 | 24 | expect( 'abcd' ).toMatch(/___/); 25 | }); 26 | 27 | }); 28 | -------------------------------------------------------------------------------- /MIT.LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Aaron Brown 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /__tests__/04_CharacterSets.js: -------------------------------------------------------------------------------- 1 | describe("Character Sets", function() { 2 | 3 | it('use [ ] to allow more than one possible character in this position', function() { 4 | const workingPattern = /^d[iu]g$/; 5 | 6 | const fixThisPattern = /^___$/; 7 | 8 | expect( 'dig' ).toMatch(workingPattern); 9 | expect( 'dug' ).toMatch(workingPattern); 10 | 11 | expect( 'spin' ).toMatch(fixThisPattern); 12 | expect( 'span' ).toMatch(fixThisPattern); 13 | expect( 'spun' ).toMatch(fixThisPattern); 14 | }); 15 | 16 | it('use - to specify a range of characters inside [ ]', function() { 17 | const workingPattern = /^[a-c][a-c][0-9]$/; 18 | 19 | const fixThisPattern = /^___$/; 20 | 21 | expect( 'bc3' ).toMatch(workingPattern); 22 | expect( 'aa8' ).toMatch(workingPattern); 23 | 24 | expect( 'Azy' ).toMatch(fixThisPattern); 25 | expect( 'Dyy' ).toMatch(fixThisPattern); 26 | expect( 'Bxx' ).toMatch(fixThisPattern); 27 | }); 28 | 29 | it('since - is a special character inside [ ], you must escape it with backslash', function() { 30 | const fixThisPattern = /^___$/; 31 | 32 | expect( '1+2' ).toMatch(fixThisPattern); 33 | expect( '2-3' ).toMatch(fixThisPattern); 34 | expect( '4*6' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('mixing single characters and ranges is okay', function() { 38 | const workingPattern = /^[a-zA-Z][a-zA-Z][0-4abc]$/; 39 | 40 | const fixThisPattern = /^___$/; 41 | 42 | expect( 'dG2').toMatch(workingPattern); 43 | expect( 'ZZb').toMatch(workingPattern); 44 | 45 | expect( '#444444' ).toMatch(fixThisPattern); 46 | expect( '#ffffff' 
).toMatch(fixThisPattern); 47 | expect( '#c0c0c0' ).toMatch(fixThisPattern); 48 | }); 49 | 50 | it('caret negates the character set: "Must Be One Of These" becomes "Must NOT Be One Of These"', function() { 51 | const workingPattern = /^x[^a-z]z$/; 52 | 53 | const fixThisPattern = /^___$/; 54 | 55 | expect( 'x3z' ).toMatch(workingPattern); 56 | expect( 'xYz' ).toMatch(workingPattern); 57 | 58 | expect( '3.2' ).toMatch(fixThisPattern); 59 | expect( '5 8' ).toMatch(fixThisPattern); 60 | expect( '3x3' ).toMatch(fixThisPattern); 61 | }); 62 | }); 63 | -------------------------------------------------------------------------------- /answers/04_CharacterSets.js: -------------------------------------------------------------------------------- 1 | describe("Character Sets", function() { 2 | 3 | it('use [ ] to allow more than one possible character in this position', function() { 4 | const workingPattern = /^d[iu]g$/; 5 | 6 | const fixThisPattern = /^sp[iau]n$/; 7 | 8 | expect( 'dig' ).toMatch(workingPattern); 9 | expect( 'dug' ).toMatch(workingPattern); 10 | 11 | expect( 'spin' ).toMatch(fixThisPattern); 12 | expect( 'span' ).toMatch(fixThisPattern); 13 | expect( 'spun' ).toMatch(fixThisPattern); 14 | }); 15 | 16 | it('use - to specify a range of characters inside [ ]', function() { 17 | const workingPattern = /^[a-c][a-c][0-9]$/; 18 | 19 | const fixThisPattern = /^[A-D][x-z][x-z]$/; 20 | 21 | expect( 'bc3' ).toMatch(workingPattern); 22 | expect( 'aa8' ).toMatch(workingPattern); 23 | 24 | expect( 'Azy' ).toMatch(fixThisPattern); 25 | expect( 'Dyy' ).toMatch(fixThisPattern); 26 | expect( 'Bxx' ).toMatch(fixThisPattern); 27 | }); 28 | 29 | it('since - is a special character inside [ ], you must escape it with backslash', function() { 30 | const fixThisPattern = /^[1-4][+\-*][2-6]$/; 31 | 32 | expect( '1+2' ).toMatch(fixThisPattern); 33 | expect( '2-3' ).toMatch(fixThisPattern); 34 | expect( '4*6' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('mixing single characters and 
ranges is okay', function() { 38 | const workingPattern = /^[a-zA-Z][a-zA-Z][0-4abc]$/; 39 | 40 | const fixThisPattern = /^#[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]$/; 41 | 42 | expect( 'dG2').toMatch(workingPattern); 43 | expect( 'ZZb').toMatch(workingPattern); 44 | 45 | expect( '#444444' ).toMatch(fixThisPattern); 46 | expect( '#ffffff' ).toMatch(fixThisPattern); 47 | expect( '#c0c0c0' ).toMatch(fixThisPattern); 48 | }); 49 | 50 | it('caret negates the character set: "Must Be One Of These" becomes "Must NOT Be One Of These"', function() { 51 | const workingPattern = /^x[^a-z]z$/; 52 | 53 | const fixThisPattern = /^[0-9][^0-9][0-9]$/; 54 | 55 | expect( 'x3z' ).toMatch(workingPattern); 56 | expect( 'xYz' ).toMatch(workingPattern); 57 | 58 | expect( '3.2' ).toMatch(fixThisPattern); 59 | expect( '5 8' ).toMatch(fixThisPattern); 60 | expect( '3x3' ).toMatch(fixThisPattern); 61 | }); 62 | }); 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Regular Expression Koans 2 | ======================== 3 | 4 | A series of tutorials on Regular Expressions. 5 | 6 | Learn how to write, read, and use Regular Expressions by completing or correcting 7 | code to make failing Unit Tests pass. Each Koan includes a series of Unit Tests 8 | which illustrate Regular Expression principles and techniques. I have made an effort 9 | to avoid making forward references in the tutorial sequence, but there are enough 10 | back references to earlier Koans that it would be best to complete the Koans in 11 | the order presented. 12 | 13 | JavaScript's Regular Expression engine is very similar to that of many other 14 | programming languages. For an [excellent summary of JavaScript's Regex capabilities][jsRegex], 15 | as well as those of many other languages, Jan Goyvaerts's regular-expressions.info is your first and last resource. 
16 | 17 | To practice, to try out some code before using it in your program, or just to play around, 18 | [rubular.com][rubular] is a fantastic Regular Expression resource. 19 | It uses Ruby's Regex engine, but the differences are minor. This web tool should be in the 20 | Bookmarks list of every programmer. There are many other good web-based Regular Expression 21 | tools, too many to list here. 22 | 23 | With many thanks to: 24 | 25 | * [EdgeCase](http://edgecase.com/) for the original, inspired, and inspirational [Ruby Koans](http://rubykoans.com/) 26 | 27 | Installation 28 | ------------ 29 | 30 | ``` 31 | $ git clone https://github.com/frenchroasted/RegexKoans.git 32 | $ cd RegexKoans 33 | $ npm install 34 | ``` 35 | 36 | Usage 37 | ----- 38 | The test suite runs in jest, which is a powerful unit testing tool. Some 39 | helpful commands have been included in the package.json file: 40 | 41 | ``` 42 | # To run all the tutorial test scripts at once 43 | $ npm test 44 | 45 | # To run jest in "watch" mode, which is interactive 46 | $ npm run watch 47 | 48 | # Once running in "watch" mode, jest provides tools to 49 | # re-run a specific test or test file, and many other 50 | # options. The test files are numbered, which makes them 51 | # easy to filter in the jest watch tool. 52 | ``` 53 | 54 | * Run the tests. See all the failing tests. 55 | * Edit a test file, beginning with `__tests__/01_Basics.js`, save, and re-run the tests. 56 | * Repeat until all the tests are passing. 
57 | 58 | [jsRegex]: http://www.regular-expressions.info/javascript.html 59 | [rubular]: http://rubular.com/ 60 | -------------------------------------------------------------------------------- /answers/06_Modifiers.js: -------------------------------------------------------------------------------- 1 | describe("Modifiers", function() { 2 | 3 | const ___ = 0; 4 | 5 | it('regular expressions don\'t have to be case-sensitive', function() { 6 | // In many programming languages, especially those that 7 | // use the /^whatever$/ pattern syntax, modifiers are single 8 | // characters that come after the //. 9 | // 10 | // To make a JavaScript Pattern non-case-sensitive, 11 | // put "i" after the //. 12 | // 13 | // Note: Some programming languages do not support modifiers like 14 | // this. These languages typically have a flag that can be set 15 | // within the pattern string to toggle case-sensitivity. Refer 16 | // to the language API to see how to make a case-insensitive match 17 | // in your language. 18 | 19 | const fixThisPattern = /^abc$/i; 20 | 21 | expect( 'abc' ).toMatch(fixThisPattern); 22 | expect( 'ABC' ).toMatch(fixThisPattern); 23 | expect( 'AbC' ).toMatch(fixThisPattern); 24 | 25 | expect(fixThisPattern.source).not.toMatch(/\[/); 26 | }); 27 | 28 | it('match first or match all', function() { 29 | // //g -> match globally 30 | // By default, javascript // will match only the 31 | // first occurrence in the string which satisfies 32 | // the pattern. //g is useful especially when doing 33 | // find/replace in the string. We'll cover that in 34 | // a later Koan. 35 | // 36 | // The JavaScript String.match function returns 37 | // an array of matches. 
38 | 39 | const singleMatch = /x/; 40 | const globalMatch = /x/g; 41 | 42 | expect( "xxx".match(singleMatch).length ).toEqual(1); 43 | expect( "xxx".match(globalMatch).length ).toEqual(3); 44 | }); 45 | 46 | it('treat string as multiple lines', function() { 47 | // //m -> input string is multiple lines, so 48 | // ^ and $ match begin-end of each line, separated 49 | // by line feed. ^ and $ match intermediate 50 | // "lines" within input string 51 | 52 | // Hint: Notice "abc" appears twice in the matching string. 53 | 54 | const fixThisPattern = /^abc$/gm; 55 | 56 | const matches = "abc\nabc".match(fixThisPattern); 57 | 58 | expect( 'abc\nabc' ).toMatch(fixThisPattern); 59 | 60 | expect( matches.length ).toEqual(2); 61 | expect( matches[0] ).toEqual("abc"); 62 | expect( matches[1] ).toEqual("abc"); 63 | 64 | expect(fixThisPattern.source).not.toMatch(/n/); 65 | }); 66 | 67 | }); 68 | -------------------------------------------------------------------------------- /__tests__/06_Modifiers.js: -------------------------------------------------------------------------------- 1 | describe("Modifiers", function() { 2 | 3 | const ___ = 0; 4 | 5 | it('regular expressions don\'t have to be case-sensitive', function() { 6 | // In many programming languages, especially those that 7 | // use the /^whatever$/ pattern syntax, modifiers are single 8 | // characters that come after the //. 9 | // 10 | // To make a JavaScript Pattern non-case-sensitive, 11 | // put "i" after the //. 12 | // 13 | // Note: Some programming languages do not support modifiers like 14 | // this. These languages typically have a flag that can be set 15 | // within the pattern string to toggle case-sensitivity. Refer 16 | // to the language API to see how to make a case-insensitive match 17 | // in your language. 
18 | 19 | const fixThisPattern = /^abc$/; 20 | 21 | expect( 'abc' ).toMatch(fixThisPattern); 22 | expect( 'ABC' ).toMatch(fixThisPattern); 23 | expect( 'AbC' ).toMatch(fixThisPattern); 24 | 25 | expect(fixThisPattern.source).not.toMatch(/\[/); 26 | }); 27 | 28 | it('match first or match all', function() { 29 | // //g -> match globally 30 | // By default, javascript // will match only the 31 | // first occurrence in the string which satisfies 32 | // the pattern. //g is useful especially when doing 33 | // find/replace in the string. We'll cover that in 34 | // a later Koan. 35 | // 36 | // The JavaScript String.match function returns 37 | // an array of matches. 38 | 39 | const singleMatch = /x/; 40 | const globalMatch = /x/g; 41 | 42 | expect( "xxx".match(singleMatch).length ).toEqual(___); 43 | expect( "xxx".match(globalMatch).length ).toEqual(___); 44 | }); 45 | 46 | it('treat string as multiple lines', function() { 47 | // //m -> input string is multiple lines, so 48 | // ^ and $ match begin-end of each line, separated 49 | // by line feed. ^ and $ match intermediate 50 | // "lines" within input string 51 | 52 | // Hint: Notice "abc" appears twice in the matching string. 53 | 54 | const fixThisPattern = /^abc$/; 55 | 56 | const matches = "abc\nabc".match(fixThisPattern); 57 | 58 | expect( 'abc\nabc' ).toMatch(fixThisPattern); 59 | 60 | expect( matches.length ).toEqual(2); 61 | expect( matches[0] ).toEqual("___"); 62 | expect( matches[1] ).toEqual("___"); 63 | 64 | expect(fixThisPattern.source).not.toMatch(/n/); 65 | }); 66 | 67 | }); 68 | -------------------------------------------------------------------------------- /answers/08_GreedyVsMinimal.js: -------------------------------------------------------------------------------- 1 | describe("Greedy vs Minimal", function() { 2 | 3 | it('Remember: the String.match() method will return the matched text', function() { 4 | // Remember that the match() method will provide the text matched 5 | // by the pattern. 
The match() returns an array so that, if the 6 | // pattern is a global pattern (//g), all matches can be returned 7 | // at once. 8 | // 9 | // We will be using the matched value to complete the tests in this Koan. 10 | 11 | const matches = 'aabbbcc'.match( /b+/ ); 12 | 13 | expect( matches[0] ).toEqual('bbb'); 14 | 15 | }); 16 | 17 | it('* and + are "greedy" and may match more text than you intend', function() { 18 | const matches = '"One","Two","Three"'.match( /".*"/ ); 19 | 20 | expect( matches[0] ).toEqual('"One","Two","Three"'); // Change '"One"' so the test passes 21 | }); 22 | 23 | // Greedy character matching is a common downfall of many regular expressions. 24 | // In short, when * or + is used, the regex engine will attempt to use that 25 | // repeat instruction to capture as much of the string as possible while still 26 | // satisfying the other criteria in the regex. This can sometimes lead to 27 | // unexpected results, as illustrated above. 28 | // 29 | // Fortunately, this greedy behavior can be easily toggled on and off, as we'll 30 | // see below. In the case of the One,Two,Three Koan, there are two ways we can 31 | // match just the "One" portion of the string: 32 | // * Use a negative character set 33 | // * Turn off greedy matching and use "reluctant" or "minimal" matching instead 34 | 35 | it('use a negative character set to control the greedy match', function() { 36 | const workingMatches = '"One","Two","Three"'.match( /"[^"]*"/ ); 37 | 38 | const tagMatches = 'Hello, world!'.match( /<[^>]*>/ ); 39 | 40 | expect( workingMatches[0] ).toEqual('"One"'); 41 | 42 | expect( tagMatches[0] ).toEqual(''); 43 | }); 44 | 45 | it('use minimal match to capture the substring', function() { 46 | // To change * and + to non-greedy, minimal matching, 47 | // use *? and +? instead. 
48 | 49 | const fixThisPattern = /<.*?>/; 50 | 51 | const matches = 'Hello, world!'.match(fixThisPattern); 52 | 53 | expect( matches[0] ).toEqual(''); 54 | 55 | expect(fixThisPattern.source).not.toMatch(/\[/); 56 | }); 57 | 58 | 59 | // Side note: Have you noticed that none of the above regex patterns use 60 | // ^ $ like most of the other patterns in earlier Koans? 61 | // 62 | // Do you know why? 63 | 64 | 65 | // Remember: 66 | // * and + are greedy by default. 67 | // . does not match linefeed (\n), so .* is greedy until it encounters a \n 68 | 69 | }); 70 | -------------------------------------------------------------------------------- /__tests__/08_GreedyVsMinimal.js: -------------------------------------------------------------------------------- 1 | describe("Greedy vs Minimal", function() { 2 | 3 | it('Remember: the String.match() method will return the matched text', function() { 4 | // Remember that the match() method will provide the text matched 5 | // by the pattern. The match() returns an array so that, if the 6 | // pattern is a global pattern (//g), all matches can be returned 7 | // at once. 8 | // 9 | // We will be using the matched value to complete the tests in this Koan. 10 | 11 | const matches = 'aabbbcc'.match( /b+/ ); 12 | 13 | expect( matches[0] ).toEqual('___'); 14 | 15 | }); 16 | 17 | it('* and + are "greedy" and may match more text than you intend', function() { 18 | const matches = '"One","Two","Three"'.match( /".*"/ ); 19 | 20 | expect( matches[0] ).toEqual('"One"'); // Change '"One"' so the test passes 21 | }); 22 | 23 | // Greedy character matching is a common downfall of many regular expressions. 24 | // In short, when * or + is used, the regex engine will attempt to use that 25 | // repeat instruction to capture as much of the string as possible while still 26 | // satisfying the other criteria in the regex. This can sometimes lead to 27 | // unexpected results, as illustrated above. 
28 | // 29 | // Fortunately, this greedy behavior can be easily toggled on and off, as we'll 30 | // see below. In the case of the One,Two,Three Koan, there are two ways we can 31 | // match just the "One" portion of the string: 32 | // * Use a negative character set 33 | // * Turn off greedy matching and use "reluctant" or "minimal" matching instead 34 | 35 | it('use a negative character set to control the greedy match', function() { 36 | const workingMatches = '"One","Two","Three"'.match( /"[^"]*"/ ); 37 | 38 | const tagMatches = 'Hello, world!'.match( /___/ ); 39 | 40 | expect( workingMatches[0] ).toEqual('"One"'); 41 | 42 | expect( tagMatches[0] ).toEqual(''); 43 | }); 44 | 45 | it('use minimal match to capture the substring', function() { 46 | // To change * and + to non-greedy, minimal matching, 47 | // use *? and +? instead. 48 | 49 | const fixThisPattern = /___/; 50 | 51 | const matches = 'Hello, world!'.match(fixThisPattern); 52 | 53 | expect( matches[0] ).toEqual(''); 54 | 55 | expect(fixThisPattern.source).not.toMatch(/\[/); 56 | }); 57 | 58 | 59 | // Side note: Have you noticed that none of the above regex patterns use 60 | // ^ $ like most of the other patterns in earlier Koans? 61 | // 62 | // Do you know why? 63 | 64 | 65 | // Remember: 66 | // * and + are greedy by default. 67 | // . does not match linefeed (\n), so .* is greedy until it encounters a \n 68 | 69 | }); 70 | -------------------------------------------------------------------------------- /answers/05_CharacterSetShortcuts.js: -------------------------------------------------------------------------------- 1 | describe("Character Set Shortcuts", function() { 2 | 3 | it('dot (.) 
matches anything', function() { 4 | const fixThisPattern = /^d.g$/; 5 | 6 | expect( 'dig' ).toMatch(fixThisPattern); 7 | expect( 'dug' ).toMatch(fixThisPattern); 8 | expect( 'd g' ).toMatch(fixThisPattern); 9 | expect( 'd3g' ).toMatch(fixThisPattern); 10 | expect( 'd!g' ).toMatch(fixThisPattern); 11 | expect( 'd(g' ).toMatch(fixThisPattern); 12 | 13 | expect(fixThisPattern.source).toMatch(/\./); 14 | }); 15 | 16 | it('dot (.) matches anything... except line-feed', function() { 17 | const fixThisPattern = /^d\ng$/; 18 | 19 | expect( 'd\ng' ).toMatch(fixThisPattern); 20 | }); 21 | 22 | it('dot (.) matches dot (.) when used inside [ ]', function() { 23 | const fixThisPattern = /^3[.]1$/; 24 | 25 | expect( '3.1' ).toMatch(fixThisPattern); 26 | expect( '3,1' ).not.toMatch(fixThisPattern); 27 | }); 28 | 29 | it('to match a dot (.) outside of [ ], it must be escaped with backslash', function() { 30 | const fixThisPattern = /\.$/; 31 | 32 | expect( 'Every sentence must end with a period.' ).toMatch(fixThisPattern); 33 | expect( 'What about questions?' ).not.toMatch(fixThisPattern); 34 | }); 35 | 36 | it('backslash-d matches any digit, like [0-9]', function() { 37 | // backslash-d is entered like: \d 38 | 39 | const fixThisPattern = /^\d\d\d\d$/; 40 | 41 | expect( '1234' ).toMatch(fixThisPattern); 42 | expect( '3281' ).toMatch(fixThisPattern); 43 | expect( '5555' ).toMatch(fixThisPattern); 44 | expect( '9329' ).toMatch(fixThisPattern); 45 | 46 | expect(fixThisPattern.source).toMatch(/d/); 47 | }); 48 | 49 | it('backslash-w matches any word character, plus _, like [a-zA-Z0-9_]', function() { 50 | // Note: \w may behave differently in other programming languages. 51 | // for example, numbers may or may not be included. Refer to the 52 | // language Regular Expression or Pattern API before using \w. 
53 | 54 | const fixThisPattern = /^\w\w\w$/; 55 | 56 | expect( 'Dog' ).toMatch(fixThisPattern); 57 | expect( 'cAt' ).toMatch(fixThisPattern); 58 | expect( '_x_' ).toMatch(fixThisPattern); 59 | 60 | expect(fixThisPattern.source).toMatch(/w/); 61 | }); 62 | 63 | it('backslash-s matches any whitespace character', function() { 64 | // Whitespace is: 65 | // ( ) 66 | // tab (\t) 67 | // line feed (\n) 68 | // carriage return (\r) 69 | // form feed (\f) 70 | // vertical tab (\v) 71 | // or [ \t\n\r\f\v] 72 | 73 | const fixThisPattern = /^[\s\w][\s\w][\s\w]$/; 74 | 75 | expect( ' ' ).toMatch(fixThisPattern); 76 | expect( 'x\ny' ).toMatch(fixThisPattern); 77 | expect( '\t_Z' ).toMatch(fixThisPattern); 78 | 79 | expect(fixThisPattern.source).toMatch(/s/); 80 | }); 81 | 82 | it('backslash-D, backslash-W, backslash-S match non-digit, non-word, and non-whitespace', function() { 83 | // /\D/ is the same as /[^\d]/ 84 | // /\W/ is the same as /[^\w]/ 85 | // /\S/ is the same as /[^\s]/ 86 | // /\S/ is NOT the same as /[\w\d]/ - why? 87 | 88 | const whatStringFitsThePattern = "***"; 89 | 90 | expect( whatStringFitsThePattern ).toMatch(/^[\S][\S][\S]$/); 91 | expect( whatStringFitsThePattern ).toMatch(/^[^\w\d][^\w\d][^\w\d]$/); 92 | }); 93 | 94 | it('character set shortcuts can be used inside [ ]', function() { 95 | const fixThisPattern = /^[\w\s][\w\s][\w\s]$/; 96 | 97 | expect( 'Dog' ).toMatch(fixThisPattern); 98 | expect( ' 2 ' ).toMatch(fixThisPattern); 99 | 100 | expect(fixThisPattern.source).toMatch(/[\\]/); 101 | }); 102 | 103 | }); 104 | -------------------------------------------------------------------------------- /__tests__/05_CharacterSetShortcuts.js: -------------------------------------------------------------------------------- 1 | describe("Character Set Shortcuts", function() { 2 | 3 | it('dot (.) 
matches anything', function() { 4 | const fixThisPattern = /^___$/; 5 | 6 | expect( 'dig' ).toMatch(fixThisPattern); 7 | expect( 'dug' ).toMatch(fixThisPattern); 8 | expect( 'd g' ).toMatch(fixThisPattern); 9 | expect( 'd3g' ).toMatch(fixThisPattern); 10 | expect( 'd!g' ).toMatch(fixThisPattern); 11 | expect( 'd(g' ).toMatch(fixThisPattern); 12 | 13 | expect(fixThisPattern.source).toMatch(/\./); 14 | }); 15 | 16 | it('dot (.) matches anything... except line-feed', function() { 17 | const fixThisPattern = /^d.g$/; 18 | 19 | expect( 'd\ng' ).toMatch(fixThisPattern); 20 | }); 21 | 22 | it('dot (.) matches dot (.) when used inside [ ]', function() { 23 | const fixThisPattern = /^___$/; 24 | 25 | expect( '3.1' ).toMatch(fixThisPattern); 26 | expect( '3,1' ).not.toMatch(fixThisPattern); 27 | }); 28 | 29 | it('to match a dot (.) outside of [ ], it must be escaped with backslash', function() { 30 | const fixThisPattern = /___$/; 31 | 32 | expect( 'Every sentence must end with a period.' ).toMatch(fixThisPattern); 33 | expect( 'What about questions?' ).not.toMatch(fixThisPattern); 34 | }); 35 | 36 | it('backslash-d matches any digit, like [0-9]', function() { 37 | // backslash-d is entered like: \d 38 | 39 | const fixThisPattern = /^___$/; 40 | 41 | expect( '1234' ).toMatch(fixThisPattern); 42 | expect( '3281' ).toMatch(fixThisPattern); 43 | expect( '5555' ).toMatch(fixThisPattern); 44 | expect( '9329' ).toMatch(fixThisPattern); 45 | 46 | expect(fixThisPattern.source).toMatch(/d/); 47 | }); 48 | 49 | it('backslash-w matches any word character, plus _, like [a-zA-Z0-9_]', function() { 50 | // Note: \w may behave differently in other programming languages. 51 | // for example, numbers may or may not be included. Refer to the 52 | // language Regular Expression or Pattern API before using \w. 
53 | 54 | const fixThisPattern = /^___$/; 55 | 56 | expect( 'Dog' ).toMatch(fixThisPattern); 57 | expect( 'cAt' ).toMatch(fixThisPattern); 58 | expect( '_x_' ).toMatch(fixThisPattern); 59 | 60 | expect(fixThisPattern.source).toMatch(/w/); 61 | }); 62 | 63 | it('backslash-s matches any whitespace character', function() { 64 | // Whitespace is: 65 | // ( ) 66 | // tab (\t) 67 | // line feed (\n) 68 | // carriage return (\r) 69 | // form feed (\f) 70 | // vertical tab (\v) 71 | // or [ \t\n\r\f\v] 72 | 73 | const fixThisPattern = /^___$/; 74 | 75 | expect( ' ' ).toMatch(fixThisPattern); 76 | expect( 'x\ny' ).toMatch(fixThisPattern); 77 | expect( '\t_Z' ).toMatch(fixThisPattern); 78 | 79 | expect(fixThisPattern.source).toMatch(/s/); 80 | }); 81 | 82 | it('backslash-D, backslash-W, backslash-S match non-digit, non-word, and non-whitespace', function() { 83 | // /\D/ is the same as /[^\d]/ 84 | // /\W/ is the same as /[^\w]/ 85 | // /\S/ is the same as /[^\s]/ 86 | // /\S/ is NOT the same as /[\w\d]/ - why? 87 | 88 | const whatStringFitsThePattern = "___"; 89 | 90 | expect( whatStringFitsThePattern ).toMatch(/^[\S][\S][\S]$/); 91 | expect( whatStringFitsThePattern ).toMatch(/^[^\w\d][^\w\d][^\w\d]$/); 92 | }); 93 | 94 | it('character set shortcuts can be used inside [ ]', function() { 95 | const fixThisPattern = /^___$/; 96 | 97 | expect( 'Dog' ).toMatch(fixThisPattern); 98 | expect( ' 2 ' ).toMatch(fixThisPattern); 99 | 100 | expect(fixThisPattern.source).toMatch(/[\\]/); 101 | }); 102 | 103 | }); 104 | -------------------------------------------------------------------------------- /answers/09_Grouping.js: -------------------------------------------------------------------------------- 1 | describe("Grouping", function() { 2 | 3 | // Until now, we have used special characters and modifiers like ? and + 4 | // with single characters. 
All regular expression operators can also be 5 | // applied to sequences by grouping the sequence inside ( ) like 6 | // "ababababab" =~ /^(ab)+$/ 7 | 8 | it('the cat is optional', function() { 9 | 10 | const fixThisPattern = /^Dog(Cat)?Pony$/; 11 | 12 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 13 | expect( 'DogPony' ).toMatch(fixThisPattern); 14 | }); 15 | 16 | it('anything goes as long as we get to keep the pony', function() { 17 | 18 | const fixThisPattern = /^(Dog)*(Cat)?Pony$/; 19 | 20 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 21 | expect( 'DogPony' ).toMatch(fixThisPattern); 22 | expect( 'CatPony' ).toMatch(fixThisPattern); 23 | expect( 'DogDogDogDogCatPony' ).toMatch(fixThisPattern); 24 | }); 25 | 26 | it('you can nest regular expression operations inside ( )', function() { 27 | 28 | const fixThisPattern = /^(Dog)*(Cats?)?Pony$/; 29 | 30 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 31 | expect( 'DogCatsPony' ).toMatch(fixThisPattern); 32 | expect( 'DogPony' ).toMatch(fixThisPattern); 33 | expect( 'CatPony' ).toMatch(fixThisPattern); 34 | expect( 'DogDogDogDogCatsPony' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('you can nest ( ) inside ( )', function() { 38 | 39 | const fixThisPattern = /^((ab)+cd)+(ab)*$/; 40 | 41 | expect( 'abcd' ).toMatch(fixThisPattern); 42 | expect( 'abcdabcdabcd' ).toMatch(fixThisPattern); 43 | expect( 'ababababcdabab' ).toMatch(fixThisPattern); 44 | }); 45 | 46 | it('Real World 3: very VERY basic email address verification', function() { 47 | // Assumptions: 48 | // * email address is composed of [username]@[domain] 49 | // * both username and domain are required 50 | // * username portion of email may contain letters, numbers, _ - . + 51 | // * domain is composed of [letters/numbers].[letters/numbers]....repeat 52 | // * in domain, . is the separator between each unit. Domain cannot begin or end with . 
53 | // * email address will be all in lowercase 54 | // 55 | // Note that this is an oversimplification of the real email address 56 | // syntax requirements. In fact, using a regular expression to check 57 | // the full email address specification (http://www.ietf.org/rfc/rfc822.txt) 58 | // is very, very complicated. In this case, as with many others, 59 | // it is important to understand the business rules that apply, and 60 | // to not over-engineer. Here, we're agreeing that these are the only 61 | // rules we care about for email addresses, so our regex doesn't need 62 | // to worry about all the other stuff. 63 | 64 | const fixThisPattern = /^[a-z0-9_+.\-]+@[a-z0-9]+(\.[a-z0-9]+)+$/; 65 | 66 | // Hint: Develop the regex in pieces. First make a pattern that 67 | // matches the username part, then one section of the domain, 68 | // then allow multiple domain sections, then glue it all together. 69 | // 70 | // Just like writing a program, a complicated regular expression 71 | // can often be broken into smaller, simpler pieces and then 72 | // assembled into the final whole. 
73 | 74 | // valid 75 | expect( 'myname@internet.com' ).toMatch(fixThisPattern); 76 | expect( 'my_name2@server16.subdomain.internet.org' ).toMatch(fixThisPattern); 77 | expect( 'jdoe87@college.edu' ).toMatch(fixThisPattern); 78 | expect( 'jdoe87+myalias@gmail.com' ).toMatch(fixThisPattern); 79 | expect( 'my.really.long.first-name@internet.co.uk' ).toMatch(fixThisPattern); 80 | 81 | // invalid 82 | expect( 'myname@internet..com' ).not.toMatch(fixThisPattern); 83 | expect( 'myname@internet' ).not.toMatch(fixThisPattern); 84 | expect( 'myname' ).not.toMatch(fixThisPattern); 85 | expect( 'myname!@internet.com' ).not.toMatch(fixThisPattern); 86 | expect( 'myname@internet.my-subdomain.com' ).not.toMatch(fixThisPattern); 87 | expect( 'MyName@internet.com' ).not.toMatch(fixThisPattern); 88 | }); 89 | 90 | }); 91 | -------------------------------------------------------------------------------- /__tests__/09_Grouping.js: -------------------------------------------------------------------------------- 1 | describe("Grouping", function() { 2 | 3 | // Until now, we have used special characters and modifiers like ? and + 4 | // with single characters. 
All regular expression operators can also be 5 | // applied to sequences by grouping the sequence inside ( ) like 6 | // "ababababab" =~ /^(ab)+$/ 7 | 8 | it('the cat is optional', function() { 9 | 10 | const fixThisPattern = /^DogCatPony$/; 11 | 12 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 13 | expect( 'DogPony' ).toMatch(fixThisPattern); 14 | }); 15 | 16 | it('anything goes as long as we get to keep the pony', function() { 17 | 18 | const fixThisPattern = /^DogCatPony$/; 19 | 20 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 21 | expect( 'DogPony' ).toMatch(fixThisPattern); 22 | expect( 'CatPony' ).toMatch(fixThisPattern); 23 | expect( 'DogDogDogDogCatPony' ).toMatch(fixThisPattern); 24 | }); 25 | 26 | it('you can nest regular expression operations inside ( )', function() { 27 | 28 | const fixThisPattern = /^DogCatPony$/; 29 | 30 | expect( 'DogCatPony' ).toMatch(fixThisPattern); 31 | expect( 'DogCatsPony' ).toMatch(fixThisPattern); 32 | expect( 'DogPony' ).toMatch(fixThisPattern); 33 | expect( 'CatPony' ).toMatch(fixThisPattern); 34 | expect( 'DogDogDogDogCatsPony' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('you can nest ( ) inside ( )', function() { 38 | 39 | const fixThisPattern = /^___$/; 40 | 41 | expect( 'abcd' ).toMatch(fixThisPattern); 42 | expect( 'abcdabcdabcd' ).toMatch(fixThisPattern); 43 | expect( 'ababababcdabab' ).toMatch(fixThisPattern); 44 | }); 45 | 46 | it('Real World 3: very VERY basic email address verification', function() { 47 | // Assumptions: 48 | // * email address is composed of [username]@[domain] 49 | // * both username and domain are required 50 | // * username portion of email may contain letters, numbers, _ - . + 51 | // * domain is composed of [letters/numbers].[letters/numbers]....repeat 52 | // * in domain, . is the separator between each unit. Domain cannot begin or end with . 
53 | // * email address will be all in lowercase 54 | // 55 | // Note that this is an oversimplification of the real email address 56 | // syntax requirements. In fact, using a regular expression to check 57 | // the full email address specification (http://www.ietf.org/rfc/rfc822.txt) 58 | // is very, very complicated. In this case, as with many others, 59 | // it is important to understand the business rules that apply, and 60 | // to not over-engineer. Here, we're agreeing that these are the only 61 | // rules we care about for email addresses, so our regex doesn't need 62 | // to worry about all the other stuff. 63 | 64 | const fixThisPattern = /^___$/; 65 | 66 | // Hint: Develop the regex in pieces. First make a pattern that 67 | // matches the username part, then one section of the domain, 68 | // then allow multiple domain sections, then glue it all together. 69 | // 70 | // Just like writing a program, a complicated regular expression 71 | // can often be broken into smaller, simpler pieces and then 72 | // assembled into the final whole. 
73 | 74 | // valid 75 | expect( 'myname@internet.com' ).toMatch(fixThisPattern); 76 | expect( 'my_name2@server16.subdomain.internet.org' ).toMatch(fixThisPattern); 77 | expect( 'jdoe87@college.edu' ).toMatch(fixThisPattern); 78 | expect( 'jdoe87+myalias@gmail.com' ).toMatch(fixThisPattern); 79 | expect( 'my.really.long.first-name@internet.co.uk' ).toMatch(fixThisPattern); 80 | 81 | // invalid 82 | expect( 'myname@internet..com' ).not.toMatch(fixThisPattern); 83 | expect( 'myname@internet' ).not.toMatch(fixThisPattern); 84 | expect( 'myname' ).not.toMatch(fixThisPattern); 85 | expect( 'myname!@internet.com' ).not.toMatch(fixThisPattern); 86 | expect( 'myname@internet.my-subdomain.com' ).not.toMatch(fixThisPattern); 87 | expect( 'MyName@internet.com' ).not.toMatch(fixThisPattern); 88 | }); 89 | 90 | }); 91 | -------------------------------------------------------------------------------- /answers/12_RegexReplace.js: -------------------------------------------------------------------------------- 1 | describe("Regular Expression Replacement", function() { 2 | 3 | // Sometimes, you want to do more than just FIND some text, or validate 4 | // its format. You may want to construct new strings from old strings. 5 | // Most (all?) programming languages and editors that provide Regular 6 | // Expression operations have some kind of "replace" function. This 7 | // works just like a "Find-and-Replace" tool that is common in any 8 | // decent text editor, but it adds the power of Regular Expressions 9 | // to the "Find" part, and it adds Regex capture groups to the 10 | // "Replace" part. 11 | // 12 | // Note: Every language and text editor has a different way of referencing 13 | // capture groups in the Replace operation, so be sure to check the 14 | // documentation before using this tool. 15 | // 16 | // Once you've done find/replace using Regex, you'll never go back. 
17 | 18 | it('simple replacement', function() { 19 | 20 | const newString = "dog cat pony".replace( /cat/, 'lemming' ); 21 | 22 | expect( newString ).toEqual('dog lemming pony'); 23 | }); 24 | 25 | it('the //g suffix changes "Replace One" into "Replace All"', function() { 26 | const original = "a b a b a b"; 27 | 28 | const newFirst = original.replace( /b/, "X"); 29 | const newAll = original.replace( /b/g, "X"); 30 | 31 | expect( newFirst ).toEqual('a X a b a b'); 32 | expect( newAll ).toEqual('a X a X a X'); 33 | }); 34 | 35 | it('String.replace is safe even when there is no match', function() { 36 | const original = "a b a b a b"; 37 | 38 | const newString = original.replace(/x/g, "z"); 39 | 40 | expect( newString ).toEqual('a b a b a b'); 41 | }); 42 | 43 | it('String.replace will replace the full match (or each full match with //g) with the new string', function() { 44 | const original = '
Hello
'; 45 | 46 | const newString = original.replace(/^
.*?<\/div>$/g, 'newId'); 47 | 48 | expect( newString ).toEqual('newId'); 49 | }); 50 | 51 | it('to reference a capture group, use $n inside the replacement string', function() { 52 | 53 | // Note: Since, in Javascript, $ is a special character in the replacement string, 54 | // to put a literal $ in the replacement string, say $$ 55 | 56 | const original = '
Hello
'; 57 | 58 | const newString = original.replace(/^
.*?<\/div>$/g, '$1'); 59 | 60 | expect( newString ).toEqual('someId'); 61 | }); 62 | 63 | it('to match a full string but only replace a portion, use group captures carefully', function() { 64 | 65 | const original = '
Hello
'; 66 | 67 | const newString = original.replace(/^
(.*?)<\/div>$/g, '
$1
'); 68 | 69 | expect( newString ).toEqual('
someId
'); 70 | }); 71 | 72 | // By combining all the earlier lessons of capture groups and repeating character logic 73 | // that we've already covered with backreferences to those captured groups in 74 | // replacement strings, Regular Expression replacement can be a very powerful tool 75 | // in a wide variety of circumstances. 76 | 77 | it('make a URL into an HTML link', function() { 78 | const url = 'http://www.google.com/'; 79 | 80 | const fixThisPattern = /^(.*)$/; 81 | const fixThisReplacementString = '$1'; 82 | 83 | const newString = url.replace(fixThisPattern, fixThisReplacementString); 84 | 85 | expect( newString ).toEqual('http://www.google.com/'); 86 | }); 87 | 88 | it('reformat a date string', function() { 89 | const originalDate = '20120229'; 90 | 91 | const fixThisPattern = /^(\d{4})(\d{2})(\d{2})$/; 92 | const fixThisReplacementString = '$2/$3/$1'; 93 | 94 | const newDate = originalDate.replace(fixThisPattern, fixThisReplacementString); 95 | 96 | expect( newDate ).toEqual('02/29/2012'); 97 | }); 98 | 99 | it('strip the comment from this HTML code', function() { 100 | const html = '

My Title


'; 101 | 102 | const fixThisPattern = //; 103 | const fixThisReplacementString = ''; 104 | 105 | const newHtml = html.replace(fixThisPattern, fixThisReplacementString); 106 | 107 | expect( newHtml ).toEqual('

My Title


'); 108 | }); 109 | 110 | }); 111 | -------------------------------------------------------------------------------- /__tests__/12_RegexReplace.js: -------------------------------------------------------------------------------- 1 | describe("Regular Expression Replacement", function() { 2 | 3 | // Sometimes, you want to do more than just FIND some text, or validate 4 | // its format. You may want to construct new strings from old strings. 5 | // Most (all?) programming languages and editors that provide Regular 6 | // Expression operations have some kind of "replace" function. This 7 | // works just like a "Find-and-Replace" tool that is common in any 8 | // decent text editor, but it adds the power of Regular Expressions 9 | // to the "Find" part, and it adds Regex capture groups to the 10 | // "Replace" part. 11 | // 12 | // Note: Every language and text editor has a different way of referencing 13 | // capture groups in the Replace operation, so be sure to check the 14 | // documentation before using this tool. 15 | // 16 | // Once you've done find/replace using Regex, you'll never go back. 17 | 18 | it('simple replacement', function() { 19 | 20 | const newString = "dog cat pony".replace( /cat/, 'lemming' ); 21 | 22 | expect( newString ).toEqual('___'); 23 | }); 24 | 25 | it('the //g suffix changes "Replace One" into "Replace All"', function() { 26 | const original = "a b a b a b"; 27 | 28 | const newFirst = original.replace( /b/, "X"); 29 | const newAll = original.replace( /b/g, "X"); 30 | 31 | expect( newFirst ).toEqual('___'); 32 | expect( newAll ).toEqual('___'); 33 | }); 34 | 35 | it('String.replace is safe even when there is no match', function() { 36 | const original = "a b a b a b"; 37 | 38 | const newString = original.replace(/x/g, "z"); 39 | 40 | expect( newString ).toEqual('___'); 41 | }); 42 | 43 | it('String.replace will replace the full match (or each full match with //g) with the new string', function() { 44 | const original = '
Hello
'; 45 | 46 | const newString = original.replace(/^
.*?<\/div>$/g, 'newId'); 47 | 48 | expect( newString ).toEqual('___'); 49 | }); 50 | 51 | it('to reference a capture group, use $n inside the replacement string', function() { 52 | 53 | // Note: Since, in Javascript, $ is a special character in the replacement string, 54 | // to put a literal $ in the replacement string, say $$ 55 | 56 | const original = '
Hello
'; 57 | 58 | const newString = original.replace(/^
.*?<\/div>$/g, '$1'); 59 | 60 | expect( newString ).toEqual('___'); 61 | }); 62 | 63 | it('to match a full string but only replace a portion, use group captures carefully', function() { 64 | 65 | const original = '
Hello
'; 66 | 67 | const newString = original.replace(/^
(.*?)<\/div>$/g, '
$1
'); 68 | 69 | expect( newString ).toEqual('___'); 70 | }); 71 | 72 | // By combining all the earlier lessons of capture groups and repeating character logic 73 | // that we've already covered with backreferences to those captured groups in 74 | // replacement strings, Regular Expression replacement can be a very powerful tool 75 | // in a wide variety of circumstances. 76 | 77 | it('make a URL into an HTML link', function() { 78 | const url = 'http://www.google.com/'; 79 | 80 | const fixThisPattern = /___/; 81 | const fixThisReplacementString = '___'; 82 | 83 | const newString = url.replace(fixThisPattern, fixThisReplacementString); 84 | 85 | expect( newString ).toEqual('http://www.google.com/'); 86 | }); 87 | 88 | it('reformat a date string', function() { 89 | const originalDate = '20120229'; 90 | 91 | const fixThisPattern = /___/; 92 | const fixThisReplacementString = '___'; 93 | 94 | const newDate = originalDate.replace(fixThisPattern, fixThisReplacementString); 95 | 96 | expect( newDate ).toEqual('02/29/2012'); 97 | }); 98 | 99 | it('strip the comment from this HTML code', function() { 100 | const html = '

My Title


'; 101 | 102 | const fixThisPattern = /___/; 103 | const fixThisReplacementString = '___'; 104 | 105 | const newHtml = html.replace(fixThisPattern, fixThisReplacementString); 106 | 107 | expect( newHtml ).toEqual('

My Title


'); 108 | }); 109 | 110 | }); 111 | -------------------------------------------------------------------------------- /answers/07_RepeatingCharacters.js: -------------------------------------------------------------------------------- 1 | describe("Repeating Characters", function() { 2 | 3 | it('use ? to match Zero or One of a character', function() { 4 | // If a character is optional, follow it with a ? 5 | // in the pattern 6 | 7 | const fixThisPattern = /^soo?n$/; 8 | 9 | expect( 'son' ).toMatch(fixThisPattern); 10 | expect( 'soon' ).toMatch(fixThisPattern); 11 | expect( 'sooon' ).not.toMatch(fixThisPattern); 12 | }); 13 | 14 | it('use + to match One Or More of a character', function() { 15 | const thisPatternWorks = /^so+n$/ 16 | 17 | const fixThisPattern = /^x +y$/; 18 | 19 | expect( 'son' ).toMatch(thisPatternWorks); 20 | expect( 'soon' ).toMatch(thisPatternWorks); 21 | expect( 'soooooooon' ).toMatch(thisPatternWorks); 22 | expect( 'sun' ).not.toMatch(thisPatternWorks); 23 | 24 | expect( 'x y' ).toMatch(fixThisPattern); 25 | expect( 'x y' ).toMatch(fixThisPattern); 26 | expect( 'xy' ).not.toMatch(fixThisPattern); 27 | }); 28 | 29 | it('use * to match Zero Or More of a character', function() { 30 | const fixThisPattern = /^x *y$/; 31 | 32 | expect( 'x y' ).toMatch(fixThisPattern); 33 | expect( 'x y' ).toMatch(fixThisPattern); 34 | expect( 'xy' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('use {n} to match a specific count of repeated characters', function() { 38 | 39 | const thisPatternWorks = /^xy{3}z$/; 40 | 41 | const fixThisPattern = /^\d{3}-?\d{3}-?\d{4}$/; 42 | 43 | expect( 'xyyyz' ).toMatch(thisPatternWorks); 44 | 45 | expect( '614-555-1234' ).toMatch(fixThisPattern); 46 | expect( '6145551234' ).toMatch(fixThisPattern); 47 | expect( '614-555-123' ).not.toMatch(fixThisPattern); 48 | 49 | expect(fixThisPattern.source).toMatch(/\{/); 50 | }); 51 | 52 | it('use {n,m} to match a range number of repeated characters', function() { 53 | 54 | const 
thisPatternWorks = /^xy{2,5}z$/; 55 | 56 | const fixThisPattern = /^3\.\d{1,3}$/; 57 | 58 | expect( 'xyyyz' ).toMatch(thisPatternWorks); 59 | expect( 'xyyz' ).toMatch(thisPatternWorks); 60 | expect( 'xyyyyyz' ).toMatch(thisPatternWorks); 61 | expect( 'xyz' ).not.toMatch(thisPatternWorks); 62 | 63 | expect( '3.1' ).toMatch(fixThisPattern); 64 | expect( '3.142' ).toMatch(fixThisPattern); 65 | expect( '3.14159' ).not.toMatch(fixThisPattern); 66 | 67 | expect(fixThisPattern.source).toMatch(/\{/); 68 | }); 69 | 70 | it('use {n,} for "at least n" and {0,m} for "not more than m" repeated characters', function() { 71 | const fixThisPattern = /^a {2,}b$/; 72 | 73 | expect( 'a b' ).toMatch(fixThisPattern); 74 | expect( 'a b' ).toMatch(fixThisPattern); 75 | expect( 'a b' ).not.toMatch(fixThisPattern); 76 | 77 | expect(fixThisPattern.source).toMatch(/\{/); 78 | }); 79 | 80 | it('repeater characters (?, +, etc.) also work with character sets and shorthand sets like ., backslash-d, etc.', function() { 81 | // We will try to match a floating point number. 82 | // Assumptions: 83 | // The number must be positive 84 | // There must be a whole number part (before the decimal) 85 | // There must be a fractional part (after the decimal) 86 | // Either or both of these parts may be zero (0) 87 | 88 | const fixThisPattern = /^\d+\.\d+$/; 89 | 90 | expect( '3.14159' ).toMatch(fixThisPattern); 91 | expect( '0.9' ).toMatch(fixThisPattern); 92 | expect( '12345.67890' ).toMatch(fixThisPattern); 93 | expect( '777' ).not.toMatch(fixThisPattern); 94 | 95 | expect(fixThisPattern.source).toMatch(/[d\[]/); 96 | expect(fixThisPattern.source).toMatch(/[*+]/); 97 | 98 | // Important Lesson: 99 | // When writing a regular expression, it is always important to thoroughly 100 | // define and understand the requirements and assumptions. The regular expression 101 | // language is very precise. If the requirements are vague or not well- 102 | // understood, the regex will be unreliable for edge cases. 
103 | }); 104 | 105 | it('repeater characters are special characters that must be backslash-escaped to match in strings', function() { 106 | expect( 'x*y=z' ).toMatch(/^x\*y=z$/); 107 | }); 108 | 109 | it('.* can match any amount of anything... except newline', function() { 110 | 111 | expect( '' ).toMatch(/^.*$/); // here are a couple of free ones for you 112 | expect( 'jgi493ujitgj8g*##@!uiofg893ign4q389A*(eu89*(#=U*@UJ()()0jijge' ).toMatch(/^.*$/); 113 | 114 | expect( 'ab\ncd' ).not.toMatch(/^.*$/); // fix this string to make the test pass 115 | }); 116 | 117 | it('use a character set to match anything, including newline', function() { 118 | 119 | expect( 'The quick brown fox\njumped over the lazy dog.\n' ).toMatch(/^[\w\W]*$/); 120 | 121 | }); 122 | 123 | it('repeater characters are NOT special characters when used inside [ ]', function() { 124 | const fixThisPattern = /^x[*+]y=z$/; 125 | 126 | expect( 'x*y=z' ).toMatch(fixThisPattern); 127 | expect( 'x+y=z' ).toMatch(fixThisPattern); 128 | }); 129 | 130 | it('Real World: Match a quotation', function() { 131 | // Assumptions: 132 | // The quotation will begin and end with " 133 | // The quotation will be a single line (no newlines) 134 | // Any other character besides newline may appear in the quotation 135 | 136 | const fixThisPattern = /^".*"$/; 137 | 138 | expect( '"Here today, gone tomorrow."' ).toMatch(fixThisPattern); 139 | expect( '"Secant, tangent, and cosine. 3.14159!"' ).toMatch(fixThisPattern); 140 | 141 | // This example is incomplete and not robust enough for real life. 142 | // We will learn some techniques to improve it in later Koans. 143 | }); 144 | 145 | }); 146 | -------------------------------------------------------------------------------- /__tests__/07_RepeatingCharacters.js: -------------------------------------------------------------------------------- 1 | describe("Repeating Characters", function() { 2 | 3 | it('use ? 
to match Zero or One of a character', function() { 4 | // If a character is optional, follow it with a ? 5 | // in the pattern 6 | 7 | const fixThisPattern = /^___$/; 8 | 9 | expect( 'son' ).toMatch(fixThisPattern); 10 | expect( 'soon' ).toMatch(fixThisPattern); 11 | expect( 'sooon' ).not.toMatch(fixThisPattern); 12 | }); 13 | 14 | it('use + to match One Or More of a character', function() { 15 | const thisPatternWorks = /^so+n$/ 16 | 17 | const fixThisPattern = /^___$/; 18 | 19 | expect( 'son' ).toMatch(thisPatternWorks); 20 | expect( 'soon' ).toMatch(thisPatternWorks); 21 | expect( 'soooooooon' ).toMatch(thisPatternWorks); 22 | expect( 'sun' ).not.toMatch(thisPatternWorks); 23 | 24 | expect( 'x y' ).toMatch(fixThisPattern); 25 | expect( 'x y' ).toMatch(fixThisPattern); 26 | expect( 'xy' ).not.toMatch(fixThisPattern); 27 | }); 28 | 29 | it('use * to match Zero Or More of a character', function() { 30 | const fixThisPattern = /^___$/; 31 | 32 | expect( 'x y' ).toMatch(fixThisPattern); 33 | expect( 'x y' ).toMatch(fixThisPattern); 34 | expect( 'xy' ).toMatch(fixThisPattern); 35 | }); 36 | 37 | it('use {n} to match a specific count of repeated characters', function() { 38 | 39 | const thisPatternWorks = /^xy{3}z$/; 40 | 41 | const fixThisPattern = /^___$/; 42 | 43 | expect( 'xyyyz' ).toMatch(thisPatternWorks); 44 | 45 | expect( '614-555-1234' ).toMatch(fixThisPattern); 46 | expect( '6145551234' ).toMatch(fixThisPattern); 47 | expect( '614-555-123' ).not.toMatch(fixThisPattern); 48 | 49 | expect(fixThisPattern.source).toMatch(/\{/); 50 | }); 51 | 52 | it('use {n,m} to match a range number of repeated characters', function() { 53 | 54 | const thisPatternWorks = /^xy{2,5}z$/; 55 | 56 | const fixThisPattern = /^___$/; 57 | 58 | expect( 'xyyyz' ).toMatch(thisPatternWorks); 59 | expect( 'xyyz' ).toMatch(thisPatternWorks); 60 | expect( 'xyyyyyz' ).toMatch(thisPatternWorks); 61 | expect( 'xyz' ).not.toMatch(thisPatternWorks); 62 | 63 | expect( '3.1' 
).toMatch(fixThisPattern); 64 | expect( '3.142' ).toMatch(fixThisPattern); 65 | expect( '3.14159' ).not.toMatch(fixThisPattern); 66 | 67 | expect(fixThisPattern.source).toMatch(/\{/); 68 | }); 69 | 70 | it('use {n,} for "at least n" and {0,m} for "not more than m" repeated characters', function() { 71 | const fixThisPattern = /^___$/; 72 | 73 | expect( 'a b' ).toMatch(fixThisPattern); 74 | expect( 'a b' ).toMatch(fixThisPattern); 75 | expect( 'a b' ).not.toMatch(fixThisPattern); 76 | 77 | expect(fixThisPattern.source).toMatch(/\{/); 78 | }); 79 | 80 | it('repeater characters (?, +, etc.) also work with character sets and shorthand sets like ., backslash-d, etc.', function() { 81 | // We will try to match a floating point number. 82 | // Assumptions: 83 | // The number must be positive 84 | // There must be a whole number part (before the decimal) 85 | // There must be a fractional part (after the decimal) 86 | // Either or both of these parts may be zero (0) 87 | 88 | const fixThisPattern = /^___$/; 89 | 90 | expect( '3.14159' ).toMatch(fixThisPattern); 91 | expect( '0.9' ).toMatch(fixThisPattern); 92 | expect( '12345.67890' ).toMatch(fixThisPattern); 93 | expect( '777' ).not.toMatch(fixThisPattern); 94 | 95 | expect(fixThisPattern.source).toMatch(/[d\[]/); 96 | expect(fixThisPattern.source).toMatch(/[*+]/); 97 | 98 | // Important Lesson: 99 | // When writing a regular expression, it is always important to thoroughly 100 | // define and understand the requirements and assumptions. The regular expression 101 | // language is very precise. If the requirements are vague or not well- 102 | // understood, the regex will be unreliable for edge cases. 103 | }); 104 | 105 | it('repeater characters are special characters that must be backslash-escaped to match in strings', function() { 106 | expect( 'x*y=z' ).toMatch(/^___$/); 107 | }); 108 | 109 | it('.* can match any amount of anything... 
except newline', function() { 110 | 111 | expect( '' ).toMatch(/^.*$/); // here are a couple of free ones for you 112 | expect( 'jgi493ujitgj8g*##@!uiofg893ign4q389A*(eu89*(#=U*@UJ()()0jijge' ).toMatch(/^.*$/); 113 | 114 | expect( '___' ).not.toMatch(/^.*$/); // fix this string to make the test pass 115 | }); 116 | 117 | it('use a character set to match anything, including newline', function() { 118 | 119 | expect( 'The quick brown fox\njumped over the lazy dog.\n' ).toMatch(/^___*$/); 120 | 121 | }); 122 | 123 | it('repeater characters are NOT special characters when used inside [ ]', function() { 124 | const fixThisPattern = /^___$/; 125 | 126 | expect( 'x*y=z' ).toMatch(fixThisPattern); 127 | expect( 'x+y=z' ).toMatch(fixThisPattern); 128 | }); 129 | 130 | it('Real World: Match a quotation', function() { 131 | // Assumptions: 132 | // The quotation will begin and end with " 133 | // The quotation will be a single line (no newlines) 134 | // Any other character besides newline may appear in the quotation 135 | 136 | const fixThisPattern = /^___$/; 137 | 138 | expect( '"Here today, gone tomorrow."' ).toMatch(fixThisPattern); 139 | expect( '"Secant, tangent, and cosine. 3.14159!"' ).toMatch(fixThisPattern); 140 | 141 | // This example is incomplete and not robust enough for real life. 142 | // We will learn some techniques to improve it in later Koans. 143 | }); 144 | 145 | }); 146 | -------------------------------------------------------------------------------- /__tests__/10_CapturingGroups.js: -------------------------------------------------------------------------------- 1 | describe("Capturing Groups", function() { 2 | 3 | const ___ = 0; 4 | 5 | // In most languages and editors that support regular expressions, 6 | // ( ) are used for more than just grouping sequences of characters 7 | // in the regex. Each ( ) group is remembered and the characters that 8 | // are matched inside will be stored in a variable so it can be 9 | // accessed later. 
JavaScript supports this facility in the RegExp 10 | // object, through the RegExp.exec() method. 11 | // 12 | // RegExp.exec() returns a String Array where index 0 is the full 13 | // matched string and each index after that, from 1 to however many 14 | // groups are in the pattern, contains the remembered text from each 15 | // group. 16 | // 17 | // For example: 18 | // const matchGroups = /^(\D+)\d+(\D+)$/.exec("abc1234xyz"); 19 | // expect(matchGroups.length).toEqual(3); 20 | // expect(matchGroups[0]).toEqual("abc1234xyz"); 21 | // expect(matchGroups[1]).toEqual("abc"); 22 | // expect(matchGroups[2]).toEqual("xyz"); 23 | // 24 | // Capture groups also work with nested ( ). To keep track of which ( ) 25 | // group goes with which array index, count the open parens ( from left 26 | // to right. 27 | // 28 | // Note: A capture group may match the empty string (example: (.*) ); a group that takes no part in the match is undefined in the result array 29 | 30 | it('getting the hang of it capture groups', function() { 31 | 32 | const pattern = /^([A-Za-z][\w\-.]*(\+([\w+]+))?)@(([a-z\d]+)((\.([a-z\d]+))+))$/; 33 | 34 | const matches1 = pattern.exec( 'My.Name01+alias@mail.gmail.com' ); 35 | const matches2 = pattern.exec( 'simple@gmail.com' ); 36 | 37 | expect( matches1[0] ).toEqual('___'); 38 | expect( matches1[1] ).toEqual('___'); 39 | expect( matches1[3] ).toEqual('___'); 40 | expect( matches1[___] ).toEqual('mail.gmail.com'); 41 | 42 | expect( matches2[2] ).toEqual(___); 43 | expect( matches2[___] ).toEqual('.com'); // there are two possible answers here 44 | 45 | // Do you recognize this pattern? It's a more complex version of the email 46 | // pattern from the Grouping Koan. What enhancements have been made? 47 | }); 48 | 49 | it('Real World 4: Find the ID of a DIV tag', function() { 50 | // Assumption: 51 | // * Each DIV tag will be of the simple form: 52 | //
Some text may be here
53 | // * The spaces within the tag will be exactly as shown, so
is illegal 54 | // * The id will be the only attribute inside the div tag 55 | // * Only double-quote will be used (") 56 | 57 | const fixThisPattern = /^___$/; 58 | 59 | const matches = fixThisPattern.exec( '
Here is my text node!
' ); 60 | 61 | expect( matches[___] ).toEqual("anArbitraryId"); 62 | 63 | }); 64 | 65 | // Important Note: Mathematically speaking, it is not possible for regular 66 | // expressions to exactly capture or match the full range of valid HTML, as 67 | // defined by the W3C specifications. For a more entertaining way of saying 68 | // the same thing, see: 69 | // http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags 70 | // 71 | // That said, if the expectations and requirements are narrower than "parse 72 | // the whole HTML file", and the goal is a simple one of just finding some 73 | // relevant piece of information within the file, and if it's safe to assume 74 | // that the HTML file is properly formed and syntactically correct... 75 | // THEN you can get away with using a regular expression to get text out of HTML. 76 | 77 | it('Real World 5: Find the class of an arbitrary HTML tag', function() { 78 | // Assumption: 79 | // * The tag may be any tag: DIV, SPAN, H1, TABLE, whatever 80 | // * The tag may be a singleton like 81 | // * The tag may contain other attributes (see example above) 82 | // * The class attribute may be multiple classes, like class="one two". In this case, just capture "one two". 83 | // * The attribute spacing will be just as shown, with no extra spaces around = 84 | // * There will only be one HTML tag in the input string 85 | // * Only double-quote will be used (") 86 | // * The class attribute is guaranteed to exist 87 | 88 | const fixThisPattern = /^___$/; 89 | 90 | const matches1 = fixThisPattern.exec( '
Some text here
' ); 91 | const matches2 = fixThisPattern.exec( ' Error! ' ); 92 | const matches3 = fixThisPattern.exec( '' ); 93 | 94 | expect( matches1[___] ).toEqual("boxed"); 95 | expect( matches2[___] ).toEqual("bold red"); 96 | expect( matches3[___] ).toEqual("framed"); 97 | }); 98 | 99 | it('Captured groups can be referenced within the pattern itself using backslash-n', function() { 100 | 101 | // Using the same counting system that the Pattern.exec() method uses, \1 \2 ... \n will 102 | // reference a group that has been defined within that same pattern. 103 | 104 | const thisPatternWorks = /^([a-z]+)\d+\1$/; 105 | 106 | const fixThisPattern = /^___$/; 107 | 108 | expect( 'ab12345ab' ).toMatch(thisPatternWorks); 109 | expect( 'a12345x' ).not.toMatch(thisPatternWorks); 110 | 111 | expect( '"Hello there"' ).toMatch(fixThisPattern); 112 | expect( '|some word here|' ).toMatch(fixThisPattern); 113 | expect( '(an expression)' ).not.toMatch(fixThisPattern); 114 | }); 115 | 116 | it('Real World 6: HTML tags may use single- or double-quotes', function() { 117 | // Assumption: 118 | // Same as above, except that attributes may be: 119 | // class="one" 120 | // class='one' 121 | // The attribute will never use a mismatch, like class="one' 122 | 123 | 124 | const fixThisPattern = /^___$/; 125 | 126 | const matches1 = fixThisPattern.exec( '
Some text here
' ); 127 | const matches2 = fixThisPattern.exec( " Error! " ); 128 | const matches3 = fixThisPattern.exec( '' ); 129 | 130 | expect( matches1[___] ).toEqual('boxed'); 131 | expect( matches2[___] ).toEqual('bold red'); 132 | expect( matches3[___] ).toEqual('framed'); 133 | }); 134 | 135 | }); 136 | -------------------------------------------------------------------------------- /answers/10_CapturingGroups.js: -------------------------------------------------------------------------------- 1 | describe("Capturing Groups", function() { 2 | 3 | const ___ = 0; 4 | 5 | // In most languages and editors that support regular expressions, 6 | // ( ) are used for more than just grouping sequences of characters 7 | // in the regex. Each ( ) group is remembered and the characters that 8 | // are matched inside will be stored in a variable so it can be 9 | // accessed later. JavaScript supports this facility in the RegExp 10 | // object, through the RegExp.exec() method. 11 | // 12 | // RegExp.exec() returns a String Array where index 0 is the full 13 | // matched string and each index after that, from 1 to however many 14 | // groups are in the pattern, contains the remembered text from each 15 | // group. 16 | // 17 | // For example: 18 | // const matchGroups = /^(\D+)\d+(\D+)$/.exec("abc1234xyz"); 19 | // expect(matchGroups.length).toEqual(3); 20 | // expect(matchGroups[0]).toEqual("abc1234xyz"); 21 | // expect(matchGroups[1]).toEqual("abc"); 22 | // expect(matchGroups[2]).toEqual("xyz"); 23 | // 24 | // Capture groups also work with nested ( ). To keep track of which ( ) 25 | // group goes with which array index, count the open parens ( from left 26 | // to right. 
27 | // 28 | // Note: A matching capture may be null (example: (.*) ) 29 | 30 | it('getting the hang of it capture groups', function() { 31 | 32 | const pattern = /^([A-Za-z][\w\-.]*((\+([\w+]+))?))@(([a-z\d]+)((\.([a-z\d]+))+))$/; 33 | 34 | const matches1 = pattern.exec( 'My.Name01+alias@mail.gmail.com' ); 35 | const matches2 = pattern.exec( 'simple@gmail.com' ); 36 | 37 | expect( matches1[0] ).toEqual('My.Name01+alias@mail.gmail.com'); 38 | expect( matches1[1] ).toEqual('My.Name01+alias'); 39 | expect( matches1[4] ).toEqual('alias'); 40 | expect( matches1[5] ).toEqual('mail.gmail.com'); 41 | 42 | expect( matches2[2] ).toEqual(''); 43 | expect( matches2[7] ).toEqual('.com'); // there are two possible answers here 44 | 45 | // Do you recognize this pattern? It's a more complex version of the email 46 | // pattern from the Grouping Koan. What enhancements have been made? 47 | }); 48 | 49 | it('Real World 4: Find the ID of a DIV tag', function() { 50 | // Assumption: 51 | // * Each DIV tag will be of the simple form: 52 | //
Some text may be here
53 | // * The spaces within the tag will be exactly as shown, so
is illegal 54 | // * The id will be the only attribute inside the div tag 55 | // * Only double-quote will be used (") 56 | 57 | const fixThisPattern = /^
Here is my text node!
' ); 60 | 61 | expect( matches[1] ).toEqual("anArbitraryId"); 62 | 63 | }); 64 | 65 | // Important Note: Mathematically speaking, it is not possible for regular 66 | // expressions to exactly capture or match the full range of valid HTML, as 67 | // defined by the W3C specifications. For a more entertaining way of saying 68 | // the same thing, see: 69 | // http://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags 70 | // 71 | // That said, if the expectations and requirements are narrower than "parse 72 | // the whole HTML file", and the goal is a simple one of just finding some 73 | // relevant piece of information within the file, and if it's safe to assume 74 | // that the HTML file is properly formed and syntactically correct... 75 | // THEN you can get away with using a regular expression to get text out of HTML. 76 | 77 | it('Real World 5: Find the class of an arbitrary HTML tag', function() { 78 | // Assumption: 79 | // * The tag may be any tag: DIV, SPAN, H1, TABLE, whatever 80 | // * The tag may be a singleton like 81 | // * The tag may contain other attributes (see example above) 82 | // * The class attribute may be multiple classes, like class="one two". In this case, just capture "one two". 83 | // * The attribute spacing will be just as shown, with no extra spaces around = 84 | // * There will only be one HTML tag in the input string 85 | // * Only double-quote will be used (") 86 | // * The class attribute is guaranteed to exist 87 | 88 | const fixThisPattern = /^<[^>]*class="([^"]+)".*$/; 89 | 90 | const matches1 = fixThisPattern.exec( '
Some text here
' ); 91 | const matches2 = fixThisPattern.exec( ' Error! ' ); 92 | const matches3 = fixThisPattern.exec( '' ); 93 | 94 | expect( matches1[1] ).toEqual("boxed"); 95 | expect( matches2[1] ).toEqual("bold red"); 96 | expect( matches3[1] ).toEqual("framed"); 97 | }); 98 | 99 | it('Captured groups can be referenced within the pattern itself using backslash-n', function() { 100 | 101 | // Using the same counting system that the Pattern.exec() method uses, \1 \2 ... \n will 102 | // reference a group that has been defined within that same pattern. 103 | 104 | const thisPatternWorks = /^([a-z]+)\d+\1$/; 105 | 106 | const fixThisPattern = /^(["|(])[\w\s]+\1$/; 107 | 108 | expect( 'ab12345ab' ).toMatch(thisPatternWorks); 109 | expect( 'a12345x' ).not.toMatch(thisPatternWorks); 110 | 111 | expect( '"Hello there"' ).toMatch(fixThisPattern); 112 | expect( '|some word here|' ).toMatch(fixThisPattern); 113 | expect( '(an expression)' ).not.toMatch(fixThisPattern); 114 | }); 115 | 116 | it('Real World 6: HTML tags may use single- or double-quotes', function() { 117 | // Assumption: 118 | // Same as above, except that attributes may be: 119 | // class="one" 120 | // class='one' 121 | // The attribute will never use a mismatch, like class="one' 122 | 123 | 124 | const fixThisPattern = /^<[^>]*class=(['"])(.+?)\1.*$/; 125 | 126 | const matches1 = fixThisPattern.exec( '
Some text here
' ); 127 | const matches2 = fixThisPattern.exec( " Error! " ); 128 | const matches3 = fixThisPattern.exec( '' ); 129 | 130 | expect( matches1[2] ).toEqual('boxed'); 131 | expect( matches2[2] ).toEqual('bold red'); 132 | expect( matches3[2] ).toEqual('framed'); 133 | }); 134 | 135 | }); 136 | -------------------------------------------------------------------------------- /answers/11_Intermission.js: -------------------------------------------------------------------------------- 1 | describe("Intermission", function() { 2 | 3 | const ___ = 0; 4 | 5 | // Koans 1 through 10 teach enough about Regular Expressions 6 | // to handle most common RegEx scenarios. This Koan is a series 7 | // of puzzles and problems to solve with Regular Expressions 8 | // to help cement your understanding as well as provide some 9 | // more Real World examples. 10 | 11 | // Note: The starter patterns (like /___/) will NOT include 12 | // the ^ $ hints that were used in the earlier Koans. It's up 13 | // to you to decide whether you want to capture the whole 14 | // string or just a substring to solve the puzzle. 15 | 16 | it('Find City, State, and Zip code', function() { 17 | // Assumptions: 18 | // * US Addresses (no Canadian provinces, etc.) 19 | // * Common American address format: City, ST 12345 20 | // * Comma separating City and State is optional 21 | // * US State will always use two uppercase letter abbreviation 22 | 23 | const fixThisPattern = /^\s*([a-zA-Z .]+),?\s+([A-Z]{2})\s+(\d{5}(-\d{4})?)/; 24 | 25 | // in matches1[cityGroup], etc., set the index to find each value 26 | const cityGroup = 1; 27 | const stateGroup = 2; 28 | const zipGroup = 3; 29 | 30 | const matches1 = fixThisPattern.exec(' Columbus, OH 43215' ); 31 | const matches2 = fixThisPattern.exec(' San Francisco, CA 94118-4503' ); 32 | const matches3 = fixThisPattern.exec(' APO AE 09499-0074' ); 33 | const matches4 = fixThisPattern.exec(' Port St. 
Lucie FL 34952' ); 34 | 35 | // Debug: 36 | // jasmine.log("Debug: " + matches1); 37 | 38 | const city1 = matches1[cityGroup]; 39 | const state1 = matches1[stateGroup]; 40 | const zip1 = matches1[zipGroup]; 41 | 42 | const city2 = matches2[cityGroup]; 43 | const state2 = matches2[stateGroup]; 44 | const zip2 = matches2[zipGroup]; 45 | 46 | const city3 = matches3[cityGroup]; 47 | const state3 = matches3[stateGroup]; 48 | const zip3 = matches3[zipGroup]; 49 | 50 | const city4 = matches4[cityGroup]; 51 | const state4 = matches4[stateGroup]; 52 | const zip4 = matches4[zipGroup]; 53 | 54 | expect( city1 ).toEqual('Columbus'); 55 | expect( state1 ).toEqual('OH'); 56 | expect( zip1 ).toEqual('43215'); 57 | 58 | expect( city2 ).toEqual('San Francisco'); 59 | expect( state2 ).toEqual('CA'); 60 | expect( zip2 ).toEqual('94118-4503'); 61 | 62 | expect( city3 ).toEqual('APO'); 63 | expect( state3 ).toEqual('AE'); 64 | expect( zip3 ).toEqual('09499-0074'); 65 | 66 | expect( city4 ).toEqual('Port St. Lucie'); 67 | expect( state4 ).toEqual('FL'); 68 | expect( zip4 ).toEqual('34952'); 69 | }); 70 | 71 | it('Parse URL, detect invalid format', function() { 72 | // Assumptions: 73 | // * Only accept these protocols: http, https, ftp, sftp, ssh 74 | // * server is required, may be numeric (IPv4) or named 75 | // * server name may only be one word (e.g. 
'localhost') 76 | // * port is optional 77 | // * directory and file name are both optional 78 | // * directory, if present, will always end in / 79 | // * GET parameters are optional 80 | // * any missing value will be "" (instead of null) 81 | 82 | const fixThisPattern = /^(https?|s?ftp|ssh):\/\/(\d{1,3}(\.\d{1,3}){3}|[a-z]+(\.[a-z]+)*)((:\d+)?)\/(([^\/?]+\/)*)(([\w.]+)?)((\?.*)?)$/; 83 | 84 | const protocolGroup = 1; 85 | const serverGroup = 2; 86 | const portGroup = 5; 87 | const directoryGroup = 7; 88 | const fileGroup = 9; 89 | const getParamsGroup = 11; 90 | 91 | // valid 92 | const matches1 = fixThisPattern.exec( 'http://www.google.com/' ); 93 | const matches2 = fixThisPattern.exec( 'https://mysearch.com/search.jsp?q=regular+expressions' ); 94 | const matches3 = fixThisPattern.exec( 'ftp://192.168.0.100/home/myself/music/' ); 95 | const matches4 = fixThisPattern.exec( 'ssh://localhost/etc/passwd' ); 96 | const matches5 = fixThisPattern.exec( 'http://127.0.0.1:8080/Admin/index.jsp' ); 97 | 98 | // invalid 99 | const matches6 = fixThisPattern.exec( 'google.com' ); 100 | const matches7 = fixThisPattern.exec( 'gopher://oldserver.arpanet/' ); 101 | 102 | // Debug: 103 | // jasmine.log("Debug: " + matches1.length); 104 | 105 | const protocol1 = matches1.length > protocolGroup ? matches1[protocolGroup] : ""; 106 | const server1 = matches1.length > serverGroup ? matches1[serverGroup] : ""; 107 | const port1 = matches1.length > portGroup ? matches1[portGroup] : ""; 108 | const directory1 = matches1.length > directoryGroup ? matches1[directoryGroup] : ""; 109 | const file1 = matches1.length > fileGroup ? matches1[fileGroup] : ""; 110 | const getParams1 = matches1.length > getParamsGroup ? matches1[getParamsGroup] : ""; 111 | 112 | const protocol2 = matches2.length > protocolGroup ? matches2[protocolGroup] : ""; 113 | const server2 = matches2.length > serverGroup ? matches2[serverGroup] : ""; 114 | const port2 = matches2.length > portGroup ? 
matches2[portGroup] : ""; 115 | const directory2 = matches2.length > directoryGroup ? matches2[directoryGroup] : ""; 116 | const file2 = matches2.length > fileGroup ? matches2[fileGroup] : ""; 117 | const getParams2 = matches2.length > getParamsGroup ? matches2[getParamsGroup] : ""; 118 | 119 | const protocol3 = matches3.length > protocolGroup ? matches3[protocolGroup] : ""; 120 | const server3 = matches3.length > serverGroup ? matches3[serverGroup] : ""; 121 | const port3 = matches3.length > portGroup ? matches3[portGroup] : ""; 122 | const directory3 = matches3.length > directoryGroup ? matches3[directoryGroup] : ""; 123 | const file3 = matches3.length > fileGroup ? matches3[fileGroup] : ""; 124 | const getParams3 = matches3.length > getParamsGroup ? matches3[getParamsGroup] : ""; 125 | 126 | const protocol4 = matches4.length > protocolGroup ? matches4[protocolGroup] : ""; 127 | const server4 = matches4.length > serverGroup ? matches4[serverGroup] : ""; 128 | const port4 = matches4.length > portGroup ? matches4[portGroup] : ""; 129 | const directory4 = matches4.length > directoryGroup ? matches4[directoryGroup] : ""; 130 | const file4 = matches4.length > fileGroup ? matches4[fileGroup] : ""; 131 | const getParams4 = matches4.length > getParamsGroup ? matches4[getParamsGroup] : ""; 132 | 133 | const protocol5 = matches5.length > protocolGroup ? matches5[protocolGroup] : ""; 134 | const server5 = matches5.length > serverGroup ? matches5[serverGroup] : ""; 135 | const port5 = matches5.length > portGroup ? matches5[portGroup] : ""; 136 | const directory5 = matches5.length > directoryGroup ? matches5[directoryGroup] : ""; 137 | const file5 = matches5.length > fileGroup ? matches5[fileGroup] : ""; 138 | const getParams5 = matches5.length > getParamsGroup ? 
matches5[getParamsGroup] : ""; 139 | 140 | // http://www.google.com/ 141 | expect( protocol1 ).toEqual('http'); 142 | expect( server1 ).toEqual('www.google.com'); 143 | expect( port1 ).toEqual(''); 144 | expect( directory1 ).toEqual(''); 145 | expect( file1 ).toEqual(''); 146 | expect( getParams1 ).toEqual(''); 147 | 148 | // https://mysearch.com/search.jsp?q=regular+expressions 149 | expect( protocol2 ).toEqual('https'); 150 | expect( server2 ).toEqual('mysearch.com'); 151 | expect( port2 ).toEqual(''); 152 | expect( directory2 ).toEqual(''); 153 | expect( file2 ).toEqual('search.jsp'); 154 | expect( getParams2 ).toEqual('?q=regular+expressions'); 155 | 156 | // ftp://192.168.0.100/home/myself/music/ 157 | expect( protocol3 ).toEqual('ftp'); 158 | expect( server3 ).toEqual('192.168.0.100'); 159 | expect( port3 ).toEqual(''); 160 | expect( directory3 ).toEqual('home/myself/music/'); 161 | expect( file3 ).toEqual(''); 162 | expect( getParams3 ).toEqual(''); 163 | 164 | // ssh://localhost/etc/passwd 165 | expect( protocol4 ).toEqual('ssh'); 166 | expect( server4 ).toEqual('localhost'); 167 | expect( port4 ).toEqual(''); 168 | expect( directory4 ).toEqual('etc/'); 169 | expect( file4 ).toEqual('passwd'); 170 | expect( getParams4 ).toEqual(''); 171 | 172 | // http://127.0.0.1:8080/Admin/index.jsp 173 | expect( protocol5 ).toEqual('http'); 174 | expect( server5 ).toEqual('127.0.0.1'); 175 | expect( port5 ).toEqual(':8080'); 176 | expect( directory5 ).toEqual('Admin/'); 177 | expect( file5 ).toEqual('index.jsp'); 178 | expect( getParams5 ).toEqual(''); 179 | 180 | // google.com 181 | expect( matches6 ).toBeNull(); 182 | 183 | // gopher://oldserver.arpanet/ 184 | expect( matches7 ).toBeNull(); 185 | 186 | }); 187 | 188 | }); 189 | -------------------------------------------------------------------------------- /__tests__/13_LookingAhead.js: -------------------------------------------------------------------------------- 1 | describe("Looking Ahead", function()
{ 2 | 3 | const ___ = 0; 4 | 5 | // Given all we've learned so far, there are still some surprisingly 6 | // simple patterns that we cannot match with the tools in the 7 | // earlier Koans. For example: 8 | // * Password must be 6-8 characters and must include a capital letter and a number 9 | // * Escape all & in an HTML file (& is a special character and is escaped with &amp;) 10 | // but don't escape any & that is already escaped (should not create any &amp;amp;) 11 | // * Verify that every IMG tag in an HTML file has an alt attribute and add one 12 | // if needed 13 | // 14 | // To solve these problems, a regular expression must be able to, seemingly, 15 | // fast-forward through the string looking for certain criteria, then rewind 16 | // back to an earlier point in order to continue the match. Most modern 17 | // Regular Expression engines can do this. The technical term is: 18 | // Zero Width Positive Lookahead Assertion 19 | // You can also do a Negative Lookahead, for example when evaluating "the string 20 | // must not contain any numbers". 21 | // 22 | // It is a "Zero Width" assertion because the criteria is evaluated, but then the 23 | // regular expression engine returns to the point where this assertion was 24 | // first encountered to continue with the rest of the Regex pattern. You can think 25 | // of it like matching one character at a time against a Regex pattern, then when 26 | // one of these lookahead assertions is encountered, you put your finger on the string 27 | // at that point while you scan ahead to evaluate the lookahead. When the lookahead 28 | // is complete, and it matches, you return to where your finger is and continue 29 | // with the rest of the pattern and string. 30 | // 31 | // If the lookahead does not match, then the engine will stop - the Regex is a 32 | // non-match and it quits right away. 33 | // 34 | // We will solve all the examples given above in Koans below.
35 | 36 | // Some examples to show the syntax 37 | it('match a q that is followed by a u', function() { 38 | const str1 = 'The quick brown fox jumped over the lazy dog'; 39 | const str2 = 'The other brown fox was from Qatar.'; 40 | 41 | const lookaheadPattern = /q(?=u)/i; 42 | 43 | expect( str1 ).toMatch(lookaheadPattern); 44 | expect( str2 ).not.toMatch(lookaheadPattern); 45 | 46 | expect(1).toEqual(2); // remove this line to continue the tutorial 47 | }); 48 | 49 | // The Koan above could easily be matched with a pattern without any lookahead 50 | // features: /qu/ would do the trick. So when are lookahead operations useful? 51 | // Answer: When the string must match more than one criteria and the order of 52 | // appearance doesn't matter. For example, in a typical password validator 53 | // that requires both a letter and a number, but it's okay to have a2 or 2a, 54 | // either is correct. This can still be done without lookahead, but it's much 55 | // harder because you have to write a Regex for "letter-digit OR digit-letter". 56 | // This becomes unusably difficult when you add more criteria: 57 | // "letter-digit-punctuation OR letter-punctuation-digit OR ... 58 | // This is the kind of problem that makes lookahead so powerful. 59 | // 60 | // Lookahead has another valuable use when used with string replacement, which 61 | // we'll get to later. 62 | 63 | it('verify that the string contains at least one uppercase letter and one number', function() { 64 | const str1 = 'abcde'; 65 | const str2 = 'a6cD'; 66 | const str3 = 'Abcd9'; 67 | const str4 = '12345'; 68 | 69 | const fixThisPattern = /___/; 70 | 71 | // Hint: Just like any other Regex element, the lookahead element is position- 72 | // specific. See the "qu" example above: the "u" must be present in exactly 73 | // the position after the "q". 
To make the lookahead scan ahead farther into 74 | // the string, you'll have to incorporate a .* 75 | 76 | expect( 'abcde' ).not.toMatch(fixThisPattern); 77 | 78 | expect( 'a6cD' ).toMatch(fixThisPattern); 79 | expect( 'Abcd9' ).toMatch(fixThisPattern); 80 | expect( 'X3' ).toMatch(fixThisPattern); 81 | 82 | expect( '' ).not.toMatch(fixThisPattern); 83 | expect( '12345' ).not.toMatch(fixThisPattern); 84 | }); 85 | 86 | it('validate a password', function() { 87 | // Requirements: 88 | // * Must contain at least one lowercase letter 89 | // * Must contain at least one uppercase letter 90 | // * Must contain at least one number 91 | // * Must be between 6 and 16 characters long 92 | // * Any non-whitespace character is allowed 93 | 94 | const fixThisPattern = /___/; 95 | 96 | expect( 'abcXYZ123' ).toMatch(fixThisPattern); 97 | expect( '89ghV.' ).toMatch(fixThisPattern); 98 | expect( 'X0aaaaaaaaaaaaaa' ).toMatch(fixThisPattern); 99 | expect( 'abc123' ).not.toMatch(fixThisPattern); 100 | expect( 'aX5##' ).not.toMatch(fixThisPattern); 101 | expect( 'abc123XXXXXXXXXXX' ).not.toMatch(fixThisPattern); 102 | expect( 'abc123 ZZ' ).not.toMatch(fixThisPattern); 103 | }); 104 | 105 | // You can combine lookahead with group captures to solve more interesting 106 | // problems. This next Koan will require using the negative lookahead: 107 | // (?! ) 108 | // Note: The lookahead ( ) does not count as a capture group, so when you 109 | // count ( ) to get the index of the captured text, you skip the lookahead 110 | // operators. In fact, any ( ) with a ? like (?someRegexStuff) will not be 111 | // remembered as a capture group. 
112 | 113 | it('find the id of every IMG tag without an "alt" attribute', function() { 114 | // Assume every IMG tag has an id defined 115 | 116 | const fixThisPattern = /___/; 117 | const idGroupIdx = ___; 118 | 119 | const matches1 = fixThisPattern.exec( '' ); 120 | const matches2 = fixThisPattern.exec( '' ); 121 | const matches3 = fixThisPattern.exec( 'My Pic!' ); 122 | 123 | expect( matches1[idGroupIdx] ).toEqual('my_pic'); 124 | expect( matches2[idGroupIdx] ).toEqual('localPic'); 125 | 126 | expect( matches3 ).toBeNull(); 127 | }); 128 | 129 | it('Escape & in XML/HTML code', function() { 130 | // Assumptions: 131 | // * & is escaped in XML and HTML by entering: & 132 | // * If & is already part of an &...; escape sequence, don't escape it 133 | // * & escape sequences are always &, followed by some number of letters, then ; 134 | 135 | const fixThisPattern = /___/; 136 | const escaped = '___'; 137 | 138 | const str1 = 'Strunk & White'.replace(fixThisPattern, escaped); 139 | const str2 = 'This & is already escaped.'.replace(fixThisPattern, escaped); 140 | const str3 = ''.replace(fixThisPattern, escaped); 141 | const str4 = 'Sample code: if (x < y && x^2 > y^2) { println "x is negative" }'.replace(fixThisPattern, escaped); 142 | 143 | expect( str1 ).toEqual( 'Strunk & White' ); 144 | expect( str2 ).toEqual( 'This & is already escaped.' ); 145 | expect( str3 ).toEqual( '' ); 146 | expect( str4 ).toEqual( 'Sample code: if (x < y && x^2 > y^2) { println "x is negative" }' ); 147 | }); 148 | 149 | it('For every IMG tag, if no alt attribute is present, insert one using the src value', function() { 150 | // Assumptions: 151 | // * Every IMG tag has a "src" attribute with a defined value 152 | // * Some IMG tags may already have "alt" attribute. Do not change these strings. 
 153 | // * In real HTML, the inserted "alt" could be anywhere in the tag, but in this 154 | // Koan, put it here: some alt value 155 | 156 | // Hint: You will need to use $n references to capture groups to solve this 157 | 158 | const fixThisPattern = /___/; 159 | const replacementString = '___'; 160 | 161 | const str1 = ''.replace(fixThisPattern, replacementString); 162 | const str2 = ' src="Not it!" alt="Tricky!"'.replace(fixThisPattern, replacementString); 163 | const str3 = 'Can\'t touch this!'.replace(fixThisPattern, replacementString); 164 | const str4 = 'Reverse Order'.replace(fixThisPattern, replacementString); 165 | 166 | expect( str1 ).toEqual( 'pic.jpg' ); 167 | expect( str2 ).toEqual( 'trickyPic.jpg src="Not it!" alt="Tricky!"' ); 168 | expect( str3 ).toEqual( 'Can\'t touch this!' ); 169 | expect( str4 ).toEqual( 'Reverse Order' ); 170 | }); 171 | 172 | }); 173 | -------------------------------------------------------------------------------- /answers/13_LookingAhead.js: -------------------------------------------------------------------------------- 1 | describe("Looking Ahead", function() { 2 | 3 | const ___ = 0; 4 | 5 | // Given all we've learned so far, there are still some surprisingly 6 | // simple patterns that we cannot match with the tools in the 7 | // earlier Koans. For example: 8 | // * Password must be 6-8 characters and must include a capital letter and a number 9 | // * Escape all & in an HTML file (& is a special character and is escaped with &amp;) 10 | // but don't escape any & that is already escaped (should not create any &amp;amp;) 11 | // * Verify that every IMG tag in an HTML file has an alt attribute and add one 12 | // if needed 13 | // 14 | // To solve these problems, a regular expression must be able to, seemingly, 15 | // fast-forward through the string looking for certain criteria, then rewind 16 | // back to an earlier point in order to continue the match. Most modern 17 | // Regular Expression engines can do this.
The technical term is: 18 | // Zero Width Positive Lookahead Assertion 19 | // You can also do a Negative Lookahead, for example when evaluating "the string 20 | // must not contain any numbers". 21 | // 22 | // It is a "Zero Width" assertion because the criteria is evaluated, but then the 23 | // regular expression engine returns to the point where this assertion was 24 | // first encountered to continue with the rest of the Regex pattern. You can think 25 | // of it like matching one character at a time against a Regex pattern, then when 26 | // one of these lookahead assertions is encountered, you put your finger on the string 27 | // at that point while you scan ahead to evaluate the lookahead. When the lookahead 28 | // is complete, and it matches, you return to where your finger is and continue 29 | // with the rest of the pattern and string. 30 | // 31 | // If the lookahead does not match, then the engine will stop - the Regex is a 32 | // non-match and it quits right away. 33 | // 34 | // We will solve all the examples given above in Koans below. The next Koan will 35 | // exercise another special case, the "Look Behind" version. 36 | 37 | // Some examples to show the syntax 38 | it('match a q that is followed by a u', function() { 39 | const str1 = 'The quick brown fox jumped over the lazy dog'; 40 | const str2 = 'The other brown fox was from Qatar.'; 41 | 42 | const lookaheadPattern = /q(?=u)/i; 43 | 44 | expect( str1 ).toMatch(lookaheadPattern); 45 | expect( str2 ).not.toMatch(lookaheadPattern); 46 | 47 | // expect(1).toEqual(2); // remove this line to continue the tutorial 48 | }); 49 | 50 | // The Koan above could easily be matched with a pattern without any lookahead 51 | // features: /qu/ would do the trick. So when are lookahead operations useful? 52 | // Answer: When the string must match more than one criteria and the order of 53 | // appearance doesn't matter. 
For example, in a typical password validator 54 | // that requires both a letter and a number, but it's okay to have a2 or 2a, 55 | // either is correct. This can still be done without lookahead, but it's much 56 | // harder because you have to write a Regex for "letter-digit OR digit-letter". 57 | // This becomes unusably difficult when you add more criteria: 58 | // "letter-digit-punctuation OR letter-punctuation-digit OR ... 59 | // This is the kind of problem that makes lookahead so powerful. 60 | // 61 | // Lookahead has another valuable use when used with string replacement, which 62 | // we'll get to later. 63 | 64 | it('verify that the string contains at least one uppercase letter and one number', function() { 65 | const str1 = 'abcde'; 66 | const str2 = 'a6cD'; 67 | const str3 = 'Abcd9'; 68 | const str4 = '12345'; 69 | 70 | const fixThisPattern = /(?=.*[A-Z])(?=.*\d)/; 71 | 72 | // Hint: Just like any other Regex element, the lookahead element is position- 73 | // specific. See the "qu" example above: the "u" must be present in exactly 74 | // the position after the "q". To make the lookahead scan ahead farther into 75 | // the string, you'll have to incorporate a .* 76 | 77 | expect( 'abcde' ).not.toMatch(fixThisPattern); 78 | 79 | expect( 'a6cD' ).toMatch(fixThisPattern); 80 | expect( 'Abcd9' ).toMatch(fixThisPattern); 81 | expect( 'X3' ).toMatch(fixThisPattern); 82 | 83 | expect( '' ).not.toMatch(fixThisPattern); 84 | expect( '12345' ).not.toMatch(fixThisPattern); 85 | }); 86 | 87 | it('validate a password', function() { 88 | // Requirements: 89 | // * Must contain at least one lowercase letter 90 | // * Must contain at least one uppercase letter 91 | // * Must contain at least one number 92 | // * Must be between 6 and 16 characters long 93 | // * Any non-whitespace character is allowed 94 | 95 | const fixThisPattern = /^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)\S{6,16}$/; 96 | 97 | expect( 'abcXYZ123' ).toMatch(fixThisPattern); 98 | expect( '89ghV.' 
).toMatch(fixThisPattern); 99 | expect( 'X0aaaaaaaaaaaaaa' ).toMatch(fixThisPattern); 100 | expect( 'abc123' ).not.toMatch(fixThisPattern); 101 | expect( 'aX5##' ).not.toMatch(fixThisPattern); 102 | expect( 'abc123XXXXXXXXXXX' ).not.toMatch(fixThisPattern); 103 | expect( 'abc123 ZZ' ).not.toMatch(fixThisPattern); 104 | }); 105 | 106 | // You can combine lookahead with group captures to solve more interesting 107 | // problems. This next Koan will require using the negative lookahead: 108 | // (?! ) 109 | // Note: The lookahead ( ) does not count as a capture group, so when you 110 | // count ( ) to get the index of the captured text, you skip the lookahead 111 | // operators. 112 | 113 | it('find the id of every IMG tag without an "alt" attribute', function() { 114 | // Assume every IMG tag has an id defined 115 | 116 | const fixThisPattern = /^' ); 120 | const matches2 = fixThisPattern.exec( '' ); 121 | const matches3 = fixThisPattern.exec( 'My Pic!' ); 122 | 123 | expect( matches1[idGroupIdx] ).toEqual('my_pic'); 124 | expect( matches2[idGroupIdx] ).toEqual('localPic'); 125 | 126 | expect( matches3 ).toBeNull(); 127 | }); 128 | 129 | it('Escape & in XML/HTML code', function() { 130 | // Assumptions: 131 | // * & is escaped in XML and HTML by entering: & 132 | // * If & is already part of an &...; escape sequence, don't escape it 133 | // * & escape sequences are always &, followed by some number of letters, then ; 134 | 135 | const fixThisPattern = /&(?![a-z]+;)/g; 136 | const escaped = '&'; 137 | 138 | const str1 = 'Strunk & White'.replace(fixThisPattern, escaped); 139 | const str2 = 'This & is already escaped.'.replace(fixThisPattern, escaped); 140 | const str3 = ''.replace(fixThisPattern, escaped); 141 | const str4 = 'Sample code: if (x < y && x^2 > y^2) { println "x is negative" }'.replace(fixThisPattern, escaped); 142 | 143 | expect( str1 ).toEqual( 'Strunk & White' ); 144 | expect( str2 ).toEqual( 'This & is already escaped.' 
); 145 | expect( str3 ).toEqual( '' ); 146 | expect( str4 ).toEqual( 'Sample code: if (x < y && x^2 > y^2) { println "x is negative" }' ); 147 | }); 148 | 149 | it('For every IMG tag, if no alt attribute is present, insert one using the src value', function() { 150 | // Assumptions: 151 | // * Every IMG tag has a "src" attribute with a defined value 152 | // * Some IMG tags may already have "alt" attribute. Do not change these strings. 153 | // * In real HTML, the inserted "alt" could be anywhere in the tag, but in this 154 | // Koan, put it here: some alt value 155 | 156 | // Hint: You will need to use $n references to capture groups to solve this 157 | 158 | const fixThisPattern = /^]*alt="[^"]*")(.*?)(src="([^"]+)")(.*)/; 159 | const replacementString = '$3'.replace(fixThisPattern, replacementString); 164 | const str4 = 'Reverse Order'.replace(fixThisPattern, replacementString); 165 | 166 | expect( str1 ).toEqual( 'pic.jpg' ); 167 | expect( str2 ).toEqual( 'trickyPic.jpg src="Not it!" alt="Tricky!"' ); 168 | expect( str3 ).toEqual( 'Can\'t touch this!' ); 169 | expect( str4 ).toEqual( 'Reverse Order' ); 170 | }); 171 | 172 | }); 173 | -------------------------------------------------------------------------------- /__tests__/11_Intermission.js: -------------------------------------------------------------------------------- 1 | describe("Intermission", function() { 2 | 3 | const ___ = 0; 4 | 5 | // Koans 1 through 10 teach enough about Regular Expressions 6 | // to handle most common RegEx scenarios. This Koan is a series 7 | // of puzzles and problems to solve with Regular Expressions 8 | // to help cement your understanding as well as provide some 9 | // more Real World examples. 10 | 11 | // Note: The starter patterns (like /___/) will NOT include 12 | // the ^ $ hints that were used in the earlier Koans. It's up 13 | // to you to decide whether you want to capture the whole 14 | // string or just a substring to solve the puzzle. 
15 | 16 | it('Find City, State, and Zip code', function() { 17 | // Assumptions: 18 | // * US Addresses (no Canadian provinces, etc.) 19 | // * Common American address format: City, ST 12345 20 | // * Comma separating City and State is optional 21 | // * US State will always use two uppercase letter abbreviation 22 | 23 | const fixThisPattern = /___/; 24 | 25 | // in matches1[cityGroup], etc., set the index to find each value 26 | const cityGroup = ___; 27 | const stateGroup = ___; 28 | const zipGroup = ___; 29 | 30 | const matches1 = fixThisPattern.exec(' Columbus, OH 43215' ); 31 | const matches2 = fixThisPattern.exec(' San Francisco, CA 94118-4503' ); 32 | const matches3 = fixThisPattern.exec(' APO AE 09499-0074' ); 33 | const matches4 = fixThisPattern.exec(' Port St. Lucie FL 34952' ); 34 | 35 | const city1 = matches1[cityGroup]; 36 | const state1 = matches1[stateGroup]; 37 | const zip1 = matches1[zipGroup]; 38 | 39 | const city2 = matches2[cityGroup]; 40 | const state2 = matches2[stateGroup]; 41 | const zip2 = matches2[zipGroup]; 42 | 43 | const city3 = matches3[cityGroup]; 44 | const state3 = matches3[stateGroup]; 45 | const zip3 = matches3[zipGroup]; 46 | 47 | const city4 = matches4[cityGroup]; 48 | const state4 = matches4[stateGroup]; 49 | const zip4 = matches4[zipGroup]; 50 | 51 | expect( city1 ).toEqual('Columbus'); 52 | expect( state1 ).toEqual('OH'); 53 | expect( zip1 ).toEqual('43215'); 54 | 55 | expect( city2 ).toEqual('San Francisco'); 56 | expect( state2 ).toEqual('CA'); 57 | expect( zip2 ).toEqual('94118-4503'); 58 | 59 | expect( city3 ).toEqual('APO'); 60 | expect( state3 ).toEqual('AE'); 61 | expect( zip3 ).toEqual('09499-0074'); 62 | 63 | expect( city4 ).toEqual('Port St. 
Lucie'); 64 | expect( state4 ).toEqual('FL'); 65 | expect( zip4 ).toEqual('34952'); 66 | }); 67 | 68 | it('Parse URL, detect invalid format', function() { 69 | // Assumptions: 70 | // * Only accept these protocols: http, https, ftp, sftp, ssh 71 | // * server is required, can be numeric (IPv4) or named 72 | // * server name might only be one word (e.g. 'localhost') 73 | // * port is optional 74 | // * directory and file name are both optional 75 | // * directory, if present, will always end in / 76 | // * query string (GET parameters) is optional 77 | // * any missing value will be "" (instead of null) 78 | 79 | const fixThisPattern = /^(http)(:)/; 80 | 81 | const protocolGroupIndex = ___; 82 | const serverGroupIndex = ___; 83 | const portGroupIndex = ___; 84 | const directoryGroupIndex = ___; 85 | const fileGroupIndex = ___; 86 | const queryGroupIndex = ___; 87 | 88 | // valid 89 | const matches1 = fixThisPattern.exec( 'http://www.google.com/' ); 90 | const matches2 = fixThisPattern.exec( 'https://mysearch.com/search.jsp?q=regular+expressions' ); 91 | const matches3 = fixThisPattern.exec( 'ftp://192.168.0.100/home/myself/music/' ); 92 | const matches4 = fixThisPattern.exec( 'ssh://localhost/etc/passwd' ); 93 | const matches5 = fixThisPattern.exec( 'http://127.0.0.1:8080/Admin/index.jsp' ); 94 | 95 | // invalid 96 | const matches6 = fixThisPattern.exec( 'google.com' ); 97 | const matches7 = fixThisPattern.exec( 'gopher://oldserver.arpanet/' ); 98 | 99 | // Debug: 100 | // jasmine.log("Debug: " + matches1); 101 | 102 | const protocol1 = matches1.length > protocolGroupIndex ? matches1[protocolGroupIndex] : ""; 103 | const server1 = matches1.length > serverGroupIndex ? matches1[serverGroupIndex] : ""; 104 | const port1 = matches1.length > portGroupIndex ? matches1[portGroupIndex] : ""; 105 | const directory1 = matches1.length > directoryGroupIndex ? matches1[directoryGroupIndex] : ""; 106 | const file1 = matches1.length > fileGroupIndex ? 
matches1[fileGroupIndex] : ""; 107 | const getParams1 = matches1.length > queryGroupIndex ? matches1[queryGroupIndex] : ""; 108 | 109 | const protocol2 = matches2.length > protocolGroupIndex ? matches2[protocolGroupIndex] : ""; 110 | const server2 = matches2.length > serverGroupIndex ? matches2[serverGroupIndex] : ""; 111 | const port2 = matches2.length > portGroupIndex ? matches2[portGroupIndex] : ""; 112 | const directory2 = matches2.length > directoryGroupIndex ? matches2[directoryGroupIndex] : ""; 113 | const file2 = matches2.length > fileGroupIndex ? matches2[fileGroupIndex] : ""; 114 | const getParams2 = matches2.length > queryGroupIndex ? matches2[queryGroupIndex] : ""; 115 | 116 | const protocol3 = matches3.length > protocolGroupIndex ? matches3[protocolGroupIndex] : ""; 117 | const server3 = matches3.length > serverGroupIndex ? matches3[serverGroupIndex] : ""; 118 | const port3 = matches3.length > portGroupIndex ? matches3[portGroupIndex] : ""; 119 | const directory3 = matches3.length > directoryGroupIndex ? matches3[directoryGroupIndex] : ""; 120 | const file3 = matches3.length > fileGroupIndex ? matches3[fileGroupIndex] : ""; 121 | const getParams3 = matches3.length > queryGroupIndex ? matches3[queryGroupIndex] : ""; 122 | 123 | const protocol4 = matches4.length > protocolGroupIndex ? matches4[protocolGroupIndex] : ""; 124 | const server4 = matches4.length > serverGroupIndex ? matches4[serverGroupIndex] : ""; 125 | const port4 = matches4.length > portGroupIndex ? matches4[portGroupIndex] : ""; 126 | const directory4 = matches4.length > directoryGroupIndex ? matches4[directoryGroupIndex] : ""; 127 | const file4 = matches4.length > fileGroupIndex ? matches4[fileGroupIndex] : ""; 128 | const getParams4 = matches4.length > queryGroupIndex ? matches4[queryGroupIndex] : ""; 129 | 130 | const protocol5 = matches5.length > protocolGroupIndex ? matches5[protocolGroupIndex] : ""; 131 | const server5 = matches5.length > serverGroupIndex ? 
matches5[serverGroupIndex] : ""; 132 | const port5 = matches5.length > portGroupIndex ? matches5[portGroupIndex] : ""; 133 | const directory5 = matches5.length > directoryGroupIndex ? matches5[directoryGroupIndex] : ""; 134 | const file5 = matches5.length > fileGroupIndex ? matches5[fileGroupIndex] : ""; 135 | const getParams5 = matches5.length > queryGroupIndex ? matches5[queryGroupIndex] : ""; 136 | 137 | // http://www.google.com/ 138 | expect( protocol1 ).toEqual('http'); 139 | expect( server1 ).toEqual('www.google.com'); 140 | expect( port1 ).toEqual(''); 141 | expect( directory1 ).toEqual(''); 142 | expect( file1 ).toEqual(''); 143 | expect( getParams1 ).toEqual(''); 144 | 145 | // https://mysearch.com/search.jsp?q=regular+expressions 146 | expect( protocol2 ).toEqual('https'); 147 | expect( server2 ).toEqual('mysearch.com'); 148 | expect( port2 ).toEqual(''); 149 | expect( directory2 ).toEqual(''); 150 | expect( file2 ).toEqual('search.jsp'); 151 | expect( getParams2 ).toEqual('?q=regular+expressions'); 152 | 153 | // ftp://192.168.0.100/home/myself/music/ 154 | expect( protocol3 ).toEqual('ftp'); 155 | expect( server3 ).toEqual('192.168.0.100'); 156 | expect( port3 ).toEqual(''); 157 | expect( directory3 ).toEqual('home/myself/music/'); 158 | expect( file3 ).toEqual(''); 159 | expect( getParams3 ).toEqual(''); 160 | 161 | // ssh://localhost/etc/passwd 162 | expect( protocol4 ).toEqual('ssh'); 163 | expect( server4 ).toEqual('localhost'); 164 | expect( port4 ).toEqual(''); 165 | expect( directory4 ).toEqual('etc/'); 166 | expect( file4 ).toEqual('passwd'); 167 | expect( getParams4 ).toEqual(''); 168 | 169 | // http://127.0.0.1:8080/Admin/index.jsp 170 | expect( protocol5 ).toEqual('http'); 171 | expect( server5 ).toEqual('127.0.0.1'); 172 | expect( port5 ).toEqual(':8080'); 173 | expect( directory5 ).toEqual('Admin/'); 174 | expect( file5 ).toEqual('index.jsp'); 175 | expect( getParams5 ).toEqual(''); 176 | 177 | // google.com 178 | expect( 
matches6 ).toBeNull(); 179 | 180 | // gopher://oldserver.arpanet/ 181 | expect( matches7 ).toBeNull(); 182 | 183 | }); 184 | 185 | }); 186 | --------------------------------------------------------------------------------