import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * String-processing drill: extracts wiki link titles from the HTML source of a page.
 */
public class Drill07 {

    /**
     * Matches the opening of an anchor whose target is a wiki page, i.e.
     * {@code <a href="/wiki/NAME"}, and captures {@code NAME} (everything up
     * to the closing double quote) as group 1.
     *
     * <p>Compiled once because {@link Pattern} is immutable and comparatively
     * expensive to build.
     */
    private static final Pattern WIKI_LINK = Pattern.compile("<a href=\"/wiki/([^\"]*)\"");

    /**
     * Returns the set of valid wiki link titles found in the given HTML source.
     *
     * <p>A link is considered a valid wiki link when it matches the pattern
     * {@code <a href="/wiki/Marine_mammal">} and its title contains neither
     * {@code '#'} nor {@code ':'}. For the example above the extracted title
     * is {@code Marine_mammal}. Additional attributes after the {@code href}
     * value (for example {@code title="..."}) do not affect the match.
     *
     * <p>The input is treated purely as text; no HTML parsing is performed.
     * The string is scanned once, left to right, and duplicate titles
     * collapse into a single set entry.
     *
     * @param html the HTML source of a web page
     * @return a mutable set of the distinct valid link titles; empty when none are found
     * @throws NullPointerException if {@code html} is {@code null}
     */
    public static Set<String> findWikiLinks(String html) {
        Set<String> result = new HashSet<>();
        Matcher matcher = WIKI_LINK.matcher(html);
        while (matcher.find()) {
            // Group 1 already ends at the closing quote, so no trimming is needed.
            String name = matcher.group(1);
            // Titles with ':' (namespaces such as File:) or '#' (section anchors) are not valid links.
            if (name.indexOf(':') < 0 && name.indexOf('#') < 0) {
                result.add(name);
            }
        }
        return result;
    }

}