├── docs ├── bg.jpg ├── pat.jpg ├── tutorial3.html └── tutorial4.html ├── README.md └── com └── stevesoft └── pat ├── patInf.java ├── NotImplementedError.java ├── SpecialRule.java ├── PatternSub.java ├── UniValidator.java ├── PopRule.java ├── BasicStringBufferLike.java ├── MultiMin.java ├── CodeRule.java ├── AmpersandRule.java ├── NullRule.java ├── DirFileRegex.java ├── SubMark.java ├── LeftRule.java ├── NoPattern.java ├── NonDirFileRegex.java ├── RegSyntax.java ├── WantMoreTextReplaceRule.java ├── RightRule.java ├── RuleHolder.java ├── NullPattern.java ├── StringRule.java ├── BackG.java ├── apps ├── ShutDown.java ├── ColorLine.java ├── Deck.java ├── ReGap.java ├── Message.java ├── ColorText.java ├── TestGroup.java ├── Grep.java └── guigrep.java ├── CustomEndpoint.java ├── BackRefRule.java ├── PushRule.java ├── wrap ├── StringBufferWrap.java ├── StringWrap.java ├── CharArrayBufferWrap.java ├── WriterWrap.java ├── CharArrayWrap.java └── RandomAccessFileWrap.java ├── Any.java ├── ChangeRule.java ├── Backup.java ├── RBuffer.java ├── Start.java ├── Skipped.java ├── BackMatch.java ├── RegSyntaxError.java ├── TransPat.java ├── Pthings.java ├── Skip2.java ├── End.java ├── Custom.java ├── Group.java ├── StringLike.java ├── Bracket.java ├── lookAhead.java ├── Rthings.java ├── oneChar.java ├── OrMark.java ├── Boundary.java ├── StringBufferLike.java ├── Prop.java ├── Range.java ├── Multi.java ├── Validator.java ├── PartialBuffer.java ├── Or.java ├── patInt.java ├── Multi_stage2.java ├── FastMulti.java ├── StrPos.java ├── DotMulti.java ├── RegexTokenizer.java ├── Skip.java ├── Ctrl.java ├── Transformer.java ├── FastBracket.java ├── RegexWriter.java ├── Pattern.java ├── RegRes.java ├── RegexReader.java ├── SkipBMH.java └── FileRegex.java /docs/bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenrbrandt/pat-regex/master/docs/bg.jpg 
-------------------------------------------------------------------------------- /docs/pat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stevenrbrandt/pat-regex/master/docs/pat.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Index of Documentation 2 | 3 | * [Quick Start](http://htmlpreview.github.io/?https://github.com/stevenrbrandt/pat-regex/blob/master/docs/tutorial.html) 4 | * [Tutorial](http://htmlpreview.github.io/?https://github.com/stevenrbrandt/pat-regex/blob/master/docs/tutorial.html) 5 | -------------------------------------------------------------------------------- /com/stevesoft/pat/patInf.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | final class patInf extends patInt { 11 | patInf() { inf = true; } 12 | }; 13 | -------------------------------------------------------------------------------- /com/stevesoft/pat/NotImplementedError.java: -------------------------------------------------------------------------------- 1 | package// 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | com.stevesoft.pat; 9 | 10 | public class NotImplementedError extends Error { 11 | public NotImplementedError(String s) { 12 | super(s); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /com/stevesoft/pat/SpecialRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | public class SpecialRule extends ReplaceRule { 11 | public SpecialRule() {} 12 | public void apply(StringBufferLike sb,RegRes rr) {} 13 | } 14 | -------------------------------------------------------------------------------- /com/stevesoft/pat/PatternSub.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This is simply a subclass of pattern that 11 | * contains a sub pattern. 12 | */ 13 | abstract class PatternSub extends Pattern { 14 | Pattern sub; 15 | } 16 | -------------------------------------------------------------------------------- /com/stevesoft/pat/UniValidator.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This provides a base class for all the Unicode character 11 | * matching rules. 
12 | */ 13 | class UniValidator extends Validator { 14 | public patInt minChars() { return new patInt(1); } 15 | public patInt maxChars() { return new patInt(1); } 16 | } 17 | -------------------------------------------------------------------------------- /com/stevesoft/pat/PopRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** Implements substitution rule $POP. See the example 11 | * file trans3.html. 12 | */ 13 | public class PopRule extends SpecialRule { 14 | public PopRule() {} 15 | public String toString1() { return "${POP}"; } 16 | } 17 | -------------------------------------------------------------------------------- /com/stevesoft/pat/BasicStringBufferLike.java: -------------------------------------------------------------------------------- 1 | package// 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | com.stevesoft.pat; 9 | 10 | /** An abstraction of the StringBuffer which only 11 | implements a subset of StringBuffer's methods. 12 | */ 13 | public interface BasicStringBufferLike { 14 | public void append(char c); 15 | public void append(String s); 16 | public StringLike toStringLike(); 17 | public Object unwrap(); 18 | } 19 | -------------------------------------------------------------------------------- /com/stevesoft/pat/MultiMin.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 
6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This extension of multi is the version of multi 11 | which wants to match the fewest number of characters. 12 | It implements the *? type of syntax. */ 13 | class MultiMin extends Multi { 14 | MultiMin(patInt i1,patInt i2,Pattern p) throws RegSyntax { 15 | super(i1,i2,p); 16 | matchFewest = true; 17 | } 18 | }; 19 | -------------------------------------------------------------------------------- /com/stevesoft/pat/CodeRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** Implements the rules for \U, \L, \E, \Q in substitutions. */ 11 | public final class CodeRule extends SpecialRule { 12 | char c = 'E'; 13 | public CodeRule() {} 14 | public CodeRule(char c) { 15 | this.c = c; 16 | } 17 | public void apply(StringBufferLike sb,RegRes res) { 18 | sb.setMode(c); 19 | } 20 | public String toString1() { return "\\"+c; } 21 | } 22 | -------------------------------------------------------------------------------- /com/stevesoft/pat/AmpersandRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This implements the $& element of the second argument to 11 | * Regex. 
12 | * @see com.stevesoft.pat.ReplaceRule 13 | */ 14 | public final class AmpersandRule extends ReplaceRule { 15 | public AmpersandRule() {} 16 | public void apply(StringBufferLike sb,RegRes res) { 17 | sb.append(res.stringMatched()); 18 | } 19 | public String toString1() { return "$&"; } 20 | } 21 | -------------------------------------------------------------------------------- /com/stevesoft/pat/NullRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** The apply(StringBufferLike sb,RegRes res) method of this derivation 11 | of ReplaceRule appends nothing to the contents of the StringBuffer sb. 12 | @see com.stevesoft.pat.ReplaceRule 13 | */ 14 | public class NullRule extends ReplaceRule { 15 | public NullRule() {} 16 | public void apply(StringBufferLike sb,RegRes res) { 17 | } 18 | public String toString1() { return ""; } 19 | } 20 | -------------------------------------------------------------------------------- /com/stevesoft/pat/DirFileRegex.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | package com.stevesoft.pat; 9 | import java.io.File; 10 | 11 | /** This class is just like FileRegex, except that its accept method 12 | only returns true if the file matching the pattern is a directory.*/ 13 | public class DirFileRegex extends FileRegex { 14 | public DirFileRegex() { dirflag = DIR; } 15 | public DirFileRegex(String fp) { super(fp); dirflag = DIR; } 16 | public static String[] list(String f) { 17 | return list(f,DIR); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /com/stevesoft/pat/SubMark.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** OrMark and SubMark together implement ( ... ) */ 11 | class SubMark extends Pattern { 12 | int end_pos,start_pos; 13 | OrMark om; 14 | public String toString() { return ""; } 15 | public int matchInternal(int i,Pthings pt) { 16 | pt.marks[om.id+pt.nMarks] = i; 17 | int ret=nextMatch(i,pt); 18 | if(ret < 0) 19 | pt.marks[om.id+pt.nMarks] = -1; 20 | return ret; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /com/stevesoft/pat/LeftRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** The apply(StringBufferLike sb,RegRes res) method of this derivation 11 | of ReplaceRule appends the contents of res.left() to the StringBuffer 12 | sb. 
13 | @see com.stevesoft.pat.ReplaceRule 14 | */ 15 | public class LeftRule extends ReplaceRule { 16 | public LeftRule() {} 17 | public void apply(StringBufferLike sb,RegRes res) { 18 | sb.append(res.left()); 19 | } 20 | public String toString1() { return "$`"; } 21 | } 22 | -------------------------------------------------------------------------------- /com/stevesoft/pat/NoPattern.java: -------------------------------------------------------------------------------- 1 | package// 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | com.stevesoft.pat; 9 | import java.util.*; 10 | 11 | /** The idea behind this class is simply to eliminate the need for 12 | * testing to see if Regex.thePattern is null. Every instruction 13 | * we can eliminate from _search will help. 14 | */ 15 | public class NoPattern extends Pattern { 16 | public String toString() { return "(?e=#)[^#d#D]"; } 17 | public int matchInternal(int i,Pthings p) { return -1; } 18 | Pattern clone1(Hashtable h) { return new NoPattern(); } 19 | } 20 | -------------------------------------------------------------------------------- /com/stevesoft/pat/NonDirFileRegex.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | package com.stevesoft.pat; 9 | import java.io.File; 10 | 11 | /** This class is just like FileRegex, except that its accept method 12 | only returns true if the file matching the pattern is not a directory.*/ 13 | public class NonDirFileRegex extends FileRegex { 14 | public NonDirFileRegex() { dirflag = NONDIR; } 15 | public NonDirFileRegex(String fp) { super(fp); dirflag = NONDIR; } 16 | public static String[] list(String f) { 17 | return list(f,NONDIR); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RegSyntax.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /* 11 | Shareware: package pat 12 | Copyright 2001, Steven R. Brandt 13 | */ 14 | /** 15 | This type of syntax error is thrown whenever a syntax error 16 | is encountered in the pattern. It may not be caught directly, as 17 | it is not in the throws clause of any method. 18 | */ 19 | public class RegSyntax extends Exception { 20 | RegSyntax() {} 21 | RegSyntax(String msg) { 22 | super(msg); 23 | } 24 | }; 25 | -------------------------------------------------------------------------------- /com/stevesoft/pat/WantMoreTextReplaceRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** Triggers the reading of another line of text, allowing a longer 11 | pattern to match -- for details see 12 | WantMore.java. 
13 | */ 14 | public class WantMoreTextReplaceRule extends SpecialRule { 15 | public WantMoreTextReplaceRule() {} 16 | public void apply(StringBufferLike sb,RegRes res) { 17 | } 18 | public String toString1() { return "${WANT_MORE_TEXT}"; } 19 | } 20 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RightRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** The apply(StringBufferLike sb,RegRes res) method of this derivation 11 | of ReplaceRule appends the contents of res.right() to the StringBufferLike sb. 12 | @see com.stevesoft.pat.ReplaceRule 13 | */ 14 | public class RightRule extends ReplaceRule { 15 | public RightRule() {} 16 | public void apply(StringBufferLike sb,RegRes res) { 17 | sb.append(res.right()); 18 | } 19 | public String toString1() { 20 | return "$'"; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RuleHolder.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | /** This class is used internally. 
*/ 10 | class RuleHolder extends ReplaceRule { 11 | ReplaceRule held = null; 12 | RuleHolder() {} 13 | RuleHolder(ReplaceRule h) { held = h; } 14 | public Object clone1() { return new RuleHolder(held); } 15 | public String toString1() { return held.toString1(); } 16 | public void apply(StringBufferLike sb,RegRes rr) { 17 | held.apply(sb,rr); 18 | } 19 | public ReplaceRule arg(String s) { return new RuleHolder(held.arg(s)); } 20 | } 21 | -------------------------------------------------------------------------------- /com/stevesoft/pat/NullPattern.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This pattern matches nothing -- it is found in patterns 12 | * like (hello|world|) where a zero-length subelement occurs. 13 | */ 14 | class NullPattern extends Pattern { 15 | public String toString() { return nextString(); } 16 | public int matchInternal(int p,Pthings pt) { 17 | return nextMatch(p,pt); 18 | } 19 | public patInt maxChars() { return new patInt(0); } 20 | Pattern clone1(Hashtable h) { return new NullPattern(); } 21 | } 22 | -------------------------------------------------------------------------------- /com/stevesoft/pat/StringRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** The apply method of this ReplaceRule simply appends the text 11 | it was initialized with to the StringBufferLike. 
12 | @see com.stevesoft.pat.ReplaceRule 13 | */ 14 | public class StringRule extends ReplaceRule { 15 | String s; 16 | public StringRule(String s) { this.s = s; } 17 | public void apply(StringBufferLike sb,RegRes res) { 18 | sb.append(s); 19 | } 20 | public String toString1() { return s; } 21 | public Object clone1() { return new StringRule(s); } 22 | } 23 | -------------------------------------------------------------------------------- /com/stevesoft/pat/BackG.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This class represents the \G pattern element. */ 12 | class BackG extends Pattern { 13 | char c,altc,altc2; 14 | int mask; 15 | public BackG() { 16 | } 17 | /** Succeeds only when pos equals pt.lastPos; pos is handed to nextMatch unchanged, so no characters are consumed. Returns -1 otherwise. */ 18 | public int matchInternal(int pos,Pthings pt) { 19 | return pos==pt.lastPos ? nextMatch(pos,pt) : -1; 20 | } 21 | /** Renders this element as \G followed by the rest of the pattern. */ 22 | public String toString() { 23 | return "\\G"+nextString(); 24 | } 25 | /** \G is zero-width (see matchInternal), so it contributes 0 characters to both the minimum and maximum match length. */ 26 | public patInt minChars() { return new patInt(0); } 27 | public patInt maxChars() { return new patInt(0); } 28 | /** Returns a fresh BackG; the Hashtable argument is not used. */ 29 | Pattern clone1(Hashtable h) { return new BackG(); } 30 | } 31 | -------------------------------------------------------------------------------- /com/stevesoft/pat/apps/ShutDown.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | package com.stevesoft.pat.apps; 9 | 10 | import java.awt.*; 11 | import java.awt.event.*; 12 | 13 | /** Just a little class to listen to the windowClosing event 14 | and call System.exit(0); */ 15 | public class ShutDown implements WindowListener { 16 | public void windowActivated(WindowEvent we) {} 17 | public void windowDeactivated(WindowEvent we) {} 18 | public void windowIconified(WindowEvent we) {} 19 | public void windowDeiconified(WindowEvent we) {} 20 | public void windowClosing(WindowEvent we) { System.exit(0); } 21 | public void windowClosed(WindowEvent we) {} 22 | public void windowOpened(WindowEvent we) {} 23 | } 24 | -------------------------------------------------------------------------------- /com/stevesoft/pat/CustomEndpoint.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This class is needed internally to make backtracking work 12 | * correctly in user-defined patterns. 13 | */ 14 | class CustomEndpoint extends Pattern { 15 | Custom c; 16 | CustomEndpoint(Custom cm) { c = cm; } 17 | public int matchInternal(int pos,Pthings pt) { 18 | int npos = c.v.validate(pt.src,c.start,pos); 19 | if(npos >= 0) 20 | return nextMatch(npos,pt); 21 | return -1; 22 | } 23 | public String toString() { return ""; } 24 | Pattern clone1(Hashtable h) { 25 | return new CustomEndpoint((Custom)c.clone(h)); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /com/stevesoft/pat/BackRefRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. 
Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This class implements the pattern elements $1, $2, etc in 11 | a substitution rule. The apply(StringBufferLike sb,RegRes rr) method of this ReplaceRule 12 | appends the contents of rr.stringMatched(n) (or the empty string when that is null), where n is 13 | the integer supplied to the constructor. */ 14 | public class BackRefRule extends ReplaceRule { 15 | int n; 16 | public BackRefRule(int n) { this.n = n; } 17 | public void apply(StringBufferLike sb,RegRes res) { 18 | String x = res.stringMatched(n); 19 | sb.append(x == null ? "" : x); 20 | } 21 | public String toString1() { return "$"+n; } 22 | public Object clone1() { return new BackRefRule(n); } 23 | } 24 | -------------------------------------------------------------------------------- /com/stevesoft/pat/PushRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** See the example file 11 | trans3.java for 12 | further examples of how this is used. You will probably not 13 | want to call it directly. 
*/ 14 | public class PushRule extends SpecialRule { 15 | Regex NewRule; 16 | public PushRule(PushRule p) { NewRule = p.NewRule; } 17 | public PushRule(String nm,Regex rr) { name=nm; NewRule = rr; } 18 | public PushRule(String nm,Transformer tr) { name = nm; NewRule = tr.rp; } 19 | public Object clone1() { return new PushRule(this); } 20 | /** Renders this rule in substitution syntax as ${+name}. Uses the toString1() name that the other rules (e.g. ChangeRule) define, so code calling toString1() on a ReplaceRule reaches it. */ 21 | public String toString1() { return "${+"+name+"}"; } 22 | /** @deprecated misspelling of toString1(), kept so existing callers still compile. */ 23 | public String String1() { return toString1(); } 24 | /** No-op: appends nothing to sbl. */ 25 | public void apply(StringBufferLike sbl,RegRes rr) {} 26 | } 27 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/StringBufferWrap.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | 12 | /** This provides a wrapper for StringBuffer to 13 | capture the output of a replacement. */ 14 | public class StringBufferWrap 15 | implements BasicStringBufferLike 16 | { 17 | StringBuffer sb = new StringBuffer(); 18 | public void append(char c) { 19 | sb.append(c); 20 | } 21 | public void append(String s) { 22 | sb.append(s); 23 | } 24 | public int length() { 25 | return sb.length(); 26 | } 27 | public String toString() { 28 | return sb.toString(); 29 | } 30 | public StringLike toStringLike() { 31 | return new StringWrap(sb.toString()); 32 | } 33 | public Object unwrap() { 34 | return sb; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Any.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This is the '.' character in a Pattern. It 12 | matches any character. */ 13 | class Any extends Pattern { 14 | public int matchInternal(int pos,Pthings pt) { 15 | if(pos < pt.src.length()) 16 | if(pt.dotDoesntMatchCR) { 17 | if(pt.src.charAt(pos) != '\n') 18 | return nextMatch(pos+1,pt); 19 | } else return nextMatch(pos+1,pt); 20 | return -1; 21 | } 22 | public String toString() { 23 | return "."+nextString(); 24 | } 25 | public patInt minChars() { return new patInt(1); } 26 | public patInt maxChars() { return new patInt(1); } 27 | public Pattern clone1(Hashtable h) { return new Any(); } 28 | }; 29 | -------------------------------------------------------------------------------- /com/stevesoft/pat/ChangeRule.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This class implements user defined special replacement rules 11 | * of the form ${=name}. See trans2.java 12 | * and trans2a.java. 
13 | */ 14 | public class ChangeRule extends SpecialRule { 15 | Regex NewRule; 16 | public ChangeRule(ChangeRule c) { NewRule=c.NewRule; } 17 | public ChangeRule(String nm,Regex rr) { name=nm; NewRule = rr; } 18 | public ChangeRule(String nm,Transformer tr) { name=nm; NewRule = tr.rp; } 19 | public Object clone1() { return new ChangeRule(this); } 20 | public String toString1() { return "${="+name+"}"; } 21 | public void apply(StringBufferLike sb,RegRes rr) {} 22 | } 23 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Backup.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** Implements the (?<number) Pattern, where number is 12 | an integer telling us how far to back up in the Pattern. 13 | Not in perl 5. It also allows (?>number). */ 14 | class Backup extends Pattern { 15 | int bk; 16 | Backup(int ii) { bk = ii; } 17 | public String toString() { 18 | return "(?" + (bk < 0 ? ">" + (-bk) : "<" + bk) + ")" + nextString(); 19 | } 20 | public int matchInternal(int pos,Pthings pt) { 21 | if(pos < bk) return -1; 22 | return nextMatch(pos-bk,pt); 23 | } 24 | public patInt minChars() { return new patInt(-bk); } 25 | public patInt maxChars() { return new patInt(-bk); } 26 | public Pattern clone1(Hashtable h) { return new Backup(bk); } 27 | }; 28 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RBuffer.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. 
Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | import java.io.*; 11 | import com.stevesoft.pat.wrap.*; 12 | 13 | /** This class is used internally by RegexReader to 14 | store blocks of data. */ 15 | class RBuffer { 16 | boolean done = false; 17 | StringBuffer sb; 18 | int pos,epos; 19 | RBuffer next; 20 | RBuffer() {} 21 | RBuffer(StringBuffer sb) { this.sb = sb; } 22 | public String toString() { 23 | return "sb="+sb.toString().replace('\n',' ')+ 24 | " pos="+pos+" epos="+epos+ 25 | " sb.length()="+sb.length()+ 26 | "\n"+sp(pos+3)+"^"+sp(epos-pos-1)+"^"; 27 | } 28 | String sp(int n) { 29 | if(n<=0) 30 | return ""; 31 | StringBuffer sb = new StringBuffer(n); 32 | for(int i=0;i 0 && pt.src.charAt(pos-1)=='\n') 19 | return nextMatch(pos,pt); 20 | if(pos == 0) return nextMatch(pos,pt); 21 | return -1; 22 | } 23 | public String toString() { 24 | if(retIsStart) 25 | return "^"+nextString(); 26 | else 27 | return "\\A"+nextString(); 28 | } 29 | public patInt maxChars() { return new patInt(0); } 30 | Pattern clone1(Hashtable h) { return new Start(retIsStart); } 31 | }; 32 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/StringWrap.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | 12 | /** A basic wrapper for the String object. Regex does 13 | not search String directly any longer, it searches StringLike. 
*/ 14 | public class StringWrap implements StringLike { 15 | String s; 16 | public StringWrap(String s) { 17 | this.s = s; 18 | } 19 | public String toString() { return s; } 20 | public char charAt(int i) { return s.charAt(i); } 21 | public int length() { return s.length(); } 22 | public String substring(int i1,int i2) { 23 | return s.substring(i1,i2); 24 | } 25 | public Object unwrap() { return s; } 26 | public BasicStringBufferLike newStringBufferLike() { 27 | return new StringBufferWrap(); 28 | } 29 | 30 | public int indexOf(char c) { 31 | return s.indexOf(c); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/CharArrayBufferWrap.java: -------------------------------------------------------------------------------- 1 | package// 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | 12 | /** Allows the outcome of a replaceAll() or replaceFirst() 13 | to be an array of characters rather than a String. 
14 | */ 15 | public class CharArrayBufferWrap 16 | implements BasicStringBufferLike 17 | { 18 | StringBuffer sb = new StringBuffer(); 19 | public void append(char c) { 20 | sb.append(c); 21 | } 22 | public void append(String s) { 23 | sb.append(s); 24 | } 25 | public StringLike toStringLike() { 26 | char[] ca = new char[sb.length()]; 27 | for(int i=0;ip.src.length()) return -1; 23 | int ns = p.src.length()-pos; 24 | if(imax < ns) ns = imax; 25 | for(int i=0;i= 0) { 33 | pn = i; 34 | return r; 35 | } 36 | } 37 | pn = -1; 38 | return -1; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Pthings.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.*; 10 | 11 | /** 12 | Things that need to be kept track of during a 13 | match. Passed along with Pattern.matchInternal. */ 14 | public class Pthings { 15 | /** The current text we are attempting to match. */ 16 | public StringLike src; 17 | /** Whether we should ignore the case of letters in 18 | this match. */ 19 | public boolean ignoreCase; 20 | public boolean mFlag; 21 | /** The mask to use when dontMatchInQuotes is set. */ 22 | public BitSet cbits; 23 | /** Used to keep track of backreferences. */ 24 | //public Hashtable marks; 25 | public int[] marks; 26 | public int nMarks; 27 | /** Used to set the behavior of "." By default, it 28 | now fails to match the '\n' character. */ 29 | public boolean dotDoesntMatchCR; 30 | /** Determine if Skipped strings need to be checked. 
*/ 31 | public boolean no_check; 32 | int lastPos; 33 | } 34 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/WriterWrap.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | import java.io.*; 12 | 13 | /** Allows the outcome of a replaceAll() or replaceFirst() 14 | to be directed to a Writer rather than a String. 15 |

16 | The method toStringLike() cannot work, however. 17 | This means that the return value of replaceAll() will 18 | be null if this Object is used as the StringBufferLike.*/ 19 | public class WriterWrap 20 | implements BasicStringBufferLike 21 | { 22 | Writer w; 23 | public WriterWrap(Writer w) { 24 | this.w = w; 25 | } 26 | public void append(char c) { 27 | try { 28 | w.write((int)c); 29 | } catch(IOException ioe) {} 30 | } 31 | public void append(String s) { 32 | try { 33 | w.write(s); 34 | } catch(IOException ioe) {} 35 | } 36 | 37 | /** This operation can't really be done. */ 38 | public StringLike toStringLike() { 39 | return null; 40 | } 41 | 42 | public Object unwrap() { 43 | return w; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/CharArrayWrap.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | 12 | /** This provides a wrapper for a char array so that 13 | it can be searched by Regex. 
*/ 14 | public class CharArrayWrap implements StringLike { 15 | char[] ca; 16 | public char[] getCharArray() { return ca; } 17 | public CharArrayWrap(char[] ca) { this.ca = ca; } 18 | public String toString() { 19 | return new String(ca); 20 | } 21 | public char charAt(int i) { return ca[i]; } 22 | public int length() { return ca.length; } 23 | public String substring(int i1,int i2) { 24 | StringBuffer sb = new StringBuffer(); 25 | for(int i=i1;i end) return -1; 28 | start += offset; 29 | int vend = min(s.length()-2,end+offset); 30 | for(int i=start;i<=vend;i++) 31 | if(0 == (s.charAt(i)&mask) && 0 == (s.charAt(i+1)&mask1)) { 32 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) 33 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) 34 | return i-offset; 35 | } 36 | return -1; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /com/stevesoft/pat/End.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | //class AddToEnd extends RegSyntax {}; 12 | 13 | /** Compiles the '$' or the '\Z' Pattern. It is 14 | an error to have further Pattern elements after 15 | '\Z'. It is the end of the String. */ 16 | class End extends Pattern { 17 | boolean retIsEnd; 18 | End(boolean b) { retIsEnd = b; } 19 | public int matchInternal(int pos,Pthings pt) { 20 | if(retIsEnd && pt.mFlag && pos < pt.src.length()) { 21 | if(pt.src.charAt(pos)=='\n') { 22 | return nextMatch(pos,pt); 23 | } 24 | } 25 | if(pt.src.length() == pos) 26 | return nextMatch(pos,pt); 27 | else if(posderiv2.java 13 | and deriv3.java 14 | in the test directory. 
15 | @see com.stevesoft.pat.CustomEndpoint 16 | */ 17 | class Custom extends PatternSub { 18 | String select; 19 | Validator v; 20 | int start; 21 | Custom(String s) { 22 | select = s; 23 | v = (Validator)Regex.validators.get(s); 24 | } 25 | public int matchInternal(int pos,Pthings pt) { 26 | start = pos; 27 | return sub.matchInternal(pos,pt); 28 | } 29 | public String toString() { 30 | String a = v.argsave == null ? "" : ":"+v.argsave; 31 | return "(??"+select+a+")"+nextString(); 32 | } 33 | public patInt minChars() { return v.minChars(); } 34 | public patInt maxChars() { return v.maxChars(); } 35 | Pattern clone1(Hashtable h) { 36 | Custom c = new Custom(select); 37 | h.put(c,c); 38 | h.put(this,c); 39 | c.sub = sub.clone(h); 40 | return c; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Group.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This class implements the (?@<>) syntax that matches 12 | a balanced parenthesis. Not in perl 5. */ 13 | class Group extends Pattern { 14 | char op,cl; 15 | Group(char opi,char cli) { 16 | op = opi; 17 | cl = cli; 18 | } 19 | public int matchInternal(int pos,Pthings pt) { 20 | int i,count=1; 21 | if(pos < pt.src.length()) 22 | if(!Masked(pos,pt) && pt.src.charAt(pos) != op) 23 | return -1; 24 | for(i=pos+1;i 13 |

  • The StringLike object will not change. Calls to 14 | charAt(int) will not vary with time. 15 |
  • The length of the object being searched is known 16 | before the search begins and does not vary with time. 17 | 18 | Note that searching String is probably faster than searching 19 | other objects, so searching String is still preferred if 20 | possible. 21 | */ 22 | public interface StringLike { 23 | public char charAt(int i); 24 | public String toString(); 25 | public int length(); 26 | public String substring(int i1,int i2); 27 | /** Obtain the underlying object, be it a String, char[], 28 | RandomAccessFile, whatever. */ 29 | public Object unwrap(); 30 | /** By default, the result is put in a String or char[] 31 | when a replace is done. If you wish to save the result 32 | in some other StringBufferLike then you can do this 33 | by implementing this method, or over-riding it's behavior 34 | from an existing class. */ 35 | public BasicStringBufferLike newStringBufferLike(); 36 | public int indexOf(char c); 37 | } 38 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Bracket.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Vector; 10 | import java.util.Hashtable; 11 | 12 | /** The Bracket is a form of the Or class, 13 | implements the pattern element [ ]. 
*/ 14 | class Bracket extends Or { 15 | boolean neg; 16 | Bracket(boolean n) { neg = n; } 17 | String leftForm() { 18 | if(neg) 19 | return "[^"; 20 | else 21 | return "["; 22 | } 23 | String rightForm() { return "]"; } 24 | String sepForm() { return ""; } 25 | public int matchInternal(int pos,Pthings pt) { 26 | if(pos >= pt.src.length()) return -1; 27 | int r = super.matchInternal(pos,pt); 28 | if((neg && r<0)||(!neg && r>=0)) 29 | return nextMatch(pos+1,pt); 30 | return -1; 31 | } 32 | public patInt minChars() { return new patInt(1); } 33 | public patInt maxChars() { return new patInt(1); } 34 | 35 | public Or addOr(Pattern p) { 36 | pv = null; 37 | v.addElement(p); 38 | p.setParent(null); 39 | return this; 40 | } 41 | public Pattern clone1(Hashtable h) { 42 | Bracket b = new Bracket(neg); 43 | b.v = new Vector(); 44 | for(int i=0;i= 0) { 23 | if(reverse) return -1; 24 | else return nextMatch(pos,pt); 25 | } else { 26 | if(reverse) return nextMatch(pos,pt); 27 | else return -1; 28 | } 29 | } 30 | String leftForm() { 31 | if(reverse) 32 | return "(?!"; 33 | else 34 | return "(?="; 35 | } 36 | public patInt minChars() { return new patInt(0); } 37 | public patInt maxChars() { return new patInt(0); } 38 | Pattern clone1(Hashtable h) { 39 | lookAhead la=new lookAhead(reverse); 40 | h.put(this,la); 41 | h.put(la,la); 42 | for(int i=0;iCopyright 2001, Steven R. Brandt 13 | */ /** 14 | This class only exists to store data needed during the 15 | compilation of a regular expression. */ 16 | public class Rthings { 17 | /** The numeric identity of the next () to be encountered 18 | while compiling the pattern. */ 19 | public int val=Regex.BackRefOffset; 20 | /** Needed in case (?i) is encountered, to pass back the 21 | message that ignoreCase should be set. */ 22 | public boolean ignoreCase; 23 | /** Needed in case (?Q) is encountered, to pass back the 24 | message that dontMatchInQuotes should be set. 
*/ 25 | public boolean dontMatchInQuotes; 26 | public boolean optimizeMe = false; 27 | public boolean noBackRefs = false; 28 | public int parenLevel = 0; 29 | boolean gFlag = false, mFlag = false, sFlag = false; 30 | Pattern p; 31 | Or o; 32 | Rthings(Regex r) { 33 | ignoreCase = r.ignoreCase; 34 | dontMatchInQuotes = r.dontMatchInQuotes; 35 | } 36 | void set(Regex r) { 37 | r.gFlag = gFlag; 38 | r.mFlag = mFlag; 39 | r.sFlag = sFlag; 40 | r.ignoreCase = ignoreCase; 41 | r.dontMatchInQuotes = dontMatchInQuotes; 42 | if(optimizeMe) r.optimize(); 43 | } 44 | }; 45 | -------------------------------------------------------------------------------- /com/stevesoft/pat/oneChar.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** This class matches a single character. 
*/ 12 | class oneChar extends Pattern { 13 | char c,altc,altc2; 14 | int mask; 15 | public oneChar(char ci) { 16 | c = ci; 17 | char cu,cl,ct; 18 | cu = CaseMgr.toUpperCase(c); 19 | cl = CaseMgr.toLowerCase(c); 20 | ct = CaseMgr.toTitleCase(c); 21 | if(c == cu) { 22 | altc = cl; 23 | altc2 = ct; 24 | } else if(c == cl) { 25 | altc = cu; 26 | altc2 = ct; 27 | } else { 28 | altc = cl; 29 | altc2 = cu; 30 | } 31 | mask = c & altc & altc2; 32 | } 33 | public int matchInternal(int pos,Pthings pt) { 34 | char p; 35 | int ret=-1; 36 | if (pos < pt.src.length() && !Masked(pos,pt) 37 | && ((p=pt.src.charAt(pos))==c || 38 | (pt.ignoreCase&& (p==altc||p==altc2) ) )) 39 | ret = nextMatch(pos+1,pt); 40 | return ret; 41 | } 42 | public String toString() { 43 | return protect(""+c,PROTECT_THESE,ESC)+nextString(); 44 | } 45 | public patInt minChars() { return new patInt(1); } 46 | public patInt maxChars() { return new patInt(1); } 47 | Pattern clone1(Hashtable h) { return new oneChar(c); } 48 | }; 49 | -------------------------------------------------------------------------------- /com/stevesoft/pat/apps/ColorLine.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.apps; 9 | import java.awt.*; 10 | import java.util.*; 11 | 12 | /* A helper class for the ColorText class. This provides a single 13 | line of text with various colored parts. 
*/ 14 | public class ColorLine { 15 | Vector v = new Vector(); 16 | public Object clone() { 17 | ColorLine cl = new ColorLine(); 18 | cl.v = (Vector)v.clone(); 19 | return cl; 20 | } 21 | int ColorLineWidth(FontMetrics fm) { 22 | int width = 0; 23 | int i; 24 | for(i=0;i pt.marks[id+pt.nMarks]) { 37 | int swap = pt.marks[id]; 38 | pt.marks[id] = pt.marks[id+pt.nMarks]+1; 39 | pt.marks[id+pt.nMarks] = swap+1; 40 | } 41 | return ret; 42 | } 43 | public Pattern clone1(Hashtable h) { 44 | OrMark om = new OrMark(id); 45 | h.put(om,om); 46 | h.put(this,om); 47 | for(int i=0;i= 'a' && c <= 'z') 18 | return true; 19 | if(c >= 'A' && c <= 'Z') 20 | return true; 21 | if(c >= '0' && c <= '9') 22 | return true; 23 | if(c == '_') 24 | return true; 25 | return false; 26 | } 27 | boolean matchLeft(int pos,Pthings pt) { 28 | if(pos <= 0) 29 | return true; 30 | if(isAChar(pt.src.charAt(pos)) 31 | && isAChar(pt.src.charAt(pos-1))) 32 | return false; 33 | return true; 34 | } 35 | boolean matchRight(int pos,Pthings pt) { 36 | if(pos < 0) return false; 37 | if(pos+1 >= pt.src.length()) 38 | return true; 39 | if(isAChar(pt.src.charAt(pos)) 40 | && isAChar(pt.src.charAt(pos+1))) 41 | return false; 42 | return true; 43 | } 44 | public int matchInternal(int pos,Pthings pt) { 45 | if(matchRight(pos-1,pt) || matchLeft(pos,pt)) 46 | return nextMatch(pos,pt); 47 | return -1; 48 | } 49 | public patInt maxChars() { return new patInt(0); } 50 | public Pattern clone1(Hashtable h) { return new Boundary(); } 51 | }; 52 | -------------------------------------------------------------------------------- /com/stevesoft/pat/StringBufferLike.java: -------------------------------------------------------------------------------- 1 | package// 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | com.stevesoft.pat; 9 | 10 | import com.stevesoft.pat.*; 11 | 12 | /** A tool that is used to make the \E, \U, \L, and \Q 13 | elements of a substitution. */ 14 | public class StringBufferLike implements BasicStringBufferLike { 15 | BasicStringBufferLike sbl; 16 | public StringBufferLike(BasicStringBufferLike sbl) { 17 | this.sbl = sbl; 18 | } 19 | char mode = 'E', altMode = ' '; 20 | public StringLike toStringLike() { 21 | return sbl.toStringLike(); 22 | } 23 | public String toString() { 24 | return sbl.toString(); 25 | } 26 | public void append(char c) { 27 | 28 | switch(mode) { 29 | case 'u': 30 | mode = altMode; 31 | altMode = ' '; 32 | case 'U': 33 | sbl.append(CaseMgr.toUpperCase(c)); 34 | break; 35 | case 'l': 36 | mode = altMode; 37 | altMode = ' '; 38 | case 'L': 39 | sbl.append(CaseMgr.toLowerCase(c)); 40 | break; 41 | case 'Q': 42 | if((c >= 'a' && c <= 'z') 43 | || (c >= 'A' && c <= 'Z') 44 | || (c >= '0' && c <= '9')) 45 | ; 46 | else 47 | sbl.append('\\'); 48 | default: 49 | sbl.append(c); 50 | break; 51 | } 52 | } 53 | public void append(String s) { 54 | for(int i=0;ihttp://unicode.org. */ 12 | public class Prop { 13 | /** Is this a "Decimal Digit" according to Unicode? */ 14 | public final static boolean isDecimalDigit(char c) { 15 | if(Bits.decimal_digit == null) 16 | Bits.decimal_digit_f(); 17 | return Bits.decimal_digit.get(c); 18 | } 19 | /** Is this a "Alphabetic" according to Unicode? */ 20 | public final static boolean isAlphabetic(char c) { 21 | if(Bits.letter == null) 22 | Bits.letter_f(); 23 | return Bits.letter.get(c); 24 | } 25 | /** Is this a "Math" according to Unicode? */ 26 | public final static boolean isMath(char c) { 27 | if(Bits.math == null) 28 | Bits.math_f(); 29 | return Bits.math.get(c); 30 | } 31 | 32 | /** Is this a "Currency" according to Unicode? 
*/ 33 | public final static boolean isCurrency(char c) { 34 | if(Bits.currency == null) 35 | Bits.currency_f(); 36 | return Bits.currency.get(c); 37 | } 38 | 39 | /** Is c a white space character according to Unicode? */ 40 | public final static boolean isWhite(char c) { 41 | if(Bits.white == null) 42 | Bits.white_f(); 43 | return Bits.white.get(c); 44 | } 45 | 46 | /** Is c a punctuation character according to Unicode? */ 47 | public final static boolean isPunct(char c) { 48 | if(Bits.punct == null) 49 | Bits.punct_f(); 50 | return Bits.punct.get(c); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Range.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** Thrown when one encounters things like [z-a] */ 12 | class BadRangeArgs extends RegSyntax {}; 13 | 14 | /** Implements a subelement (ranges) of the [] pattern element. 15 | For example, [a-z023] is implemented using a range and three oneChar 16 | classes. 
17 | @see Bracket 18 | @see oneChar 19 | */ 20 | class Range extends Pattern { 21 | char lo,hi,altlo,althi; 22 | boolean printBrackets = false; 23 | public String toString() { 24 | String s=protect(""+lo,PROTECT_THESE,ESC)+"-" 25 | +protect(""+hi,PROTECT_THESE,ESC); 26 | if(!printBrackets) 27 | return s; 28 | return "["+s+"]"; 29 | } 30 | Range(char loi,char hii) throws RegSyntax { 31 | lo = loi; hi = hii; 32 | oneChar o = null; 33 | if(lo >= hi) 34 | //throw new BadRangeArgs(); 35 | RegSyntaxError.endItAll("Badly formed []'s : "+lo+" >= "+hi); 36 | o = new oneChar(lo); 37 | altlo = o.altc; 38 | o = new oneChar(hi); 39 | althi = o.altc; 40 | } 41 | public int matchInternal(int pos,Pthings pt) { 42 | if(pos >= pt.src.length()) return -1; 43 | if(Masked(pos,pt)) return -1; 44 | char c = pt.src.charAt(pos); 45 | if(lo <= c && c <= hi || 46 | (pt.ignoreCase && (altlo <= c && c <= althi))) 47 | return nextMatch(pos+1,pt); 48 | return -1; 49 | } 50 | public patInt minChars() { return new patInt(1); } 51 | public patInt maxChars() { return new patInt(1); } 52 | public Pattern clone1(Hashtable h) { 53 | try { 54 | Range r = new Range(lo,hi); 55 | r.printBrackets = printBrackets; 56 | return r; 57 | } catch(RegSyntax rs) { 58 | return null; 59 | } 60 | } 61 | }; 62 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Multi.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** Matches any number of instances of sub Pattern 12 | this was the hardest method to write. It implements 13 | '+', '*', '?', "{0,10}", "{5,}", "{5}", etc. 
14 | @see pat.Multi_stage2 15 | @see pat.MultiMin 16 | */ 17 | class Multi extends PatternSub { 18 | patInt a,b; 19 | public patInt minChars() { return a.mul(p.countMinChars()); } 20 | public patInt maxChars() { return b.mul(p.countMaxChars()); } 21 | Pattern p; 22 | Multi_stage2 st2; 23 | public boolean matchFewest = false; 24 | /** 25 | @param a The fewest number of times the sub pattern can match. 26 | @param b The maximum number of times the sub pattern can match. 27 | @param p The sub pattern. 28 | @see Multi_stage2 29 | @see MultiMin 30 | */ 31 | public Multi(patInt a,patInt b,Pattern p) throws RegSyntax { 32 | if(!a.lessEq(b)) 33 | throw new RegSyntax("{"+a+","+b+"} is invalid"); 34 | this.a = a; 35 | this.b = b; 36 | this.p = p; 37 | st2 = new Multi_stage2(a,b,p); 38 | st2.parent = this; 39 | sub = st2.sub; 40 | } 41 | public String toString() { 42 | st2.matchFewest = matchFewest; 43 | return st2.toString(); 44 | } 45 | public int matchInternal(int pos,Pthings pt) { 46 | try { 47 | st2 = new Multi_stage2(a,b,p); 48 | } catch(RegSyntax r__) {} 49 | st2.matchFewest = matchFewest; 50 | st2.parent = this; 51 | return st2.matchInternal(pos,pt); 52 | } 53 | public Pattern clone1(Hashtable h) { 54 | try { 55 | Multi m = new Multi(a,b,((Pattern)p).clone(h)); 56 | m.matchFewest = matchFewest; 57 | return m; 58 | } catch(RegSyntax rs) { 59 | return null; 60 | } 61 | } 62 | }; 63 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Validator.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This class makes it easy to create your own patterns 11 | and integrate them into Regex. 
For more detail, see the 12 | example file deriv2.java or 13 | deriv3.java. */ 14 | 15 | public class Validator { 16 | String argsave = null; 17 | String pattern = "."; 18 | /** 19 | This method does extra checking on a matched section of 20 | a String beginning at position start and ending at end. 21 | The idea is that you can do extra checking with this 22 | that you don't know how to do with a standard Regex. 23 | 24 | If this method is successful, it returns the location 25 | of the end of this pattern element -- that may be the 26 | value end provided or some other value. A negative 27 | value signifies a match failure. 28 | 29 | By default, this method just returns end and thus 30 | does nothing. 31 | @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,com.stevesoft.pat.Validator) 32 | */ 33 | public int validate(StringLike src,int start,int end) { 34 | return end; 35 | } 36 | /* This method allows you to modify the behavior of this 37 | validator by making a new Validator object. If a Validator 38 | named "foo" is defined, then the pattern "(??foo:bar)" will 39 | cause Regex to first get the Validator given to Regex.define 40 | and then to call its arg method with the string "bar". 41 | If this method returns a null (the default) you get the same 42 | behavior as the pattern "(??foo)" would supply. */ 43 | public Validator arg(String s) { return null; } 44 | 45 | /** For optimization it is helpful, but not necessary, that 46 | you define the minimum number of characters this validator 47 | will allow to match. To do this 48 | return new patInt(number) where number is the smallest 49 | number of characters that can match. */ 50 | public patInt minChars() { return new patInt(0); } 51 | 52 | /** For optimization it is helpful, but not necessary, that 53 | you define the maximum number of characters this validator 54 | will allow to match. 
To do this either 55 | return new patInt(number), or new patInf() if an infinite 56 | number of characters may match. */ 57 | public patInt maxChars() { return new patInf(); } 58 | } 59 | -------------------------------------------------------------------------------- /com/stevesoft/pat/PartialBuffer.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | import java.io.*; 11 | 12 | /** This class allows you to match on a partial string. 13 | If the allowOverRun flag is true, then the 14 | length() method returns a number 1 larger than 15 | is actually contained by the class. 16 |

    17 | If one attempts to access the last character as 18 | follows: 19 |

    20 |     StringBuffer sb = ...;
    21 |     ...
    22 |     PartialBuffer pb = new PartialBuffer(sb);
    23 |     char c = pb.charAt(pb.length()-1);
    24 |     
    25 | then two things happen. First, a zero is returned 26 | into the variable c. Second, the overRun flag is 27 | set to "true." Accessing data beyond the end of 28 | the buffer is considered an "overRun" of the data. 29 |

    30 | This can be helpful in determining whether more 31 | characters are required for a match to occur, as 32 | the pseudo-code below illustrates. 33 |

    34 |     int i = ...;
    35 |     Regex r = new Regex("some pattern");
    36 |     pb.allowOverRun = true;
    37 |     pb.overRun = false;
    38 |     boolean result = r.matchAt(pb,i);
    39 |     if(pb.overRun) {
    40 |       // The result of the match is not relevant, regardless
    41 |       // of whether result is true or false.  We need to
    42 |       // append more data to the buffer and try again.
    43 |       ....
    44 |       sb.append(more data);
    45 |     }
    46 |     
    47 | */ 48 | class PartialBuffer implements StringLike { 49 | int off; 50 | public boolean allowOverRun = true; 51 | public boolean overRun = false; 52 | StringBuffer sb; 53 | PartialBuffer(StringBuffer sb) { 54 | this.sb = sb; 55 | } 56 | public char charAt(int n) { 57 | n += off; 58 | if(n == sb.length()) { 59 | overRun = true; 60 | return 0; 61 | } 62 | return sb.charAt(n); 63 | } 64 | public int length() { 65 | return allowOverRun ? sb.length()+1 : sb.length(); 66 | } 67 | public int indexOf(char c) { 68 | for(int i=0;i0) 33 | sb.append( ((Pattern)v.elementAt(0)).toString() ); 34 | for(i=1;i= 0) 51 | return r; 52 | } 53 | return -1; 54 | } 55 | public patInt minChars() { 56 | if(v.size()==0) return new patInt(0); 57 | patInt m = ((Pattern)v.elementAt(0)).countMinChars(); 58 | for(int i=1;i v.size()) imax = v.size(); 50 | for(i=imin;i d.width ? xs : d.width; 69 | } 70 | ys += fm.getAscent(); 71 | Dimension d = new Dimension(xs,ys); 72 | return d; 73 | } 74 | final void 75 | drawColorLine(Graphics g,FontMetrics fm,ColorLine ln,int yi) { 76 | int i; 77 | int x = x_margin; 78 | int y = fm.getAscent()+yi*fm.getHeight()+y_margin; 79 | for(i=0;i i) i = p.i; 80 | return this; 81 | } 82 | /** Tests to see if this represents an infinite quantity. */ 83 | public boolean finite() { return !inf; } 84 | /** Converts to a patInt to an int. Infinity is 85 | mapped Integer.MAX_VALUE; 86 | */ 87 | public int intValue() { return inf ? Integer.MAX_VALUE : i; } 88 | }; 89 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Multi_stage2.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 
7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** If Multi were not split into a second stage, then 12 | a nested Multi would try to re-use the same count 13 | variable and the whole thing would break. */ 14 | class Multi_stage2 extends PatternSub { 15 | Pattern nextRet; 16 | patInt count; 17 | patInt matchMin,matchMax; 18 | public boolean matchFewest = false; 19 | public String toString() { 20 | String ret = ""; 21 | ret += sub.toString(); 22 | ret += "{"+matchMin+","+matchMax+"}"; 23 | if(matchFewest) ret += "?"; 24 | ret += parent.nextString(); 25 | return ret; 26 | } 27 | Multi_stage2(patInt a,patInt b,Pattern p) throws RegSyntax { 28 | if(p == null) RegSyntaxError.endItAll( 29 | "Multiple match of Null pattern requested."); 30 | sub = p; 31 | nextRet = this; 32 | sub.setParent(this); 33 | matchMin = a; 34 | matchMax = b; 35 | count = new patInt(0); 36 | // we must have b > a > -1 for this 37 | // to make sense. 38 | if(!a.lessEq(b)) 39 | //throw new BadMultiArgs(); 40 | RegSyntaxError.endItAll("Bad Multi Args: "+a+">"+b); 41 | patInt i = new patInt(-1); 42 | if(a.lessEq(i)) 43 | //throw new BadMultiArgs(); 44 | RegSyntaxError.endItAll("Bad Multi Args: "+a+"< 0"); 45 | } 46 | public Pattern getNext() { 47 | return nextRet; 48 | } 49 | int pos_old = -1; 50 | public int matchInternal(int pos,Pthings pt) { 51 | sub.setParent(this); 52 | 53 | int canUse = -1; 54 | 55 | // check for some forms of infinite recursion... 
56 | if(pos_old >= 0 && pos == pos_old) { 57 | return -1; 58 | } 59 | pos_old = pos; 60 | 61 | if(matchMin.lessEq(count)) 62 | canUse = pos; 63 | if(!count.lessEq(matchMax) || pos > pt.src.length()) 64 | return -1; 65 | 66 | if((matchFewest||count.equals(matchMax)) && canUse >= 0) { 67 | Pattern n = super.getNext(); 68 | if(n == null) 69 | return canUse; 70 | int ret = testMatch(n,pos,pt); 71 | if(ret >= 0) { 72 | return ret; 73 | } 74 | else canUse = -1; 75 | } 76 | 77 | count.inc(); 78 | try { 79 | if(count.lessEq(matchMax)) { 80 | int r = testMatch(sub,pos,pt); 81 | if(r >= 0) 82 | return r; 83 | } 84 | } finally { count.dec(); } 85 | 86 | if(!matchFewest && canUse >= 0) { 87 | Pattern n = super.getNext(); 88 | if(n == null) 89 | return canUse; 90 | int ret = testMatch(n,pos,pt); 91 | return ret; 92 | } else return canUse; 93 | } 94 | public Pattern clone1(Hashtable h) { 95 | try { 96 | Multi_stage2 m = new Multi_stage2(matchMin,matchMax,sub.clone(h)); 97 | m.matchFewest = matchFewest; 98 | return m; 99 | } catch(RegSyntax rs) { 100 | return null; 101 | } 102 | } 103 | }; 104 | -------------------------------------------------------------------------------- /com/stevesoft/pat/wrap/RandomAccessFileWrap.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat.wrap; 9 | 10 | import com.stevesoft.pat.*; 11 | import java.io.*; 12 | 13 | /** Provides a wrapper for a RandomAccessFile so that it 14 | can be searched by Regex. 
*/ 15 | public class RandomAccessFileWrap implements StringLike { 16 | 17 | long offset = 0; 18 | public void setOffset(long o) { 19 | offset = o; 20 | i0 = iend = 0; 21 | } 22 | public long getOffset() { 23 | return offset; 24 | } 25 | RandomAccessFile raf; 26 | int i0=0,iend=0; 27 | byte[] buf = new byte[1024]; 28 | 29 | public int getBufferSize() { 30 | return buf.length; 31 | } 32 | 33 | public void setBufferSize(int bs) { 34 | buf = new byte[bs]; 35 | i0 = iend = 0; 36 | } 37 | 38 | public RandomAccessFileWrap(String file) throws IOException { 39 | this.raf = new RandomAccessFile(file,"r"); 40 | } 41 | public RandomAccessFileWrap(RandomAccessFile raf) { 42 | this.raf = raf; 43 | } 44 | 45 | public char charAt(int i) { 46 | if(i >= i0 && i < iend) 47 | return (char)buf[i-i0]; 48 | 49 | try { 50 | i0 = i-5; 51 | //if(i0+offset<0) i0=(int)(-offset); 52 | if(i0<0) i0=0; 53 | raf.seek(i0+offset); 54 | iend = i0+raf.read(buf,0,buf.length); 55 | 56 | if(i >= i0 && i < iend) 57 | return (char)buf[i-i0]; 58 | } catch(Throwable t) {} 59 | 60 | throw new ArrayIndexOutOfBoundsException("Out of bounds for file:"+ 61 | " i="+i+ 62 | ", Final Buffer: i0="+i0+ 63 | " iend="+iend); 64 | } 65 | 66 | public String toString() { throw new Error("Not implemented"); } 67 | public int length() { 68 | try { 69 | long len = raf.length()-offset; 70 | if(len > Integer.MAX_VALUE) 71 | return Integer.MAX_VALUE; 72 | return (int)len; 73 | } catch(IOException ioe) { 74 | return 0; 75 | } 76 | } 77 | public String substring(int i1,int i2) { 78 | StringBuffer sb = new StringBuffer(); 79 | for(int i=i1;i"); 81 | ln.add(Color.black,r.left()); 82 | ln.add(darkgreen,"|"); 83 | ln.add(darkred,r.substring()); 84 | ln.add(darkgreen,"|"); 85 | ln.add(Color.black,r.right()); 86 | ln.add(darkgreen,"<=="); 87 | ctxt.addColorLine(ln); 88 | ctxt.addColorLine(new ColorLine()); 89 | if(r.numSubs() > 0) { 90 | ln = new ColorLine(); 91 | ln.add(darkblue,"Backreferences:"); 92 | ctxt.addColorLine(ln); 93 | } 94 
| int i; 95 | for(i=1;i<=r.numSubs();i++) { 96 | ln = new ColorLine(); 97 | ln.add(darkblue,"("+i+") : "); 98 | if(r.left(i)==null) ln.add(darkblue,"[null]"); 99 | else { 100 | ln.add(Color.black,r.left(i)); 101 | ln.add(darkgreen,"|"); 102 | ln.add(darkred,r.substring(i)); 103 | ln.add(darkgreen,"|"); 104 | ln.add(Color.black,r.right(i)); 105 | } 106 | ctxt.addColorLine(ln); 107 | } 108 | ctxt.repaint(); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /com/stevesoft/pat/FastMulti.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** A special case of Multi, implemented when minChars().equals(maxChars()), 12 | * and some other conditions spelled out in RegOpt.safe4fm "Safe for 13 | * FastMulti." It avoids stack growth problems as well as being slightly 14 | * faster. 15 | */ 16 | class FastMulti extends PatternSub { 17 | patInt fewestMatches,mostMatches; 18 | public patInt minChars() { 19 | return sub.countMinChars().mul(fewestMatches); 20 | } 21 | public patInt maxChars() { 22 | return sub.countMaxChars().mul(mostMatches); 23 | } 24 | public boolean matchFewest = false; 25 | 26 | FastMulti(patInt a,patInt b,Pattern p) throws RegSyntax { 27 | if(!a.lessEq(b)) 28 | throw new RegSyntax("{"+a+","+b+"} is invalid"); 29 | if(p == null) RegSyntaxError.endItAll("Null length pattern "+ 30 | "followed by *, +, or other Multi."); 31 | fewestMatches = a; 32 | mostMatches = b; 33 | sub = p; 34 | step = p.countMinChars().intValue(); 35 | sub.setParent(null); 36 | } 37 | public String toString() { 38 | return sub.toString()+"{" 39 | +fewestMatches+","+mostMatches+"}"+ 40 | (matchFewest ? "?" 
: "")+"(?# <= fast multi)"+ 41 | nextString(); 42 | } 43 | int step = -1; 44 | public int matchInternal(int pos,Pthings pt) { 45 | int m=-1; 46 | int i=pos; 47 | int endstr = pt.src.length()-step; 48 | patInt matches = new patInt(0); 49 | if(matchFewest) { 50 | if(fewestMatches.lessEq(matches)) { 51 | int ii = nextMatch(i,pt); 52 | if(ii >= 0) return ii; 53 | } 54 | while(i >= 0 && i <= endstr) { 55 | i=sub.matchInternal(i,pt); 56 | if(i >= 0) { 57 | matches.inc(); 58 | if(fewestMatches.lessEq(matches)) { 59 | int ii = nextMatch(i,pt); 60 | if(ii >= 0) return ii; 61 | } 62 | if(matches.equals(mostMatches)) 63 | return -1; 64 | } 65 | } 66 | return -1; 67 | } 68 | int nMatches = 0; 69 | while(fewestMatches.intValue() > nMatches) { 70 | i=sub.matchInternal(i,pt); 71 | if(i >= 0) 72 | nMatches++; 73 | else 74 | return -1; 75 | } 76 | m=i; 77 | if(mostMatches.finite()) { 78 | while(nMatches < mostMatches.intValue()) { 79 | i = sub.matchInternal(i,pt); 80 | if(i>=0) { 81 | m=i; 82 | nMatches++; 83 | } else break; 84 | } 85 | } else { 86 | while(true) { 87 | i = sub.matchInternal(i,pt); 88 | if(i>=0) { 89 | m=i; 90 | nMatches++; 91 | } else break; 92 | } 93 | } 94 | while(m >= pos) { 95 | int r=nextMatch(m,pt); 96 | if(r >= 0) return r; 97 | m -= step; 98 | nMatches--; 99 | if(nMatches < fewestMatches.intValue()) 100 | return -1; 101 | } 102 | return -1; 103 | } 104 | public Pattern clone1(Hashtable h) { 105 | try { 106 | FastMulti fm = new FastMulti(fewestMatches,mostMatches,sub.clone(h)); 107 | fm.matchFewest = matchFewest; 108 | return fm; 109 | } catch(RegSyntax rs) { 110 | return null; 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /com/stevesoft/pat/StrPos.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. 
Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | /** 10 | Shareware: package pat 11 | Copyright 2001, Steven R. Brandt 12 | */ /** 13 | StrPos is used internally by regex to parse the regular expression. */ 14 | public class StrPos { 15 | String s; 16 | int pos; 17 | /** Return the position in the string pointed to */ 18 | public int pos() { return pos; } 19 | 20 | /** This contains the escape character, which is \ by default. */ 21 | public char esc=Pattern.ESC; 22 | char c; 23 | /** Returns the current, possibly escaped, character. */ 24 | public char thisChar() { return c; } 25 | 26 | boolean dontMatch,eos; 27 | 28 | /** tell whether we are at end of string */ 29 | public boolean eos() { return eos; } 30 | /** initialize a StrPos from another StrPos. */ 31 | public StrPos(StrPos sp) { 32 | dup(sp); 33 | } 34 | /** copy a StrPos from sp to this. */ 35 | public void dup(StrPos sp) { 36 | s = sp.s; 37 | pos = sp.pos; 38 | c = sp.c; 39 | dontMatch = sp.dontMatch; 40 | eos = sp.eos; 41 | } 42 | /** Initialize a StrPos by giving it a String, and a 43 | position within the String. */ 44 | public StrPos(String s,int pos) { 45 | this.s=s; 46 | this.pos=pos-1; 47 | inc(); 48 | } 49 | /** Advance the place where StrPos points within the String. 50 | Counts a backslash as part of the next character. */ 51 | public StrPos inc() { 52 | pos++; 53 | if(pos >= s.length()) { 54 | eos = true; 55 | return this; 56 | } 57 | eos = false; 58 | c = s.charAt(pos); 59 | if(c == esc && pos+1st that matches a non-escaped 88 | character. */ 89 | public boolean incMatch(String st) { 90 | StrPos sp = new StrPos(this); 91 | int i; 92 | for(i=0;i= '0' && sp.c <= '9';i++) { 108 | cnt = 10*cnt+sp.c-'0'; 109 | sp.inc(); 110 | } 111 | if(i==0) return null; 112 | dup(sp); 113 | return new patInt(cnt); 114 | } 115 | /** get the string that we are processing. 
*/ 116 | public String getString() { return s; } 117 | }; 118 | -------------------------------------------------------------------------------- /com/stevesoft/pat/DotMulti.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.Hashtable; 10 | 11 | /** A special optimization of multi that is used when the 12 | * common subpattern ".*" is encountered. 13 | */ 14 | class DotMulti extends PatternSub { 15 | patInt fewestMatches,mostMatches; 16 | public patInt minChars() { 17 | return fewestMatches; 18 | } 19 | public patInt maxChars() { 20 | return mostMatches; 21 | } 22 | public boolean matchFewest = false; 23 | 24 | StringLike src=null; 25 | int srclength=0; 26 | boolean dotDoesntMatchCR=true; 27 | DotMulti(patInt a,patInt b) { 28 | fewestMatches = a; 29 | mostMatches = b; 30 | } 31 | public String toString() { 32 | return ".{" 33 | +fewestMatches+","+mostMatches+"}"+ 34 | (matchFewest ? "?" 
: "")+"(?# <= dot multi)"+ 35 | nextString(); 36 | } 37 | final int submatchInternal(int pos,Pthings pt) { 38 | if(pos < srclength) { 39 | if(dotDoesntMatchCR) { 40 | if(src.charAt(pos) != '\n') 41 | return 1+pos; 42 | } else return 1+pos; 43 | } 44 | return -1; 45 | } 46 | final static int step = 1; 47 | static int idcount = 1; 48 | public int matchInternal(int pos,Pthings pt) { 49 | int m=-1; 50 | int i=pos; 51 | src = pt.src; 52 | srclength = src.length(); 53 | dotDoesntMatchCR = pt.dotDoesntMatchCR; 54 | if(matchFewest) { 55 | int nMatches = 0; 56 | while(fewestMatches.intValue() > nMatches) { 57 | i=submatchInternal(i,pt); 58 | if(i<0) return -1; 59 | nMatches++; 60 | } 61 | if(i<0) return -1; 62 | int ii = nextMatch(i,pt); 63 | if(ii >= 0) return ii; 64 | if(!mostMatches.finite()) { 65 | while(i >= 0) { 66 | i = submatchInternal(i,pt); 67 | if(i < 0) return -1; 68 | ii = nextMatch(i,pt); 69 | if(ii >= 0) return ii; 70 | } 71 | } else { 72 | while(i > 0) { 73 | i = submatchInternal(i,pt); 74 | if(i < 0) return -1; 75 | nMatches++; 76 | if(nMatches > mostMatches.intValue()) 77 | return -1; 78 | ii = nextMatch(i,pt); 79 | if(ii >= 0) return ii; 80 | } 81 | } 82 | return -1; 83 | } 84 | int nMatches = 0; 85 | while(fewestMatches.intValue() > nMatches) { 86 | i=submatchInternal(i,pt); 87 | if(i >= 0) 88 | nMatches++; 89 | else 90 | return -1; 91 | } 92 | m=i; 93 | if(mostMatches.finite()) { 94 | while(nMatches < mostMatches.intValue()) { 95 | i = submatchInternal(i,pt); 96 | if(i>=0) { 97 | m=i; 98 | nMatches++; 99 | } else break; 100 | } 101 | } else { 102 | while(true) { 103 | i = submatchInternal(i,pt); 104 | if(i>=0) { 105 | m=i; 106 | nMatches++; 107 | } else break; 108 | } 109 | } 110 | while(m >= pos) { 111 | int r=nextMatch(m,pt); 112 | if(r >= 0) return r; 113 | m -= step; 114 | nMatches--; 115 | if(nMatches < fewestMatches.intValue()) 116 | return -1; 117 | } 118 | return -1; 119 | } 120 | Pattern clone1(Hashtable h) { 121 | DotMulti dm = new 
DotMulti(fewestMatches,mostMatches); 122 | dm.matchFewest = matchFewest; 123 | return dm; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RegexTokenizer.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.util.*; 10 | /** 11 | Shareware: package pat 12 | Copyright 2001, Steven R. Brandt 13 | */ /** 14 | The RegexTokenizer is similar to the StringTokenizer class 15 | provided with java, but allows one to tokenize using 16 | regular expressions, rather than a simple list of characters. 17 | Tokens are any strings between the supplied regular expression, 18 | as well as any backreferences (things in parenthesis) 19 | contained within the regular expression. */ 20 | public class RegexTokenizer implements Enumeration { 21 | String toParse; 22 | Regex r; 23 | int count = 0; 24 | Vector v = new Vector(); 25 | Vector vi = new Vector(); 26 | int pos=0; 27 | 28 | int offset = 1; 29 | void getMore() { 30 | String s = r.right(); 31 | if(r.searchFrom(toParse,pos)) { 32 | v.addElement(r.left().substring(pos)); 33 | vi.addElement(new Integer(r.matchFrom()+ 34 | r.charsMatched())); 35 | for(int i=0;i= v.size()) getMore(); 65 | return v.elementAt(count++); 66 | } 67 | /** This is the equivalent (String)nextElement(). */ 68 | public String nextToken() { return (String)nextElement(); } 69 | /** This asks for the next token, and changes the pattern 70 | being used at the same time. */ 71 | public String nextToken(String newpat) { 72 | try { r.compile(newpat); } catch (RegSyntax r_) {} 73 | return nextToken(r); 74 | } 75 | /** This asks for the next token, and changes the pattern 76 | being used at the same time. 
*/ 77 | public String nextToken(Regex nr) { 78 | r = nr; 79 | if(vi.size() > count) { 80 | pos = ((Integer)vi.elementAt(count)).intValue(); 81 | v.setSize(count); 82 | vi.setSize(count); 83 | } 84 | getMore(); 85 | return nextToken(); 86 | } 87 | /** Tells whether there are more tokens in the pattern. */ 88 | public boolean hasMoreElements() { 89 | if(count >= v.size()) getMore(); 90 | return count < v.size(); 91 | } 92 | /** Tells whether there are more tokens in the pattern, but 93 | in the fashion of StringTokenizer. */ 94 | public boolean hasMoreTokens() { return hasMoreElements(); } 95 | /** Determines the # of remaining tokens */ 96 | public int countTokens() { 97 | int old_pos=pos,_count=count; 98 | while(hasMoreTokens()) 99 | nextToken(); 100 | count=_count; 101 | return v.size()-count; 102 | } 103 | /** Returns all tokens in the String */ 104 | public String[] allTokens() { 105 | countTokens(); 106 | String[] ret = new String[v.size()]; 107 | v.copyInto(ret); 108 | return ret; 109 | } 110 | }; 111 | -------------------------------------------------------------------------------- /com/stevesoft/pat/Skip.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | /** This class is used internally to search ahead for some 11 | optimized Regex objects. It searches within a String 12 | for occrences of a given String -- like a more flexible 13 | version of String.indexOf. 
14 | @see com.stevesoft.pat.Skip2 15 | @see com.stevesoft.pat.SkipBMH 16 | */ 17 | public class Skip { 18 | static int mkmask(int c) { 19 | char x = (char)c; 20 | return ~( CaseMgr.toUpperCase(x) | 21 | CaseMgr.toLowerCase(x) | 22 | CaseMgr.toTitleCase(x)); 23 | } 24 | static { int x = Regex.BackRefOffset; } 25 | String src; 26 | int c,mask; 27 | int offset; 28 | boolean ign,m1; 29 | /** Examine a Regex to determine what String it will 30 | attempt to skip to when searching for patterns. 31 | Return -1 if we aren't doing this. */ 32 | public static String string(Regex r) { 33 | return r.skipper == null ? null : r.skipper.src; 34 | } 35 | /** Determine the offset of the String within the pattern 36 | that we are skipping to. Return -1 if we aren't doing 37 | this. */ 38 | public static int offset(Regex r) { 39 | return r.skipper == null ? -1 : r.skipper.offset; 40 | } 41 | /** Initialize, give it a String to search for, tell it 42 | whether or not to ignoreCase, and what the offset is 43 | of the String within the String to be searched. 
*/ 44 | public Skip(String s,boolean ign,int o) { 45 | src = s; 46 | c = s.charAt(0); 47 | if(ign) { 48 | mask = mkmask(c); 49 | } else mask = 0; 50 | offset = o; 51 | this.ign = ign; 52 | m1 = (s.length()==1); 53 | } 54 | /** The same as find(s,0,s.length()) */ 55 | public final int find(StringLike s) { 56 | return find(s,0,s.length()); 57 | } 58 | static final int min(int a,int b) { return a end) return -1; 63 | start += offset; 64 | int vend = min(s.length()-1,end+offset); 65 | if(mask != c) { 66 | for(int i=start;i<=vend;i++) 67 | if(0 == (s.charAt(i) & mask)) 68 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) 69 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) 70 | return i-offset; 71 | } else { 72 | for(int i=start;i<=vend;i++) 73 | if(c == s.charAt(i)) 74 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) ) 75 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) ) 76 | return i-offset; 77 | } 78 | return -1; 79 | } 80 | static Skip findSkip(Regex r) { 81 | return findSkip(r.thePattern,r.ignoreCase,!r.dontMatchInQuotes); 82 | } 83 | // look for things that can be skipped 84 | static Skip findSkip(Pattern p,boolean ignoreCase,boolean trnc) { 85 | StringBuffer sb = new StringBuffer(); 86 | Skip subsk = null; 87 | int offset = 0; 88 | int skipc = -1,skipoff=0; 89 | for(;p != null;p = p.next) { 90 | if(p instanceof oneChar) { 91 | skipc = ((oneChar)p).c; 92 | skipoff = offset; 93 | } 94 | if(p instanceof oneChar && p.next instanceof oneChar) { 95 | Pattern psav = p; 96 | sb.append(((oneChar)p).c); 97 | while(p.next instanceof oneChar) { 98 | sb.append(((oneChar)p.next).c); 99 | p = p.next; 100 | } 101 | String st = sb.toString(); 102 | char c0 = st.charAt(0), c1 = st.charAt(1); 103 | Skip sk=null; 104 | if(st.length()>2) 105 | sk = new SkipBMH(st,ignoreCase,offset); 106 | else 107 | sk = new Skip2(st,ignoreCase,offset); 108 | if(trnc && st.length()>2) { // chop out a whole string... 
/** Lookup table from a character code (0..255) to its "control"
  * counterpart.
  * <p>
  * The rule is: fold the ASCII lowercase letters 'a'..'z' to uppercase,
  * then flip bit 6 (XOR 64).  So both 'A' and 'a' give 1, '@' gives 0,
  * '[' gives 27, '\' gives 28 and '?' gives 127.
  * <p>
  * NOTE(review): presumably this backs the \cX escape of the regex
  * syntax -- confirm against the caller.
  */
public class Ctrl {
    /** cmap[c] is the control counterpart of character code c; 256 entries. */
    public final static char[] cmap = buildMap();

    /** Build the table from the rule instead of spelling out 256 literals,
      * so that no single entry can drift from its neighbours.
      */
    private static char[] buildMap() {
        char[] map = new char[256];
        for(int c=0;c<map.length;c++) {
            // Lowercase letters behave like their uppercase form.
            int folded = (c >= 'a' && c <= 'z') ? c-('a'-'A') : c;
            map[c] = (char)(folded ^ 64);
        }
        return map;
    }
}

    35 | This feature can be used to prevent transformations from 36 | occurring in certain regions. For example, if I add the rule 37 | s'//.*'$&' and then add the 38 | rule s/hello/goodbye/ the Transformer will replace "hello" 39 | with "goodbye" except when it occurs inside a double-slash 40 | style of comment. The transformation on the comment goes first, 41 | does nothing, and precludes transformation on the same region 42 | of text as the s/hello/goodbye/ rule. 43 |

    44 | So far, at least, this class does not have the capability of 45 | turning into a giant robot :-) 46 | */ 47 | public class Transformer { 48 | TransPat tp; 49 | Regex rp = new Regex(); 50 | boolean auto_optimize; 51 | 52 | /** Get a replacer to that works with the current Regex. 53 | @see com.stevesoft.pat.Replacer 54 | */ 55 | public Replacer getReplacer() { return rp.getReplacer(); } 56 | 57 | /** Instantiate a new Transformer object. */ 58 | public Transformer(boolean auto) { 59 | auto_optimize = auto; 60 | tp = new TransPat(); 61 | rp.setReplaceRule(new TransRepRule(this)); 62 | rp.thePattern = tp; 63 | } 64 | 65 | /** Add a new Regex to the set of Regex's. */ 66 | public void add(Regex r) { 67 | if(auto_optimize) r.optimize(); 68 | tp.ra[tp.ra_len++] = r; 69 | if(tp.ra.length==tp.ra_len) { 70 | Regex[] ra2 = new Regex[tp.ra_len+10]; 71 | for(int i=0;i rp.numSubs_ ? r.numSubs_ : rp.numSubs_; 76 | } 77 | 78 | /** Returns the number of Regex's in this Transformer. */ 79 | public int patterns() { return tp.ra_len; } 80 | 81 | /** Get the Regex at position i in this Transformer. */ 82 | public Regex getRegexAt(int i) { 83 | if(i >= tp.ra_len) 84 | throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns()); 85 | if(i < 0) 86 | throw new ArrayIndexOutOfBoundsException("i="+i+"< 0"); 87 | return tp.ra[i]; 88 | } 89 | /** Set the Regex at position i in this Transformer. 
*/ 90 | public void setRegexAt(Regex rx,int i) { 91 | if(i >= tp.ra_len) 92 | throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns()); 93 | if(i < 0) 94 | throw new ArrayIndexOutOfBoundsException("i="+i+"< 0"); 95 | tp.ra[i] = rx; 96 | } 97 | 98 | /** Add a new Regex by calling Regex.perlCode 99 | @see com.stevesoft.pat.Regex#perlCode(java.lang.String) 100 | */ 101 | public void add(String rs) { 102 | Regex r = Regex.perlCode(rs); 103 | if(r == null) throw new NullPointerException("bad pattern to Regex.perlCode: "+rs); 104 | add(r); 105 | } 106 | /** Add an array of Strings (which will be converted to 107 | Regex's via the Regex.perlCode method. 108 | @see com.stevesoft.pat.Regex#perlCode(java.lang.String) 109 | */ 110 | public void add(String[] array) { 111 | for(int i=0;i 19 |

  • -i : ignore case 20 |
  • -p : paragraph based matching 21 |
  • -v : invert, print only lines that don't match 22 | 23 | */ 24 | public class Grep { 25 | 26 | boolean iflag=false, pflag=false,vflag=false,verbose=false; 27 | Regex re=null; 28 | Vector v=new Vector(); 29 | public static void main(String[] args) throws Exception { 30 | Grep g = new Grep(); 31 | g.doArgs(args); 32 | } 33 | 34 | // -- BEGIN OPTIONS -- // 35 | 36 | /** The ignore case flag */ 37 | public boolean getIFlag() { 38 | return iflag; 39 | } 40 | /** The ignore case flag */ 41 | public void setIFlag(boolean b) { 42 | iflag = b; 43 | } 44 | /** The paragraph mode flag */ 45 | public boolean getPFlag() { 46 | return pflag; 47 | } 48 | /** The paragraph mode flag */ 49 | public void setPFlag(boolean b) { 50 | pflag = b; 51 | } 52 | /** If the vflag is true, then only lines not 53 | matching the supplied pattern will be printed. */ 54 | public boolean getVFlag() { 55 | return vflag; 56 | } 57 | /** If the vflag is true, then only lines not 58 | matching the supplied pattern will be printed. */ 59 | public void setVFlag(boolean b) { 60 | vflag = b; 61 | } 62 | /** Determine if file and line number info is written. */ 63 | public boolean getVerbose() { 64 | return verbose; 65 | } 66 | /** Determine if file and line number info is written. 
*/ 67 | public void setVerbose(boolean b) { 68 | verbose = b; 69 | } 70 | /** The pattern to be searched for */ 71 | public void setRegex(Regex r) { 72 | re = r; 73 | } 74 | /** The pattern to be searched for */ 75 | public Regex getRegex() { 76 | return re; 77 | } 78 | int lineno=0; 79 | /** Line number info reported by verbose */ 80 | void setLineno(int n) { 81 | lineno=n; 82 | } 83 | /** Line number info reported by verbose */ 84 | int getLineno() { 85 | return lineno; 86 | } 87 | /** Line number info reported by verbose */ 88 | void incLineno() { 89 | lineno++; 90 | } 91 | String _file = ""; 92 | /** File name info reported by verbose */ 93 | String getFile() { return _file; } 94 | /** File name info reported by verbose */ 95 | void setFile(String s) { _file=s; } 96 | 97 | // -- END OPTIONS -- // 98 | 99 | void doArgs(String[] args) throws Exception { 100 | // Process command line arguments 101 | for(int i=0;i1) 136 | setVerbose(true); 137 | 138 | // Process files 139 | if(v.size()==0) 140 | doInputStream(System.in); 141 | for(int i=0;i= getl(v.elementAt(i))) { 68 | Pattern p2 = (Pattern)v.elementAt(i); 69 | char lo = min(getl(p),getl(p2)); 70 | char hi = max(geth(p),geth(p2)); 71 | nv.setElementAt(p=mkelem(lo,hi),nv.size()-1); 72 | } else { 73 | p = (Pattern)v.elementAt(i); 74 | nv.addElement(p); 75 | } 76 | } 77 | 78 | b.v = v = nv; 79 | } catch(RegSyntax e) { 80 | e.printStackTrace(); 81 | } 82 | 83 | // We don't want these things to be empty. 84 | Vector negv = neg(v); 85 | if(v.size()==1) return b; 86 | if(negv.size()==1) { 87 | b.v = negv; 88 | b.neg = !b.neg; 89 | return b; 90 | } 91 | 92 | // Now consider if we can make a FastBracket. 93 | // Uses a BitSet to do a lookup. 94 | FastBracket fb = newbrack(v,b.neg); 95 | if(fb == null) 96 | fb = newbrack(negv,!b.neg); 97 | if(fb != null) { 98 | fb.parent = b.parent; 99 | fb.next = b.next; 100 | return fb; 101 | } 102 | 103 | // return the normal Bracket. 
104 | return b; 105 | } 106 | 107 | // Build a FastBracket and set bits. If this can't 108 | // be done, return null. 109 | final static FastBracket newbrack(Vector v,boolean neg) { 110 | FastBracket fb = new FastBracket(neg); 111 | fb.v = v; 112 | if(v.size()==0) return null; 113 | fb.min = getl(v.elementAt(0)); 114 | fb.max = geth(v.elementAt(v.size()-1)); 115 | if(fb.max-fb.min <= 256) { 116 | fb.bs = new BitSet(fb.max-fb.min+1); 117 | for(int i=0;ib ? a : b; 165 | } 166 | 167 | // getl -- get lower value of Range object, 168 | // or get value of oneChar object. 169 | final static char getl(Object o) { 170 | Pattern p = (Pattern)o; 171 | if(p instanceof Range) 172 | return ((Range)p).lo; 173 | return ((oneChar)p).c; 174 | } 175 | // geth -- get higher value of Range object, 176 | // or get value of oneChar object. 177 | final static char geth(Object o) { 178 | Pattern p = (Pattern)o; 179 | if(p instanceof Range) 180 | return ((Range)p).hi; 181 | return ((oneChar)p).c; 182 | } 183 | 184 | // This is the easy part! 185 | public int matchInternal(int pos,Pthings pt) { 186 | if(pos >= pt.src.length() || Masked(pos,pt)) return -1; 187 | char c = pt.src.charAt(pos); 188 | return (neg ^ (c >= min && c <= max && bs.get(c-min)) ) ? 189 | nextMatch(pos+1,pt) : -1; 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RegexWriter.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | import java.io.*; 11 | import com.stevesoft.pat.wrap.*; 12 | 13 | /** A basic extension of FilterWriter that uses Transformer 14 | to make replacements in data as it is written out. 
It attempts 15 | to transform a string whenever the End-of-Line (EOL) character 16 | is written (which is, by default, the newline character '\n'). 17 | Only the transformed portion of the line is written out, allowing 18 | the RegexWriter to wait until a complete pattern is present before 19 | attempting to write out info. Until a pattern completes, data is 20 | stored in a StringBuffer -- which can be accessed through the 21 | length() and charAt() methods of this class. 22 |

    23 | Note a subtlety here -- while a Transformer normally matches 24 | at higher priority against the pattern added to it first, this 25 | will not necessarily be true when a multi-line match is in progress 26 | because one of the complete multi-line patterns may not be completely 27 | loaded in RegexWriter's buffer. For this reason, the Transformer 28 | class is equipped with a way to add a pattern and replacement rule 29 | in three pieces -- a beginning (once this matches, nothing else in 30 | the Transformer can match until the whole pattern matches), an 31 | ending (the whole pattern is a String formed by adding the beginning 32 | and ending), and a ReplaceRule. 33 |

    34 | An illustration of this is given in the this 35 | example. 36 | */ 37 | public class RegexWriter extends Writer { 38 | Replacer repr; 39 | Writer w; 40 | WriterWrap ww; 41 | StringBuffer sb = new StringBuffer(); 42 | PartialBuffer wrap = new PartialBuffer(sb); 43 | int pos, epos; 44 | int interval = 128; 45 | int bufferSize = 2*1024; 46 | 47 | public RegexWriter(Transformer t,Writer w) { 48 | this.w = w; 49 | ww = new WriterWrap(w); 50 | repr = t.getReplacer(); 51 | repr.setBuffer(new StringBufferLike(ww)); 52 | repr.setSource(wrap); 53 | } 54 | public RegexWriter(Regex r,Writer w) { 55 | this.w = w; 56 | ww = new WriterWrap(w); 57 | repr = r.getReplacer(); 58 | repr.setBuffer(new StringBufferLike(ww)); 59 | repr.setSource(wrap); 60 | } 61 | 62 | char EOLchar = '\n'; 63 | /** This method no longer serves any purpose. 64 | @deprecated 65 | */ 66 | public char getEOLchar() { 67 | return EOLchar; 68 | } 69 | /** This method no longer serves any purpose. 70 | @deprecated 71 | */ 72 | public void setEOLchar(char c) { 73 | EOLchar = c; 74 | } 75 | 76 | int max_lines=2; 77 | /** This method no longer serves any purpose. 78 | @deprecated 79 | */ 80 | public int getMaxLines() { return max_lines; } 81 | /** This method no longer serves any purpose. 
82 | @deprecated 83 | */ 84 | public void setMaxLines(int ml) { max_lines = ml; } 85 | 86 | void write() throws IOException { 87 | Regex rex = repr.getRegex(); 88 | int eposOld = epos; 89 | if(rex.matchAt(wrap,epos) && !wrap.overRun) { 90 | while(pos < epos) 91 | w.write(sb.charAt(pos++)); 92 | int to = rex.matchedTo(); 93 | repr.setPos(to); 94 | repr.apply(rex,rex.getReplaceRule()); 95 | epos = pos = to; 96 | if(epos == eposOld && epos < sb.length()) 97 | epos++; 98 | } else if(!wrap.overRun && epos < sb.length()) { 99 | epos++; 100 | } 101 | while(pos < epos) 102 | w.write(sb.charAt(pos++)); 103 | if(epos == sb.length()) { 104 | sb.setLength(1); 105 | pos = epos = 1; 106 | } else if(pos > bufferSize) { 107 | for(int i=bufferSize;iCopyright 2001, Steven R. Brandt 13 | */ /** 14 | Class Pattern is the base class on which all the other pattern 15 | elements are built. */ 16 | 17 | public abstract class Pattern { 18 | /** The ESC character, the user can provide his own value 19 | for the escape character through regex.esc */ 20 | public final static char ESC = '\\'; 21 | final static String PROTECT_THESE = "[]{}(),$,-\"^."; 22 | 23 | /** The interal match function, it must be provided by any 24 | class which wishes to extend Pattern. */ 25 | public abstract int matchInternal(int i,Pthings p); 26 | public abstract String toString(); 27 | 28 | // Class Pattern is a singly linked list 29 | // chained together by member next. The member 30 | // parent is used so that sub patterns can access 31 | // the chain they are branching from. 32 | Pattern next=null,parent=null; 33 | 34 | /** This gets the next element of a Pattern that 35 | we wish to match. If we are at the end of a 36 | subchain of patterns, it will return us to the 37 | parent chain. */ 38 | public Pattern getNext() { 39 | return next != null ? next : 40 | (parent == null ? 
null : parent.getNext()); 41 | } 42 | /** Call this method if you have a pattern element that 43 | takes a sub pattern (such as Or), and 44 | after you have added a sub pattern to the current 45 | pattern element. */ 46 | public void setParent(Pattern p) { 47 | if(next != null) next.setParent(p); 48 | else parent = p; 49 | } 50 | /** This determines if the remainder of a Pattern 51 | matches. Type "return nextMatch" from within 52 | matchInternal if the current 53 | Pattern matches. Otherwise, return a -1.*/ 54 | public int nextMatch(int i,Pthings pt) { 55 | Pattern p = getNext(); 56 | /*if(p == null) return i; 57 | return p.matchInternal(i,pt);*/ 58 | return p==null ? i : p.matchInternal(i,pt); 59 | } 60 | /** This is a toString() for the remainder 61 | of the Pattern elements after this one. 62 | use this when overriding toString(). Called from 63 | within toString(). */ 64 | public String nextString() { 65 | if(next == null) return ""; 66 | return next.toString(); 67 | } 68 | 69 | /** a method to detect whether char c is in String s */ 70 | final static boolean inString(char c,String s) { 71 | int i; 72 | for(i=0;iCopyright 2001, Steven R. Brandt 14 | */ /** 15 | This class is used to store a result from Regex */ 16 | public class RegRes implements Cloneable { 17 | protected int[] marks = null; 18 | protected boolean didMatch_ = false; 19 | protected StringLike src=null; 20 | 21 | /** Obtain the text String that was matched against. */ 22 | public String getString() { return src.toString(); } 23 | /** Obtain the source StringLike object. */ 24 | public StringLike getStringLike() { return src; } 25 | protected int charsMatched_=0,matchFrom_=0,numSubs_=0; 26 | public String toString() { 27 | StringBuffer sb = new StringBuffer(); 28 | sb.append("match="+matchedFrom()+":"+charsMatched()); 29 | if(!didMatch()) return sb.toString(); 30 | for(int i=0;inumSubs_) return -1; 87 | //Integer in=(Integer)marks.get("left"+i); 88 | //return in == null ? 
-1 : in.intValue(); 89 | return marks[i]; 90 | } 91 | /** Obtains the number of characters matched by backreference i, or 92 | -1 if backreference i was not matched. */ 93 | public int charsMatched(int i) { 94 | if(marks==null||i>numSubs_||!didMatch_) return -1; 95 | //Integer in = (Integer)marks.get("right"+i); 96 | //int i2 = in==null ? -1 : in.intValue(); 97 | int mf = matchedFrom(i); 98 | return mf < 0 ? -1 : marks[i+numSubs_]-matchedFrom(i); 99 | } 100 | /** This is either equal to matchedFrom(i)+charsMatched(i) if the match 101 | was successful, or -1 if it was not. */ 102 | public int matchedTo(int i) { 103 | if(marks==null||i>numSubs_||!didMatch_) return -1; 104 | return marks[i+numSubs_]; 105 | } 106 | /** Obtains a substring matching the nth set 107 | of parenthesis from the pattern. See 108 | numSubs(void), or null if the nth backrefence did 109 | not match. */ 110 | public String stringMatched(int i) { 111 | int mf = matchedFrom(i), cm = charsMatched(i); 112 | return !didMatch_ || mf<0 || cm<0 ? null : 113 | src.substring(mf,mf+cm); 114 | } 115 | /** This returns the part of the string that preceeds the match, 116 | or null if the match failed.*/ 117 | public String left() { 118 | int mf = matchedFrom(); 119 | return !didMatch_ || (mf<0) ? null : src.substring(0,mf); 120 | } 121 | /** This returns the part of the string that follows the ith 122 | backreference, or null if the backreference did not match. */ 123 | public String left(int i) { 124 | int mf = matchedFrom(i); 125 | return !didMatch_ || (mf<0) ? null : src.substring(0,mf); 126 | } 127 | /** This returns the part of the string that follows the match, 128 | or null if the backreference did not match.*/ 129 | public String right() { 130 | int mf = matchedFrom(), cm = charsMatched(); 131 | return !didMatch_ || mf<0 || cm<0 ? 
null : src.substring(mf+ 132 | cm,src.length()); 133 | } 134 | /** This returns the string to the right of the ith backreference, 135 | or null if the backreference did not match. */ 136 | public String right(int i) { 137 | int mf = matchedFrom(i), cm = charsMatched(i); 138 | return !didMatch_ || mf<0 || cm<0 ? null : 139 | src.substring(mf+cm,src.length()); 140 | } 141 | /** After a successful match, this returns the location of 142 | the first matching character, or -1 if the match failed.*/ 143 | public int matchedFrom() { return !didMatch_ ? -1 : matchFrom_; } 144 | /** After a successful match, this returns the number of 145 | characters in the match, or -1 if the match failed. */ 146 | public int charsMatched() { return !didMatch_||matchFrom_<0 ? -1 : charsMatched_; } 147 | /** This is matchedFrom()+charsMatched() after a successful match, 148 | or -1 otherwise. */ 149 | public int matchedTo() { return !didMatch_ ? -1 : matchFrom_+charsMatched_;} 150 | /** This returns the number of 151 | backreferences (parenthesis) in the pattern, 152 | i.e. the pattern "(ab)" has 153 | one, the pattern "(a)(b)" has two, etc. */ 154 | public int numSubs() { return numSubs_; } 155 | /** Contains true if the last match was successful. */ 156 | public boolean didMatch() { return didMatch_; } 157 | 158 | /** An older name for matchedFrom. */ 159 | public int matchFrom() { return matchedFrom(); } 160 | /** An older name for stringMatched(). */ 161 | public String substring() { return stringMatched(); } 162 | /** An older name for matchedFrom. */ 163 | public int matchFrom(int i) { return matchedFrom(i); } 164 | /** An older name for stringMatched. 
*/ 165 | public String substring(int i) { return stringMatched(i); } 166 | } 167 | -------------------------------------------------------------------------------- /com/stevesoft/pat/RegexReader.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | import java.io.*; 11 | import com.stevesoft.pat.wrap.*; 12 | 13 | /** This class allows you to replace the text in strings 14 | as you read them in. Be careful what you do with 15 | this freedom... using Regex.perlCode("s{.*}{x}s") 16 | as your pattern will result in loading the entire 17 | contents of the Reader into memory. 18 | */ 19 | public class RegexReader extends Reader { 20 | RBuffer rb = new RBuffer(new StringBuffer()); 21 | PartialBuffer wrap = new PartialBuffer(rb.sb); 22 | boolean moreToRead = true; 23 | Reader r; 24 | Replacer rp; 25 | 26 | // the buffer size 27 | int nmax = 2*1024; 28 | 29 | public RegexReader(Regex rex,Reader r) { 30 | this.r = r; 31 | rp = rex.getReplacer(); 32 | } 33 | public RegexReader(Transformer tex,Reader r) { 34 | this.r = r; 35 | rp = tex.getReplacer(); 36 | } 37 | public void reset() throws IOException { 38 | r.reset(); 39 | rb = new RBuffer(new StringBuffer()); 40 | wrap = new PartialBuffer(rb.sb); 41 | moreToRead = true; 42 | } 43 | void readData() throws IOException { 44 | int c; 45 | int n = 0; 46 | while( (c = r.read()) != -1) { 47 | rb.sb.append((char)c); 48 | if(n++ > nmax) 49 | break; 50 | } 51 | if(c == -1 && n == 0) { 52 | moreToRead = false; 53 | wrap.allowOverRun = false; 54 | } 55 | } 56 | void getMoreData() throws IOException { 57 | while(rb.pos >= rb.epos) { 58 | wrap.overRun = false; 59 | if(rb.next != null) { 60 | rb = rb.next; 61 | } else if(rb.done) { 62 | break; 63 | } else 
if(rb.epos >= rb.sb.length() 64 | && rb.epos > nmax) { 65 | rb.pos = 1; 66 | rb.epos = 1; 67 | rb.sb.setLength(1); 68 | readData(); 69 | } else if(rb.epos >= rb.sb.length() 70 | && moreToRead) { 71 | readData(); 72 | } else if(rp.getRegex().matchAt(wrap,rb.epos)) { 73 | if(wrap.overRun) { 74 | readData(); 75 | } else { 76 | StringBufferWrap sbw = new StringBufferWrap(); 77 | StringBufferLike sbl = new StringBufferLike(sbw); 78 | /* 79 | ReplaceRule rr = rex.getReplaceRule(); 80 | while(rr != null) { 81 | rr.apply(sbl,rex); 82 | rr = rr.next; 83 | } 84 | */ 85 | Regex rex = rp.getRegex(); 86 | int npos = rex.matchedTo(); 87 | rp.setBuffer(sbl); 88 | rp.setSource(wrap); 89 | rp.setPos(npos); 90 | rp.apply(rex,rex.getReplaceRule()); 91 | int opos = rb.epos; 92 | RBuffer rb2 = new RBuffer((StringBuffer)sbw.unwrap()); 93 | rb2.epos = rb2.sb.length(); 94 | RBuffer rb3 = new RBuffer(rb.sb); 95 | 96 | rb.next = rb2; 97 | rb2.next = rb3; 98 | 99 | if(npos == opos) { 100 | rb3.epos = npos+1; 101 | if(rb3.epos > rb3.sb.length()) { 102 | if(rb.pos >= rb.epos) 103 | rb = rb.next; 104 | rb3.pos = rb3.epos = 0; 105 | rb3.done = true; 106 | //break; 107 | } 108 | rb3.pos = npos; 109 | } else { 110 | rb3.pos = rb3.epos = npos; 111 | } 112 | 113 | } 114 | } else { 115 | if(wrap.overRun) { 116 | readData(); 117 | } else if(rb.epos= rb.epos) { 127 | getMoreData(); 128 | if(rb.pos >= rb.epos) 129 | return -1; 130 | } 131 | //System.out.println(rb); 132 | return rb.sb.charAt(rb.pos++); 133 | } 134 | public int read(char[] buf,int off,int len) 135 | throws IOException 136 | { 137 | int c = -1; 138 | int end = off+len; 139 | for(int i=off;i 2 | 3 | 4 | Tutorial for Regular Expressions in Java, Part 3 5 | 6 | 7 | 8 | 9 | 10 | 11 | 138 | 141 |
    12 | 13 | 14 | 20 | 59 | 137 |
      15 |

    Regular Expressions in Java

    16 | 17 |

    Package com.stevesoft.pat version 1.5

    18 |
    19 |
    22 | 23 |

    24 | 38 | Online help...
    39 | Quick Start
    40 | Tutorial Part 1
    41 | Tutorial Part 2
    42 | Tutorial Part 3
    43 | Tutorial Part 4
    44 | Tutorial Part 5
    45 | Tutorial Part 6
    46 | 58 |

    60 | 61 |
    62 |

    Tutorial Part 3

    63 |

    Pattern Elements

    64 |

    \A, \Z, ^, $, \b, \B

    65 |
    66 | The pattern elements "^" and "\A" match the beginning of a String. 67 |
    Regex r = new Regex("^.....");
     68 | 
     69 | r.search("Hello world.");
     70 | System.out.println(r.stringMatched());
     71 | // Prints "Hello"
     72 | 
     73 | r.search(" Hello world.");
     74 | System.out.println(r.didMatch());
     75 | // Prints "false"
     76 | 
    77 | 78 | Likewise, the pattern element "$" or "\Z" matches the end of 79 | a String. 80 |
    Regex r = new Regex(".......$");
     81 | 
     82 | r.search("Say goodbye");
     83 | System.out.println(r.stringMatched());
     84 | // Prints "goodbye"
     85 | 
    86 | 87 | You may, however, be interested in matching on a 88 | different sort of boundary, a "word boundary." This 89 | is the sort of thing you search for when you select 90 | a "match whole word" option from a search dialog 91 | box in a word processor. For example, suppose you 92 | wish to match on the word "some" but not words like 93 | "somehow" or "twosome." 94 |
    Regex r = new Regex("\\bsome\\b");
     95 | 
     96 | r.search("somehow");
     97 | System.out.println(""+r.didMatch());
     98 | // Prints "false"
     99 | 
    100 | r.search("twosome");
    101 | System.out.println(""+r.didMatch());
    102 | // Prints "false"
    103 | 
    104 | r.search("some");
    105 | System.out.println(""+r.didMatch());
    106 | // Prints "true"
    107 | 
    108 | The "\\b" pattern element matches on the space 109 | between a word character ("\\w" or "[a-zA-Z0-9_]") 110 | and a non-word character ("\\W" or "[^a-zA-Z0-9_]"). 111 | It will also match on the beginning or end of a 112 | String. 113 |

    114 | There is also a "\\B" that matches only at a position 115 | that is not a word boundary. In this example 116 | we want to match on a word that begins with 117 | "some" but is not the word "some" by itself. 118 |

    Regex r = new Regex("\\bsome\\B");
    119 | 
    120 | r.search("somehow");
    121 | System.out.println(""+r.didMatch());
    122 | // Prints "true"
    123 | 
    124 | r.search("twosome");
    125 | System.out.println(""+r.didMatch());
    126 | // Prints "false"
    127 | 
    128 | r.search("some");
    129 | System.out.println(""+r.didMatch());
    130 | // Prints "false"
    131 | 
    132 |
    133 | Previous 134 | Next 135 | 136 |
    142 | 143 | 144 | -------------------------------------------------------------------------------- /com/stevesoft/pat/SkipBMH.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | 10 | import com.stevesoft.pat.wrap.StringWrap; 11 | 12 | /** Like Skip, but implements a 13 | 14 | Boyer-Moore-Horspool type search 15 | method that has been modified to be more like a "T-search" (see 16 | the Michael Tamm''s article in C'T, magazin fuer computer und technic, August 97 17 | p 292). Yet another important source of information for me was 18 | the 19 | Deep Magic article on string searching. As of this writing, I can 20 | beat String's indexOf method in many cases. 21 | @see com.stevesoft.pat.Skip 22 | @see com.stevesoft.pat.Skip2 23 | */ 24 | public class SkipBMH extends Skip { 25 | // This number could be 256, but I think it's 26 | // big enough. Note, it must be a power of 2. 
27 | final int MAX_CHAR = 64; 28 | final char[] skip = new char[MAX_CHAR]; 29 | int sm1; 30 | int jump_ahead = 0; 31 | char uc,lc,tc,x; 32 | final boolean exact(char c) { 33 | return (ign && anyc(c))||c==x; 34 | } 35 | final boolean anyc(char c) { 36 | return c==uc||c==lc||c==tc; 37 | } 38 | public SkipBMH(String pt,boolean ign) { this(pt,ign,0); } 39 | public SkipBMH(String pt) { this(pt,false,0); } 40 | public SkipBMH(String pt,boolean ign,int offset) { 41 | super(pt,ign,offset); 42 | for(int k=0;k vend) return -1; 109 | } 110 | } 111 | } else { 112 | for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 113 | // table look-up is expensive, avoid it if possible 114 | if( x==s.charAt(k) ) { 115 | //if(src.regionMatches(0,s,k-sm1,sm1)) 116 | if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) 117 | return k-sm1-offset; 118 | k += jump_ahead; 119 | } 120 | } 121 | for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 122 | // table look-up is expensive, avoid it if possible 123 | if( x==s.charAt(k) ) { 124 | //if(src.regionMatches(0,s,k-sm1,sm1)) 125 | if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) 126 | return k-sm1-offset; 127 | k += jump_ahead; 128 | if(k > vend) return -1; 129 | } 130 | } 131 | } 132 | 133 | return -1; 134 | } 135 | public int find(StringLike s,int start,int end) { 136 | if(s instanceof StringWrap) 137 | return find(s.toString(),start,end); 138 | start += offset+sm1; 139 | int vend = min(s.length()-1,end+sm1+offset),k; 140 | int vend1 = vend-jump_ahead; 141 | if(ign) { 142 | for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 143 | // table look-up is expensive, avoid it if possible 144 | if( anyc(s.charAt(k)) ) { 145 | if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) 146 | return k-sm1-offset; 147 | k += jump_ahead; 148 | } 149 | } 150 | for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 151 | // table look-up is expensive, avoid it if possible 152 | if( anyc(s.charAt(k)) ) { 153 | 
if(CaseMgr.regionMatches(src,ign,0,s,k-sm1,sm1)) 154 | return k-sm1-offset; 155 | k += jump_ahead; 156 | if(k > vend) return -1; 157 | } 158 | } 159 | } else { 160 | for(k=start; k <= vend1;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 161 | // table look-up is expensive, avoid it if possible 162 | if( x==s.charAt(k) ) { 163 | //if(src.regionMatches(0,s,k-sm1,sm1)) 164 | if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) 165 | return k-sm1-offset; 166 | k += jump_ahead; 167 | } 168 | } 169 | for(; k <= vend;k += skip[s.charAt(k) & (MAX_CHAR-1)] ) { 170 | // table look-up is expensive, avoid it if possible 171 | if( x==s.charAt(k) ) { 172 | //if(src.regionMatches(0,s,k-sm1,sm1)) 173 | if(CaseMgr.regionMatches(src,false,0,s,k-sm1,sm1)) 174 | return k-sm1-offset; 175 | k += jump_ahead; 176 | if(k > vend) return -1; 177 | } 178 | } 179 | } 180 | 181 | return -1; 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /com/stevesoft/pat/FileRegex.java: -------------------------------------------------------------------------------- 1 | // 2 | // This software is now distributed according to 3 | // the Lesser Gnu Public License. Please see 4 | // http://www.gnu.org/copyleft/lesser.txt for 5 | // the details. 6 | // -- Happy Computing! 7 | // 8 | package com.stevesoft.pat; 9 | import java.io.*; 10 | import java.util.*; 11 | 12 | /** This class is a different form of Regex designed to work more 13 | like the file matching utility of a Unix shell. It is implemented 14 | by some simple string transformations: 15 |

    16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 |
    FileRegex Regex
    * .*
    . \.
    { (?:
    {?! (?!
    {?= (?=
    {?? (??
    } )
    ? .
    {,} (|)
    28 |
    29 | Note that a FileRegex pattern always ends with the Regex 30 | pattern element "$". If you like to experiment, try making 31 | FileRegex's and then printing them out. The toString() method 32 | does a decompile of the pattern to a standard Regex. Here are 33 | some more complete examples: 34 |
    35 | 36 | 37 | 38 | 39 | 40 |
    FileRegex Regex
    *.java .*\.java$
    *.{java,html} .*\.(java|html)$
    foo.[chC] foo\.[chC]$
    41 |
    42 | */ 43 | public class FileRegex extends Regex { 44 | /** Build an unitialized FileRegex. */ 45 | public FileRegex() { dirflag=EITHER; } 46 | /** Build a FileRegex form String s. */ 47 | public FileRegex(String s) { 48 | super(s); 49 | dirflag = EITHER; 50 | } 51 | /** Compile a new pattern. 52 | Throws @exception com.stevesoft.pat.RegSyntax for 53 | nonsensical patterns like "[9-0]+" just as Regex does. 54 | @see com.stevesoft.pat#compile(java.lang.String) 55 | */ 56 | public void compile(String s) throws RegSyntax { 57 | String npat = toFileRegex(s); 58 | super.compile(npat); 59 | if(File.separatorChar == '\\') // MS-DOS 60 | ignoreCase = true; 61 | } 62 | /** This is the method required by FileNameFilter. 63 | To get a listing of files in the current directory 64 | ending in .java, do this: 65 |
     66 |         File dot = new File(".");
     67 |         FileRegex java_files = new FileRegex("*.java");
     68 |         String[] file_list = dot.list(java_files);
     69 |         
    70 | */ 71 | public boolean accept(File dir,String s) { 72 | if(dirflag != EITHER) { 73 | File f = new File(s); 74 | if(f.isDirectory() && dirflag == NONDIR) 75 | return false; 76 | if(!f.isDirectory() && dirflag == DIR) 77 | return false; 78 | } 79 | return matchAt(s,0); 80 | } 81 | int dirflag = 0; 82 | final static int EITHER=0,DIR=1,NONDIR=2; 83 | 84 | /** Provides an alternative to File.list -- this 85 | separates its argument according to File.pathSeparator. 86 | To each path, it splits off a directory -- all characters 87 | up to and including the first instance of File.separator -- 88 | and a file pattern -- the part that comes after the directory. 89 | It then produces a list of all the pattern matches on all 90 | the paths. Thus "*.java:../*.java" would produce a list of 91 | all the java files in this directory and in the ".." directory 92 | on a Unix machine. "*.java;..\\*.java" would do the same thing 93 | on a Dos machine. */ 94 | public static String[] list(String f) { 95 | return list(f,EITHER); 96 | } 97 | static String[] list(String f,int df) { 98 | //return list_(f,new FileRegex()); 99 | StringTokenizer st = new StringTokenizer(f,File.pathSeparator); 100 | Vector v = new Vector(); 101 | while(st.hasMoreTokens()) { 102 | String path = st.nextToken(); 103 | list1(path,v,df,true); 104 | } 105 | String[] sa = new String[v.size()]; 106 | v.copyInto(sa); 107 | return sa; 108 | } 109 | final static Regex root=new Regex(File.separatorChar=='/' ? 110 | "/$" : "(?:.:|)\\\\$"); 111 | static void list1(String path,Vector v,int df,boolean rec) { 112 | // if path looks like a/b/c/ or d:\ then add . 113 | if(root.matchAt(path,0)) { 114 | v.addElement(path+"."); 115 | return; 116 | } 117 | File f = new File(path); 118 | if(f.getParent() != null && rec) { 119 | Vector v2 = new Vector(); 120 | list1(f.getParent(),v2,DIR,true); 121 | for(int i=0;i 2 | 3 | 4 | Tutorial for Regular Expressions in Java, Part 5 | 6 | 7 | 8 | 9 | 10 | 11 | 138 | 141 |
    12 | 13 | 14 | 20 | 59 | 137 |
      15 |

    Regular Expressions in Java

    16 | 17 |

    Package com.stevesoft.pat version 1.5

    18 |
    19 |
    22 | 23 |

    24 | 38 | Online help...
    39 | Quick Start
    40 | Tutorial Part 1
    41 | Tutorial Part 2
    42 | Tutorial Part 3
    43 | Tutorial Part 4
    44 | Tutorial Part 5
    45 | Tutorial Part 6
    46 | 58 |

    60 | 61 |
    62 |

    Tutorial Part 4

    63 |

    Pattern Elements

    64 |

    {1,}?, (?#), \1, \2, \G

    65 |
    66 | We only have a few pattern elements left to explore before 67 | the syntax of Perl 5 is covered. The first is to follow 68 | a pattern element such as {2,}, {3,8}, +, *, ? by a question 69 | mark. The effect of this is to make the pattern matching less 70 | hungry: instead of matching as many times as it can, the pattern 71 | matcher will attempt to match as few times as it can. 72 |
    Regex r = new Regex("\\d+?");
     73 | r.search("36454");
     74 | System.out.println(r.stringMatched());
     75 | // Prints "3"
     76 | 
     77 | r = new Regex("\\d*?4");
     78 | r.search("36454");
     79 | System.out.println(r.stringMatched());
     80 | // Prints "364"
     81 | 
    82 | Now suppose you want to match the text between single quotes. 83 | You would want to use a minimal match, not a maximal one. 84 |
    Regex r = new Regex("'.*'");
     85 | r.search("  'hello' and 'world'  ");
     86 | System.out.println(r.stringMatched());
     87 | // Prints "'hello' and 'world'"
     88 | // This isn't really what we wanted, we got
     89 | // the text inside two sets of ''s as well as
     90 | // some from in between.
     91 | 
     92 | r = new Regex("'.*?'");
     93 | r.search("  'hello' and 'world' ");
     94 | System.out.println(r.stringMatched());
     95 | // Prints 'hello'
     96 | 
    97 | But let's get a little more fancy. Suppose we want 98 | to match the text between either single or double quotes. 99 | We can do this as follows 100 |
    Regex r = new Regex("(['\"]).*?\\1");
    101 | r.search(" 'hello' and 'world' ");
    102 | System.out.println(r.stringMatched());
    103 | // Prints "'hello'"
    104 | 
    105 | r.search(" \"hello\" and \"world\" ");
    106 | System.out.println(r.stringMatched());
    107 | // Prints "\"hello\""
    108 | 
    109 | The "\\1" matches the text contained in the first backreference. 110 | You can likewise use "\\2" to match the second backreference (if 111 | you have a second backreference). 112 |

    113 | If you wish one search to pick up where the last one left off you 114 | can use the "\G" pattern element. If the string hasn't been searched 115 | before, then "\G" matches the beginning of the String. 116 |

    Regex r = new Regex("\\Gfoo");
    117 | String x = "foofoo foo";
    118 | System.out.println(r.search(x));
    119 | System.out.println(r.search(x));
    120 | System.out.println(r.search(x));
    121 | // Prints true, true, false.
    122 | 
    123 |

    124 | Finally, you can add comments inside your patterns using the "(?#)" 125 | notation. The following pattern is the same as the quote-matching pattern shown earlier; it 126 | just has a comment added, and it works exactly the same. 127 |

    Regex r = new Regex("(['\"]).*?\\1(?# I like this pattern)");
    128 | 
    129 | This completes the set of pattern elements that exist in perl 5. 130 |

    131 | For a pattern summary table click here. 132 |


    133 | Previous 134 | Next 135 | 136 |
    142 | 143 | 144 | --------------------------------------------------------------------------------