= 0) {
33 | pn = i;
34 | return r;
35 | }
36 | }
37 | pn = -1;
38 | return -1;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Pthings.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.*;
10 |
11 | /**
12 | Things that need to be kept track of during a
13 | match. Passed along with Pattern.matchInternal. */
14 | public class Pthings {
15 | /** The current text we are attempting to match. */
16 | public StringLike src;
17 | /** Whether we should ignore the case of letters in
18 | this match. */
19 | public boolean ignoreCase;
20 | public boolean mFlag;
21 | /** The mask to use when dontMatchInQuotes is set. */
22 | public BitSet cbits;
23 | /** Used to keep track of backreferences. */
24 | //public Hashtable marks;
25 | public int[] marks;
26 | public int nMarks;
27 | /** Used to set the behavior of "." By default, it
28 | now fails to match the '\n' character. */
29 | public boolean dotDoesntMatchCR;
30 | /** Determine if Skipped strings need to be checked. */
31 | public boolean no_check;
32 | int lastPos;
33 | }
34 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/wrap/WriterWrap.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat.wrap;
9 |
10 | import com.stevesoft.pat.*;
11 | import java.io.*;
12 |
13 | /** Allows the outcome of a replaceAll() or replaceFirst()
14 | to be directed to a Writer rather than a String.
15 |
16 | The method toStringLike() cannot work, however.
17 | This means that the return value of replaceAll() will
18 | be null if this Object is used as the StringBufferLike.*/
19 | public class WriterWrap
20 | implements BasicStringBufferLike
21 | {
22 | Writer w;
23 | public WriterWrap(Writer w) {
24 | this.w = w;
25 | }
26 | public void append(char c) {
27 | try {
28 | w.write((int)c);
29 | } catch(IOException ioe) {}
30 | }
31 | public void append(String s) {
32 | try {
33 | w.write(s);
34 | } catch(IOException ioe) {}
35 | }
36 |
37 | /** This operation can't really be done. */
38 | public StringLike toStringLike() {
39 | return null;
40 | }
41 |
42 | public Object unwrap() {
43 | return w;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/wrap/CharArrayWrap.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat.wrap;
9 |
10 | import com.stevesoft.pat.*;
11 |
12 | /** This provides a wrapper for a char array so that
13 | it can be searched by Regex. */
14 | public class CharArrayWrap implements StringLike {
15 | char[] ca;
16 | public char[] getCharArray() { return ca; }
17 | public CharArrayWrap(char[] ca) { this.ca = ca; }
18 | public String toString() {
19 | return new String(ca);
20 | }
21 | public char charAt(int i) { return ca[i]; }
22 | public int length() { return ca.length; }
23 | public String substring(int i1,int i2) {
24 | StringBuffer sb = new StringBuffer();
25 | for(int i=i1;i end) return -1;
28 | start += offset;
29 | int vend = min(s.length()-2,end+offset);
30 | for(int i=start;i<=vend;i++)
31 | if(0 == (s.charAt(i)&mask) && 0 == (s.charAt(i+1)&mask1)) {
32 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) )
33 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) )
34 | return i-offset;
35 | }
36 | return -1;
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/End.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | //class AddToEnd extends RegSyntax {};
12 |
13 | /** Compiles the '$' or the '\Z' Pattern. It is
14 | an error to have further Pattern elements after
15 | '\Z'. It is the end of the String. */
16 | class End extends Pattern {
17 | boolean retIsEnd;
18 | End(boolean b) { retIsEnd = b; }
19 | public int matchInternal(int pos,Pthings pt) {
20 | if(retIsEnd && pt.mFlag && pos < pt.src.length()) {
21 | if(pt.src.charAt(pos)=='\n') {
22 | return nextMatch(pos,pt);
23 | }
24 | }
25 | if(pt.src.length() == pos)
26 | return nextMatch(pos,pt);
27 | else if(posderiv2.java
13 | and deriv3.java
14 | in the test directory.
15 | @see com.stevesoft.pat.CustomEndpoint
16 | */
17 | class Custom extends PatternSub {
18 | String select;
19 | Validator v;
20 | int start;
21 | Custom(String s) {
22 | select = s;
23 | v = (Validator)Regex.validators.get(s);
24 | }
25 | public int matchInternal(int pos,Pthings pt) {
26 | start = pos;
27 | return sub.matchInternal(pos,pt);
28 | }
29 | public String toString() {
30 | String a = v.argsave == null ? "" : ":"+v.argsave;
31 | return "(??"+select+a+")"+nextString();
32 | }
33 | public patInt minChars() { return v.minChars(); }
34 | public patInt maxChars() { return v.maxChars(); }
35 | Pattern clone1(Hashtable h) {
36 | Custom c = new Custom(select);
37 | h.put(c,c);
38 | h.put(this,c);
39 | c.sub = sub.clone(h);
40 | return c;
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Group.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** This class implements the (?@<>) syntax that matches
12 | a balanced parenthesis. Not in perl 5. */
13 | class Group extends Pattern {
14 | char op,cl;
15 | Group(char opi,char cli) {
16 | op = opi;
17 | cl = cli;
18 | }
19 | public int matchInternal(int pos,Pthings pt) {
20 | int i,count=1;
21 | if(pos < pt.src.length())
22 | if(!Masked(pos,pt) && pt.src.charAt(pos) != op)
23 | return -1;
24 | for(i=pos+1;i
13 | The StringLike object will not change. Calls to
14 | charAt(int) will not vary with time.
15 | The length of the object being searched is known
16 | before the search begins and does not vary with time.
17 |
18 | Note that searching String is probably faster than searching
19 | other objects, so searching String is still preferred if
20 | possible.
21 | */
22 | public interface StringLike {
23 | public char charAt(int i);
24 | public String toString();
25 | public int length();
26 | public String substring(int i1,int i2);
27 | /** Obtain the underlying object, be it a String, char[],
28 | RandomAccessFile, whatever. */
29 | public Object unwrap();
30 | /** By default, the result is put in a String or char[]
31 | when a replace is done. If you wish to save the result
32 | in some other StringBufferLike then you can do this
33 | by implementing this method, or over-riding it's behavior
34 | from an existing class. */
35 | public BasicStringBufferLike newStringBufferLike();
36 | public int indexOf(char c);
37 | }
38 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Bracket.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Vector;
10 | import java.util.Hashtable;
11 |
12 | /** The Bracket is a form of the Or class,
13 | implements the pattern element [ ]. */
14 | class Bracket extends Or {
15 | boolean neg;
16 | Bracket(boolean n) { neg = n; }
17 | String leftForm() {
18 | if(neg)
19 | return "[^";
20 | else
21 | return "[";
22 | }
23 | String rightForm() { return "]"; }
24 | String sepForm() { return ""; }
25 | public int matchInternal(int pos,Pthings pt) {
26 | if(pos >= pt.src.length()) return -1;
27 | int r = super.matchInternal(pos,pt);
28 | if((neg && r<0)||(!neg && r>=0))
29 | return nextMatch(pos+1,pt);
30 | return -1;
31 | }
32 | public patInt minChars() { return new patInt(1); }
33 | public patInt maxChars() { return new patInt(1); }
34 |
35 | public Or addOr(Pattern p) {
36 | pv = null;
37 | v.addElement(p);
38 | p.setParent(null);
39 | return this;
40 | }
41 | public Pattern clone1(Hashtable h) {
42 | Bracket b = new Bracket(neg);
43 | b.v = new Vector();
44 | for(int i=0;i= 0) {
23 | if(reverse) return -1;
24 | else return nextMatch(pos,pt);
25 | } else {
26 | if(reverse) return nextMatch(pos,pt);
27 | else return -1;
28 | }
29 | }
30 | String leftForm() {
31 | if(reverse)
32 | return "(?!";
33 | else
34 | return "(?=";
35 | }
36 | public patInt minChars() { return new patInt(0); }
37 | public patInt maxChars() { return new patInt(0); }
38 | Pattern clone1(Hashtable h) {
39 | lookAhead la=new lookAhead(reverse);
40 | h.put(this,la);
41 | h.put(la,la);
42 | for(int i=0;iCopyright 2001, Steven R. Brandt
13 | */ /**
14 | This class only exists to store data needed during the
15 | compilation of a regular expression. */
16 | public class Rthings {
17 | /** The numeric identity of the next () to be encountered
18 | while compiling the pattern. */
19 | public int val=Regex.BackRefOffset;
20 | /** Needed in case (?i) is encountered, to pass back the
21 | message that ignoreCase should be set. */
22 | public boolean ignoreCase;
23 | /** Needed in case (?Q) is encountered, to pass back the
24 | message that dontMatchInQuotes should be set. */
25 | public boolean dontMatchInQuotes;
26 | public boolean optimizeMe = false;
27 | public boolean noBackRefs = false;
28 | public int parenLevel = 0;
29 | boolean gFlag = false, mFlag = false, sFlag = false;
30 | Pattern p;
31 | Or o;
32 | Rthings(Regex r) {
33 | ignoreCase = r.ignoreCase;
34 | dontMatchInQuotes = r.dontMatchInQuotes;
35 | }
36 | void set(Regex r) {
37 | r.gFlag = gFlag;
38 | r.mFlag = mFlag;
39 | r.sFlag = sFlag;
40 | r.ignoreCase = ignoreCase;
41 | r.dontMatchInQuotes = dontMatchInQuotes;
42 | if(optimizeMe) r.optimize();
43 | }
44 | };
45 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/oneChar.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** This class matches a single character. */
12 | class oneChar extends Pattern {
13 | char c,altc,altc2;
14 | int mask;
15 | public oneChar(char ci) {
16 | c = ci;
17 | char cu,cl,ct;
18 | cu = CaseMgr.toUpperCase(c);
19 | cl = CaseMgr.toLowerCase(c);
20 | ct = CaseMgr.toTitleCase(c);
21 | if(c == cu) {
22 | altc = cl;
23 | altc2 = ct;
24 | } else if(c == cl) {
25 | altc = cu;
26 | altc2 = ct;
27 | } else {
28 | altc = cl;
29 | altc2 = cu;
30 | }
31 | mask = c & altc & altc2;
32 | }
33 | public int matchInternal(int pos,Pthings pt) {
34 | char p;
35 | int ret=-1;
36 | if (pos < pt.src.length() && !Masked(pos,pt)
37 | && ((p=pt.src.charAt(pos))==c ||
38 | (pt.ignoreCase&& (p==altc||p==altc2) ) ))
39 | ret = nextMatch(pos+1,pt);
40 | return ret;
41 | }
42 | public String toString() {
43 | return protect(""+c,PROTECT_THESE,ESC)+nextString();
44 | }
45 | public patInt minChars() { return new patInt(1); }
46 | public patInt maxChars() { return new patInt(1); }
47 | Pattern clone1(Hashtable h) { return new oneChar(c); }
48 | };
49 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/apps/ColorLine.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat.apps;
9 | import java.awt.*;
10 | import java.util.*;
11 |
12 | /* A helper class for the ColorText class. This provides a single
13 | line of text with various colored parts. */
14 | public class ColorLine {
15 | Vector v = new Vector();
16 | public Object clone() {
17 | ColorLine cl = new ColorLine();
18 | cl.v = (Vector)v.clone();
19 | return cl;
20 | }
21 | int ColorLineWidth(FontMetrics fm) {
22 | int width = 0;
23 | int i;
24 | for(i=0;i pt.marks[id+pt.nMarks]) {
37 | int swap = pt.marks[id];
38 | pt.marks[id] = pt.marks[id+pt.nMarks]+1;
39 | pt.marks[id+pt.nMarks] = swap+1;
40 | }
41 | return ret;
42 | }
43 | public Pattern clone1(Hashtable h) {
44 | OrMark om = new OrMark(id);
45 | h.put(om,om);
46 | h.put(this,om);
47 | for(int i=0;i= 'a' && c <= 'z')
18 | return true;
19 | if(c >= 'A' && c <= 'Z')
20 | return true;
21 | if(c >= '0' && c <= '9')
22 | return true;
23 | if(c == '_')
24 | return true;
25 | return false;
26 | }
27 | boolean matchLeft(int pos,Pthings pt) {
28 | if(pos <= 0)
29 | return true;
30 | if(isAChar(pt.src.charAt(pos))
31 | && isAChar(pt.src.charAt(pos-1)))
32 | return false;
33 | return true;
34 | }
35 | boolean matchRight(int pos,Pthings pt) {
36 | if(pos < 0) return false;
37 | if(pos+1 >= pt.src.length())
38 | return true;
39 | if(isAChar(pt.src.charAt(pos))
40 | && isAChar(pt.src.charAt(pos+1)))
41 | return false;
42 | return true;
43 | }
44 | public int matchInternal(int pos,Pthings pt) {
45 | if(matchRight(pos-1,pt) || matchLeft(pos,pt))
46 | return nextMatch(pos,pt);
47 | return -1;
48 | }
49 | public patInt maxChars() { return new patInt(0); }
50 | public Pattern clone1(Hashtable h) { return new Boundary(); }
51 | };
52 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/StringBufferLike.java:
--------------------------------------------------------------------------------
1 | package//
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | com.stevesoft.pat;
9 |
10 | import com.stevesoft.pat.*;
11 |
12 | /** A tool that is used to make the \E, \U, \L, and \Q
13 | elements of a substitution. */
14 | public class StringBufferLike implements BasicStringBufferLike {
15 | BasicStringBufferLike sbl;
16 | public StringBufferLike(BasicStringBufferLike sbl) {
17 | this.sbl = sbl;
18 | }
19 | char mode = 'E', altMode = ' ';
20 | public StringLike toStringLike() {
21 | return sbl.toStringLike();
22 | }
23 | public String toString() {
24 | return sbl.toString();
25 | }
26 | public void append(char c) {
27 |
28 | switch(mode) {
29 | case 'u':
30 | mode = altMode;
31 | altMode = ' ';
32 | case 'U':
33 | sbl.append(CaseMgr.toUpperCase(c));
34 | break;
35 | case 'l':
36 | mode = altMode;
37 | altMode = ' ';
38 | case 'L':
39 | sbl.append(CaseMgr.toLowerCase(c));
40 | break;
41 | case 'Q':
42 | if((c >= 'a' && c <= 'z')
43 | || (c >= 'A' && c <= 'Z')
44 | || (c >= '0' && c <= '9'))
45 | ;
46 | else
47 | sbl.append('\\');
48 | default:
49 | sbl.append(c);
50 | break;
51 | }
52 | }
53 | public void append(String s) {
54 | for(int i=0;ihttp://unicode.org. */
12 | public class Prop {
13 | /** Is this a "Decimal Digit" according to Unicode? */
14 | public final static boolean isDecimalDigit(char c) {
15 | if(Bits.decimal_digit == null)
16 | Bits.decimal_digit_f();
17 | return Bits.decimal_digit.get(c);
18 | }
19 | /** Is this a "Alphabetic" according to Unicode? */
20 | public final static boolean isAlphabetic(char c) {
21 | if(Bits.letter == null)
22 | Bits.letter_f();
23 | return Bits.letter.get(c);
24 | }
25 | /** Is this a "Math" according to Unicode? */
26 | public final static boolean isMath(char c) {
27 | if(Bits.math == null)
28 | Bits.math_f();
29 | return Bits.math.get(c);
30 | }
31 |
32 | /** Is this a "Currency" according to Unicode? */
33 | public final static boolean isCurrency(char c) {
34 | if(Bits.currency == null)
35 | Bits.currency_f();
36 | return Bits.currency.get(c);
37 | }
38 |
39 | /** Is c a white space character according to Unicode? */
40 | public final static boolean isWhite(char c) {
41 | if(Bits.white == null)
42 | Bits.white_f();
43 | return Bits.white.get(c);
44 | }
45 |
46 | /** Is c a punctuation character according to Unicode? */
47 | public final static boolean isPunct(char c) {
48 | if(Bits.punct == null)
49 | Bits.punct_f();
50 | return Bits.punct.get(c);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Range.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** Thrown when one encounters things like [z-a] */
12 | class BadRangeArgs extends RegSyntax {};
13 |
14 | /** Implments a subelement (ranges) of the [] pattern element.
15 | For example, [a-z023] is implemented using a range and tree oneChar
16 | classes.
17 | @see Bracket
18 | @see oneChar
19 | */
20 | class Range extends Pattern {
21 | char lo,hi,altlo,althi;
22 | boolean printBrackets = false;
23 | public String toString() {
24 | String s=protect(""+lo,PROTECT_THESE,ESC)+"-"
25 | +protect(""+hi,PROTECT_THESE,ESC);
26 | if(!printBrackets)
27 | return s;
28 | return "["+s+"]";
29 | }
30 | Range(char loi,char hii) throws RegSyntax {
31 | lo = loi; hi = hii;
32 | oneChar o = null;
33 | if(lo >= hi)
34 | //throw new BadRangeArgs();
35 | RegSyntaxError.endItAll("Badly formed []'s : "+lo+" >= "+hi);
36 | o = new oneChar(lo);
37 | altlo = o.altc;
38 | o = new oneChar(hi);
39 | althi = o.altc;
40 | }
41 | public int matchInternal(int pos,Pthings pt) {
42 | if(pos >= pt.src.length()) return -1;
43 | if(Masked(pos,pt)) return -1;
44 | char c = pt.src.charAt(pos);
45 | if(lo <= c && c <= hi ||
46 | (pt.ignoreCase && (altlo <= c && c <= althi)))
47 | return nextMatch(pos+1,pt);
48 | return -1;
49 | }
50 | public patInt minChars() { return new patInt(1); }
51 | public patInt maxChars() { return new patInt(1); }
52 | public Pattern clone1(Hashtable h) {
53 | try {
54 | Range r = new Range(lo,hi);
55 | r.printBrackets = printBrackets;
56 | return r;
57 | } catch(RegSyntax rs) {
58 | return null;
59 | }
60 | }
61 | };
62 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Multi.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** Matches any number of instances of sub Pattern
12 | this was the hardest method to write. It implements
13 | '+', '*', '?', "{0,10}", "{5,}", "{5}", etc.
14 | @see pat.Multi_stage2
15 | @see pat.MultiMin
16 | */
17 | class Multi extends PatternSub {
18 | patInt a,b;
19 | public patInt minChars() { return a.mul(p.countMinChars()); }
20 | public patInt maxChars() { return b.mul(p.countMaxChars()); }
21 | Pattern p;
22 | Multi_stage2 st2;
23 | public boolean matchFewest = false;
24 | /**
25 | @param a The fewest number of times the sub pattern can match.
26 | @param b The maximum number of times the sub pattern can match.
27 | @param p The sub pattern.
28 | @see Multi_stage2
29 | @see MultiMin
30 | */
31 | public Multi(patInt a,patInt b,Pattern p) throws RegSyntax {
32 | if(!a.lessEq(b))
33 | throw new RegSyntax("{"+a+","+b+"} is invalid");
34 | this.a = a;
35 | this.b = b;
36 | this.p = p;
37 | st2 = new Multi_stage2(a,b,p);
38 | st2.parent = this;
39 | sub = st2.sub;
40 | }
41 | public String toString() {
42 | st2.matchFewest = matchFewest;
43 | return st2.toString();
44 | }
45 | public int matchInternal(int pos,Pthings pt) {
46 | try {
47 | st2 = new Multi_stage2(a,b,p);
48 | } catch(RegSyntax r__) {}
49 | st2.matchFewest = matchFewest;
50 | st2.parent = this;
51 | return st2.matchInternal(pos,pt);
52 | }
53 | public Pattern clone1(Hashtable h) {
54 | try {
55 | Multi m = new Multi(a,b,((Pattern)p).clone(h));
56 | m.matchFewest = matchFewest;
57 | return m;
58 | } catch(RegSyntax rs) {
59 | return null;
60 | }
61 | }
62 | };
63 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Validator.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | /** This class makes it easy to create your own patterns
11 | and integrate them into Regex. For more detail, see the
12 | example file deriv2.java or
13 | deriv3.java. */
14 |
15 | public class Validator {
16 | String argsave = null;
17 | String pattern = ".";
18 | /**
19 | This method does extra checking on a matched section of
20 | a String beginning at position start and ending at end.
21 | The idea is that you can do extra checking with this
22 | that you don't know how to do with a standard Regex.
23 |
24 | If this method is successful, it returns the location
25 | of the end of this pattern element -- that may be the
26 | value end provided or some other value. A negative
27 | value signifies that a match failure.
28 |
29 | By default, this method just returns end and thus
30 | does nothing.
31 | @see com.stevesoft.pat.Regex#define(java.lang.String,java.lang.String,com.stevesoft.pat.Validator)
32 | */
33 | public int validate(StringLike src,int start,int end) {
34 | return end;
35 | }
36 | /* This method allows you to modify the behavior of this
37 | validator by making a new Validator object. If a Validator
38 | named "foo" is defined, then the pattern "{??foo:bar}" will
39 | cause Regex to first get the Validator given to Regex.define
40 | and then to call its arg method with the string "bar".
41 | If this method returns a null (the default) you get the same
42 | behavior as the pattern "{??foo}" would supply. */
43 | public Validator arg(String s) { return null; }
44 |
45 | /** For optimization it is helpful, but not necessary, that
46 | you define the minimum number of characters this validator
47 | will allow to match. To do this
48 | return new patInt(number) where number is the smallest
49 | number of characters that can match. */
50 | public patInt minChars() { return new patInt(0); }
51 |
52 | /** For optimization it is helpful, but not necessary, that
53 | you define the maximum number of characters this validator
54 | will allow to match. To do this either
55 | return new patInt(number), or new patInf() if an infinite
56 | number of characters may match. */
57 | public patInt maxChars() { return new patInf(); }
58 | }
59 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/PartialBuffer.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | import java.io.*;
11 |
12 | /** This class allows you to match on a partial string.
13 | If the allowOverRun flag is true, then the
14 | length() method returns a number 1 larger than
15 | is actually contained by the class.
16 |
17 | If one attempts to access the last character as
18 | follows:
19 |
20 | StringBuffer sb = ...;
21 | ...
22 | PartialBuffer pb = new PartialBuffer(sb);
23 | char c = pb.charAt(pb.length()-1);
24 |
25 | then two things happen. First, a zero is returned
26 | into the variable c. Second, the overRun flag is
27 | set to "true." Accessing data beyond the end of
28 | the buffer is considered an "overRun" of the data.
29 |
30 | This can be helpful in determining whether more
31 | characters are required for a match to occur, as
32 | the pseudo-code below illustrates.
33 |
34 | int i = ...;
35 | Regex r = new Regex("some pattern");
36 | pb.allowOverRun = true;
37 | pb.overRun = false;
38 | boolean result = r.matchAt(pb,i);
39 | if(pb.overRun) {
40 | // The result of the match is not relevant, regardless
41 | // of whether result is true or false. We need to
42 | // append more data to the buffer and try again.
43 | ....
44 | sb.append(more data);
45 | }
46 |
47 | */
48 | class PartialBuffer implements StringLike {
49 | int off;
50 | public boolean allowOverRun = true;
51 | public boolean overRun = false;
52 | StringBuffer sb;
53 | PartialBuffer(StringBuffer sb) {
54 | this.sb = sb;
55 | }
56 | public char charAt(int n) {
57 | n += off;
58 | if(n == sb.length()) {
59 | overRun = true;
60 | return 0;
61 | }
62 | return sb.charAt(n);
63 | }
64 | public int length() {
65 | return allowOverRun ? sb.length()+1 : sb.length();
66 | }
67 | public int indexOf(char c) {
68 | for(int i=0;i0)
33 | sb.append( ((Pattern)v.elementAt(0)).toString() );
34 | for(i=1;i= 0)
51 | return r;
52 | }
53 | return -1;
54 | }
55 | public patInt minChars() {
56 | if(v.size()==0) return new patInt(0);
57 | patInt m = ((Pattern)v.elementAt(0)).countMinChars();
58 | for(int i=1;i v.size()) imax = v.size();
50 | for(i=imin;i d.width ? xs : d.width;
69 | }
70 | ys += fm.getAscent();
71 | Dimension d = new Dimension(xs,ys);
72 | return d;
73 | }
74 | final void
75 | drawColorLine(Graphics g,FontMetrics fm,ColorLine ln,int yi) {
76 | int i;
77 | int x = x_margin;
78 | int y = fm.getAscent()+yi*fm.getHeight()+y_margin;
79 | for(i=0;i i) i = p.i;
80 | return this;
81 | }
82 | /** Tests to see if this represents an infinite quantity. */
83 | public boolean finite() { return !inf; }
84 | /** Converts a patInt to an int. Infinity is
85 | mapped to Integer.MAX_VALUE.
86 | */
87 | public int intValue() { return inf ? Integer.MAX_VALUE : i; }
88 | };
89 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Multi_stage2.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** If Multi were not split into a second stage, then
12 | a nested Multi would try to re-use the same count
13 | variable and the whole thing would break. */
14 | class Multi_stage2 extends PatternSub {
15 | Pattern nextRet;
16 | patInt count;
17 | patInt matchMin,matchMax;
18 | public boolean matchFewest = false;
19 | public String toString() {
20 | String ret = "";
21 | ret += sub.toString();
22 | ret += "{"+matchMin+","+matchMax+"}";
23 | if(matchFewest) ret += "?";
24 | ret += parent.nextString();
25 | return ret;
26 | }
27 | Multi_stage2(patInt a,patInt b,Pattern p) throws RegSyntax {
28 | if(p == null) RegSyntaxError.endItAll(
29 | "Multiple match of Null pattern requested.");
30 | sub = p;
31 | nextRet = this;
32 | sub.setParent(this);
33 | matchMin = a;
34 | matchMax = b;
35 | count = new patInt(0);
36 | // we must have b > a > -1 for this
37 | // to make sense.
38 | if(!a.lessEq(b))
39 | //throw new BadMultiArgs();
40 | RegSyntaxError.endItAll("Bad Multi Args: "+a+">"+b);
41 | patInt i = new patInt(-1);
42 | if(a.lessEq(i))
43 | //throw new BadMultiArgs();
44 | RegSyntaxError.endItAll("Bad Multi Args: "+a+"< 0");
45 | }
46 | public Pattern getNext() {
47 | return nextRet;
48 | }
49 | int pos_old = -1;
50 | public int matchInternal(int pos,Pthings pt) {
51 | sub.setParent(this);
52 |
53 | int canUse = -1;
54 |
55 | // check for some forms of infinite recursion...
56 | if(pos_old >= 0 && pos == pos_old) {
57 | return -1;
58 | }
59 | pos_old = pos;
60 |
61 | if(matchMin.lessEq(count))
62 | canUse = pos;
63 | if(!count.lessEq(matchMax) || pos > pt.src.length())
64 | return -1;
65 |
66 | if((matchFewest||count.equals(matchMax)) && canUse >= 0) {
67 | Pattern n = super.getNext();
68 | if(n == null)
69 | return canUse;
70 | int ret = testMatch(n,pos,pt);
71 | if(ret >= 0) {
72 | return ret;
73 | }
74 | else canUse = -1;
75 | }
76 |
77 | count.inc();
78 | try {
79 | if(count.lessEq(matchMax)) {
80 | int r = testMatch(sub,pos,pt);
81 | if(r >= 0)
82 | return r;
83 | }
84 | } finally { count.dec(); }
85 |
86 | if(!matchFewest && canUse >= 0) {
87 | Pattern n = super.getNext();
88 | if(n == null)
89 | return canUse;
90 | int ret = testMatch(n,pos,pt);
91 | return ret;
92 | } else return canUse;
93 | }
94 | public Pattern clone1(Hashtable h) {
95 | try {
96 | Multi_stage2 m = new Multi_stage2(matchMin,matchMax,sub.clone(h));
97 | m.matchFewest = matchFewest;
98 | return m;
99 | } catch(RegSyntax rs) {
100 | return null;
101 | }
102 | }
103 | };
104 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/wrap/RandomAccessFileWrap.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat.wrap;
9 |
10 | import com.stevesoft.pat.*;
11 | import java.io.*;
12 |
13 | /** Provides a wrapper for a RandomAccessFile so that it
14 | can be searched by Regex. */
15 | public class RandomAccessFileWrap implements StringLike {
16 |
17 | long offset = 0;
18 | public void setOffset(long o) {
19 | offset = o;
20 | i0 = iend = 0;
21 | }
22 | public long getOffset() {
23 | return offset;
24 | }
25 | RandomAccessFile raf;
26 | int i0=0,iend=0;
27 | byte[] buf = new byte[1024];
28 |
29 | public int getBufferSize() {
30 | return buf.length;
31 | }
32 |
33 | public void setBufferSize(int bs) {
34 | buf = new byte[bs];
35 | i0 = iend = 0;
36 | }
37 |
38 | public RandomAccessFileWrap(String file) throws IOException {
39 | this.raf = new RandomAccessFile(file,"r");
40 | }
41 | public RandomAccessFileWrap(RandomAccessFile raf) {
42 | this.raf = raf;
43 | }
44 |
45 | public char charAt(int i) {
46 | if(i >= i0 && i < iend)
47 | return (char)buf[i-i0];
48 |
49 | try {
50 | i0 = i-5;
51 | //if(i0+offset<0) i0=(int)(-offset);
52 | if(i0<0) i0=0;
53 | raf.seek(i0+offset);
54 | iend = i0+raf.read(buf,0,buf.length);
55 |
56 | if(i >= i0 && i < iend)
57 | return (char)buf[i-i0];
58 | } catch(Throwable t) {}
59 |
60 | throw new ArrayIndexOutOfBoundsException("Out of bounds for file:"+
61 | " i="+i+
62 | ", Final Buffer: i0="+i0+
63 | " iend="+iend);
64 | }
65 |
66 | public String toString() { throw new Error("Not implemented"); }
67 | public int length() {
68 | try {
69 | long len = raf.length()-offset;
70 | if(len > Integer.MAX_VALUE)
71 | return Integer.MAX_VALUE;
72 | return (int)len;
73 | } catch(IOException ioe) {
74 | return 0;
75 | }
76 | }
77 | public String substring(int i1,int i2) {
78 | StringBuffer sb = new StringBuffer();
79 | for(int i=i1;i");
81 | ln.add(Color.black,r.left());
82 | ln.add(darkgreen,"|");
83 | ln.add(darkred,r.substring());
84 | ln.add(darkgreen,"|");
85 | ln.add(Color.black,r.right());
86 | ln.add(darkgreen,"<==");
87 | ctxt.addColorLine(ln);
88 | ctxt.addColorLine(new ColorLine());
89 | if(r.numSubs() > 0) {
90 | ln = new ColorLine();
91 | ln.add(darkblue,"Backreferences:");
92 | ctxt.addColorLine(ln);
93 | }
94 | int i;
95 | for(i=1;i<=r.numSubs();i++) {
96 | ln = new ColorLine();
97 | ln.add(darkblue,"("+i+") : ");
98 | if(r.left(i)==null) ln.add(darkblue,"[null]");
99 | else {
100 | ln.add(Color.black,r.left(i));
101 | ln.add(darkgreen,"|");
102 | ln.add(darkred,r.substring(i));
103 | ln.add(darkgreen,"|");
104 | ln.add(Color.black,r.right(i));
105 | }
106 | ctxt.addColorLine(ln);
107 | }
108 | ctxt.repaint();
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/FastMulti.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** A special case of Multi, implemented when minChars().equals(maxChars()),
12 | * and some other conditions spelled out in RegOpt.safe4fm "Safe for
13 | * FastMulti." It avoids stack growth problems as well as being slightly
14 | * faster.
15 | */
16 | class FastMulti extends PatternSub {
17 | patInt fewestMatches,mostMatches;
18 | public patInt minChars() {
19 | return sub.countMinChars().mul(fewestMatches);
20 | }
21 | public patInt maxChars() {
22 | return sub.countMaxChars().mul(mostMatches);
23 | }
24 | public boolean matchFewest = false;
25 |
26 | FastMulti(patInt a,patInt b,Pattern p) throws RegSyntax {
27 | if(!a.lessEq(b))
28 | throw new RegSyntax("{"+a+","+b+"} is invalid");
29 | if(p == null) RegSyntaxError.endItAll("Null length pattern "+
30 | "followed by *, +, or other Multi.");
31 | fewestMatches = a;
32 | mostMatches = b;
33 | sub = p;
34 | step = p.countMinChars().intValue();
35 | sub.setParent(null);
36 | }
37 | public String toString() {
38 | return sub.toString()+"{"
39 | +fewestMatches+","+mostMatches+"}"+
40 | (matchFewest ? "?" : "")+"(?# <= fast multi)"+
41 | nextString();
42 | }
43 | int step = -1;
44 | public int matchInternal(int pos,Pthings pt) {
45 | int m=-1;
46 | int i=pos;
47 | int endstr = pt.src.length()-step;
48 | patInt matches = new patInt(0);
49 | if(matchFewest) {
50 | if(fewestMatches.lessEq(matches)) {
51 | int ii = nextMatch(i,pt);
52 | if(ii >= 0) return ii;
53 | }
54 | while(i >= 0 && i <= endstr) {
55 | i=sub.matchInternal(i,pt);
56 | if(i >= 0) {
57 | matches.inc();
58 | if(fewestMatches.lessEq(matches)) {
59 | int ii = nextMatch(i,pt);
60 | if(ii >= 0) return ii;
61 | }
62 | if(matches.equals(mostMatches))
63 | return -1;
64 | }
65 | }
66 | return -1;
67 | }
68 | int nMatches = 0;
69 | while(fewestMatches.intValue() > nMatches) {
70 | i=sub.matchInternal(i,pt);
71 | if(i >= 0)
72 | nMatches++;
73 | else
74 | return -1;
75 | }
76 | m=i;
77 | if(mostMatches.finite()) {
78 | while(nMatches < mostMatches.intValue()) {
79 | i = sub.matchInternal(i,pt);
80 | if(i>=0) {
81 | m=i;
82 | nMatches++;
83 | } else break;
84 | }
85 | } else {
86 | while(true) {
87 | i = sub.matchInternal(i,pt);
88 | if(i>=0) {
89 | m=i;
90 | nMatches++;
91 | } else break;
92 | }
93 | }
94 | while(m >= pos) {
95 | int r=nextMatch(m,pt);
96 | if(r >= 0) return r;
97 | m -= step;
98 | nMatches--;
99 | if(nMatches < fewestMatches.intValue())
100 | return -1;
101 | }
102 | return -1;
103 | }
104 | public Pattern clone1(Hashtable h) {
105 | try {
106 | FastMulti fm = new FastMulti(fewestMatches,mostMatches,sub.clone(h));
107 | fm.matchFewest = matchFewest;
108 | return fm;
109 | } catch(RegSyntax rs) {
110 | return null;
111 | }
112 | }
113 | }
114 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/StrPos.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | /**
10 | Shareware: package pat
11 | Copyright 2001, Steven R. Brandt
12 | */ /**
13 | StrPos is used internally by regex to parse the regular expression. */
14 | public class StrPos {
15 | String s;
16 | int pos;
17 | /** Return the position in the string pointed to */
18 | public int pos() { return pos; }
19 |
20 | /** This contains the escape character, which is \ by default. */
21 | public char esc=Pattern.ESC;
22 | char c;
23 | /** Returns the current, possibly escaped, character. */
24 | public char thisChar() { return c; }
25 |
26 | boolean dontMatch,eos;
27 |
28 | /** tell whether we are at end of string */
29 | public boolean eos() { return eos; }
30 | /** initialize a StrPos from another StrPos. */
31 | public StrPos(StrPos sp) {
32 | dup(sp);
33 | }
34 | /** copy a StrPos from sp to this. */
35 | public void dup(StrPos sp) {
36 | s = sp.s;
37 | pos = sp.pos;
38 | c = sp.c;
39 | dontMatch = sp.dontMatch;
40 | eos = sp.eos;
41 | }
42 | /** Initialize a StrPos by giving it a String, and a
43 | position within the String. */
44 | public StrPos(String s,int pos) {
45 | this.s=s;
46 | this.pos=pos-1;
47 | inc();
48 | }
49 | /** Advance the place where StrPos points within the String.
50 | Counts a backslash as part of the next character. */
51 | public StrPos inc() {
52 | pos++;
53 | if(pos >= s.length()) {
54 | eos = true;
55 | return this;
56 | }
57 | eos = false;
58 | c = s.charAt(pos);
59 | if(c == esc && pos+1st that matches a non-escaped
88 | character. */
89 | public boolean incMatch(String st) {
90 | StrPos sp = new StrPos(this);
91 | int i;
92 | for(i=0;i= '0' && sp.c <= '9';i++) {
108 | cnt = 10*cnt+sp.c-'0';
109 | sp.inc();
110 | }
111 | if(i==0) return null;
112 | dup(sp);
113 | return new patInt(cnt);
114 | }
115 | /** get the string that we are processing. */
116 | public String getString() { return s; }
117 | };
118 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/DotMulti.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.Hashtable;
10 |
11 | /** A special optimization of multi that is used when the
12 | * common subpattern ".*" is encountered.
13 | */
14 | class DotMulti extends PatternSub {
15 | patInt fewestMatches,mostMatches;
16 | public patInt minChars() {
17 | return fewestMatches;
18 | }
19 | public patInt maxChars() {
20 | return mostMatches;
21 | }
22 | public boolean matchFewest = false;
23 |
24 | StringLike src=null;
25 | int srclength=0;
26 | boolean dotDoesntMatchCR=true;
27 | DotMulti(patInt a,patInt b) {
28 | fewestMatches = a;
29 | mostMatches = b;
30 | }
31 | public String toString() {
32 | return ".{"
33 | +fewestMatches+","+mostMatches+"}"+
34 | (matchFewest ? "?" : "")+"(?# <= dot multi)"+
35 | nextString();
36 | }
37 | final int submatchInternal(int pos,Pthings pt) {
38 | if(pos < srclength) {
39 | if(dotDoesntMatchCR) {
40 | if(src.charAt(pos) != '\n')
41 | return 1+pos;
42 | } else return 1+pos;
43 | }
44 | return -1;
45 | }
46 | final static int step = 1;
47 | static int idcount = 1;
48 | public int matchInternal(int pos,Pthings pt) {
49 | int m=-1;
50 | int i=pos;
51 | src = pt.src;
52 | srclength = src.length();
53 | dotDoesntMatchCR = pt.dotDoesntMatchCR;
54 | if(matchFewest) {
55 | int nMatches = 0;
56 | while(fewestMatches.intValue() > nMatches) {
57 | i=submatchInternal(i,pt);
58 | if(i<0) return -1;
59 | nMatches++;
60 | }
61 | if(i<0) return -1;
62 | int ii = nextMatch(i,pt);
63 | if(ii >= 0) return ii;
64 | if(!mostMatches.finite()) {
65 | while(i >= 0) {
66 | i = submatchInternal(i,pt);
67 | if(i < 0) return -1;
68 | ii = nextMatch(i,pt);
69 | if(ii >= 0) return ii;
70 | }
71 | } else {
72 | while(i > 0) {
73 | i = submatchInternal(i,pt);
74 | if(i < 0) return -1;
75 | nMatches++;
76 | if(nMatches > mostMatches.intValue())
77 | return -1;
78 | ii = nextMatch(i,pt);
79 | if(ii >= 0) return ii;
80 | }
81 | }
82 | return -1;
83 | }
84 | int nMatches = 0;
85 | while(fewestMatches.intValue() > nMatches) {
86 | i=submatchInternal(i,pt);
87 | if(i >= 0)
88 | nMatches++;
89 | else
90 | return -1;
91 | }
92 | m=i;
93 | if(mostMatches.finite()) {
94 | while(nMatches < mostMatches.intValue()) {
95 | i = submatchInternal(i,pt);
96 | if(i>=0) {
97 | m=i;
98 | nMatches++;
99 | } else break;
100 | }
101 | } else {
102 | while(true) {
103 | i = submatchInternal(i,pt);
104 | if(i>=0) {
105 | m=i;
106 | nMatches++;
107 | } else break;
108 | }
109 | }
110 | while(m >= pos) {
111 | int r=nextMatch(m,pt);
112 | if(r >= 0) return r;
113 | m -= step;
114 | nMatches--;
115 | if(nMatches < fewestMatches.intValue())
116 | return -1;
117 | }
118 | return -1;
119 | }
120 | Pattern clone1(Hashtable h) {
121 | DotMulti dm = new DotMulti(fewestMatches,mostMatches);
122 | dm.matchFewest = matchFewest;
123 | return dm;
124 | }
125 | }
126 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/RegexTokenizer.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 | import java.util.*;
10 | /**
11 | Shareware: package pat
12 | Copyright 2001, Steven R. Brandt
13 | */ /**
14 | The RegexTokenizer is similar to the StringTokenizer class
15 | provided with java, but allows one to tokenize using
16 | regular expressions, rather than a simple list of characters.
17 | Tokens are any strings between the supplied regular expression,
18 | as well as any backreferences (things in parenthesis)
19 | contained within the regular expression. */
20 | public class RegexTokenizer implements Enumeration {
21 | String toParse;
22 | Regex r;
23 | int count = 0;
24 | Vector v = new Vector();
25 | Vector vi = new Vector();
26 | int pos=0;
27 |
28 | int offset = 1;
29 | void getMore() {
30 | String s = r.right();
31 | if(r.searchFrom(toParse,pos)) {
32 | v.addElement(r.left().substring(pos));
33 | vi.addElement(new Integer(r.matchFrom()+
34 | r.charsMatched()));
35 | for(int i=0;i= v.size()) getMore();
65 | return v.elementAt(count++);
66 | }
67 | /** This is the equivalent (String)nextElement(). */
68 | public String nextToken() { return (String)nextElement(); }
69 | /** This asks for the next token, and changes the pattern
70 | being used at the same time. */
71 | public String nextToken(String newpat) {
72 | try { r.compile(newpat); } catch (RegSyntax r_) {}
73 | return nextToken(r);
74 | }
75 | /** This asks for the next token, and changes the pattern
76 | being used at the same time. */
77 | public String nextToken(Regex nr) {
78 | r = nr;
79 | if(vi.size() > count) {
80 | pos = ((Integer)vi.elementAt(count)).intValue();
81 | v.setSize(count);
82 | vi.setSize(count);
83 | }
84 | getMore();
85 | return nextToken();
86 | }
87 | /** Tells whether there are more tokens in the pattern. */
88 | public boolean hasMoreElements() {
89 | if(count >= v.size()) getMore();
90 | return count < v.size();
91 | }
92 | /** Tells whether there are more tokens in the pattern, but
93 | in the fashion of StringTokenizer. */
94 | public boolean hasMoreTokens() { return hasMoreElements(); }
95 | /** Determines the # of remaining tokens */
96 | public int countTokens() {
97 | int old_pos=pos,_count=count;
98 | while(hasMoreTokens())
99 | nextToken();
100 | count=_count;
101 | return v.size()-count;
102 | }
103 | /** Returns all tokens in the String */
104 | public String[] allTokens() {
105 | countTokens();
106 | String[] ret = new String[v.size()];
107 | v.copyInto(ret);
108 | return ret;
109 | }
110 | };
111 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Skip.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | /** This class is used internally to search ahead for some
11 | optimized Regex objects. It searches within a String
12 | for occurrences of a given String -- like a more flexible
13 | version of String.indexOf.
14 | @see com.stevesoft.pat.Skip2
15 | @see com.stevesoft.pat.SkipBMH
16 | */
17 | public class Skip {
18 | static int mkmask(int c) {
19 | char x = (char)c;
20 | return ~( CaseMgr.toUpperCase(x) |
21 | CaseMgr.toLowerCase(x) |
22 | CaseMgr.toTitleCase(x));
23 | }
24 | static { int x = Regex.BackRefOffset; }
25 | String src;
26 | int c,mask;
27 | int offset;
28 | boolean ign,m1;
29 | /** Examine a Regex to determine what String it will
30 | attempt to skip to when searching for patterns.
31 | Return null if we aren't doing this. */
32 | public static String string(Regex r) {
33 | return r.skipper == null ? null : r.skipper.src;
34 | }
35 | /** Determine the offset of the String within the pattern
36 | that we are skipping to. Return -1 if we aren't doing
37 | this. */
38 | public static int offset(Regex r) {
39 | return r.skipper == null ? -1 : r.skipper.offset;
40 | }
41 | /** Initialize, give it a String to search for, tell it
42 | whether or not to ignoreCase, and what the offset is
43 | of the String within the String to be searched. */
44 | public Skip(String s,boolean ign,int o) {
45 | src = s;
46 | c = s.charAt(0);
47 | if(ign) {
48 | mask = mkmask(c);
49 | } else mask = 0;
50 | offset = o;
51 | this.ign = ign;
52 | m1 = (s.length()==1);
53 | }
54 | /** The same as find(s,0,s.length()) */
55 | public final int find(StringLike s) {
56 | return find(s,0,s.length());
57 | }
58 | static final int min(int a,int b) { return a end) return -1;
63 | start += offset;
64 | int vend = min(s.length()-1,end+offset);
65 | if(mask != c) {
66 | for(int i=start;i<=vend;i++)
67 | if(0 == (s.charAt(i) & mask))
68 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) )
69 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) )
70 | return i-offset;
71 | } else {
72 | for(int i=start;i<=vend;i++)
73 | if(c == s.charAt(i))
74 | //if(m1||s.regionMatches(ign,i,src,0,src.length()) )
75 | if(m1||CaseMgr.regionMatches(s,ign,i,src,0,src.length()) )
76 | return i-offset;
77 | }
78 | return -1;
79 | }
80 | static Skip findSkip(Regex r) {
81 | return findSkip(r.thePattern,r.ignoreCase,!r.dontMatchInQuotes);
82 | }
83 | // look for things that can be skipped
84 | static Skip findSkip(Pattern p,boolean ignoreCase,boolean trnc) {
85 | StringBuffer sb = new StringBuffer();
86 | Skip subsk = null;
87 | int offset = 0;
88 | int skipc = -1,skipoff=0;
89 | for(;p != null;p = p.next) {
90 | if(p instanceof oneChar) {
91 | skipc = ((oneChar)p).c;
92 | skipoff = offset;
93 | }
94 | if(p instanceof oneChar && p.next instanceof oneChar) {
95 | Pattern psav = p;
96 | sb.append(((oneChar)p).c);
97 | while(p.next instanceof oneChar) {
98 | sb.append(((oneChar)p.next).c);
99 | p = p.next;
100 | }
101 | String st = sb.toString();
102 | char c0 = st.charAt(0), c1 = st.charAt(1);
103 | Skip sk=null;
104 | if(st.length()>2)
105 | sk = new SkipBMH(st,ignoreCase,offset);
106 | else
107 | sk = new Skip2(st,ignoreCase,offset);
108 | if(trnc && st.length()>2) { // chop out a whole string...
109 | psav.next = new Skipped(st.substring(1));
110 | psav.next.next = p.next;
111 | psav.next.parent = p.parent;
112 | }
113 | return sk;
114 | } else if(p instanceof Or && ((Or)p).v.size()==1
115 | && !((Or)p).leftForm().equals("(?!")
116 | && null != (subsk=
117 | findSkip( (Pattern)((Or)p).v.elementAt(0),ignoreCase,trnc) )) {
118 | subsk.offset += offset;
119 | return subsk;
120 | } else if(p.minChars().equals(p.maxChars())) {
121 | offset += p.minChars().intValue();
122 | } else return skipc < 0 ? null :
123 | new Skip(""+(char)skipc,ignoreCase,skipoff);
124 | }
125 | return null;
126 | }
127 | }
128 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Ctrl.java:
--------------------------------------------------------------------------------
1 | package//
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | com.stevesoft.pat;
9 |
/** Lookup table indexed by a character code in the range 0-255.
    NOTE(review): presumably this gives the control character selected by
    an escape such as \cX -- confirm against the caller in Regex.
    <p>
    The mapping flips bit 6 of the code (c ^ 64), except that the
    lower-case letters 'a'..'z' map to 1..26, the same values as their
    upper-case counterparts.
    <p>
    The table used to be written out as 256 literals; two of them broke
    the pattern (index 34, '"', held 97 instead of 98 and index 92, '\',
    held 27 instead of 28).  Computing the table removes those slips. */
public class Ctrl {
  public final static char[] cmap = buildMap();

  /** Fills in the 256-entry table described on the class. */
  private static char[] buildMap() {
    char[] map = new char[256];
    for (int c = 0; c < map.length; c++) {
      if (c >= 'a' && c <= 'z') {
        map[c] = (char) (c - 'a' + 1);
      } else {
        map[c] = (char) (c ^ 64);
      }
    }
    return map;
  }
}
270 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/Transformer.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | import java.util.Vector;
11 | import com.stevesoft.pat.wrap.StringWrap;
12 |
13 | /** Replacement rule used by the Transformer.
14 | @see com.stevesoft.pat.Transformer
15 | */
16 | class TransRepRule extends ReplaceRule {
17 | Transformer t;
18 | TransRepRule(Transformer t) { this.t = t; }
19 | public String toString1() { return ""; }
20 | public Object clone1() { return new TransRepRule(t); }
21 | public void apply(StringBufferLike sb,RegRes rr) {
22 | // get the ReplaceRule of the Regex that matched.
23 | next = t.tp.ra[t.tp.pn].getReplaceRule();
24 | }
25 | }
26 |
27 | /** Sometimes you want to replace a whole bunch of things
28 | that might occur within a single line of text. One efficient
29 | way to do this, both in terms of performance and programming
30 | ease, is with Transformer. The Transformer contains an array
31 | of Regex's and uses the Regex that matches earliest within the
32 | text to do the replacing, if two Regex's match at the same
33 | time it uses the one put in the Transformer first.
34 |
35 | This feature can be used to prevent transformations from
36 | occurring in certain regions. For example, if I add the rule
37 | s'//.*'$&' and then add the
38 | rule s/hello/goodbye/ the Transformer will replace "hello"
39 | with "goodbye" except when it occurs inside a double-slash
40 | style of comment. The transformation on the comment goes first,
41 | does nothing, and precludes transformation on the same region
42 | of text as the s/hello/goodbye/ rule.
43 |
44 | So far, at least, this class does not have the capability of
45 | turning into a giant robot :-)
46 | */
47 | public class Transformer {
48 | TransPat tp;
49 | Regex rp = new Regex();
50 | boolean auto_optimize;
51 |
52 | /** Get a replacer that works with the current Regex.
53 | @see com.stevesoft.pat.Replacer
54 | */
55 | public Replacer getReplacer() { return rp.getReplacer(); }
56 |
57 | /** Instantiate a new Transformer object. */
58 | public Transformer(boolean auto) {
59 | auto_optimize = auto;
60 | tp = new TransPat();
61 | rp.setReplaceRule(new TransRepRule(this));
62 | rp.thePattern = tp;
63 | }
64 |
65 | /** Add a new Regex to the set of Regex's. */
66 | public void add(Regex r) {
67 | if(auto_optimize) r.optimize();
68 | tp.ra[tp.ra_len++] = r;
69 | if(tp.ra.length==tp.ra_len) {
70 | Regex[] ra2 = new Regex[tp.ra_len+10];
71 | for(int i=0;i rp.numSubs_ ? r.numSubs_ : rp.numSubs_;
76 | }
77 |
78 | /** Returns the number of Regex's in this Transformer. */
79 | public int patterns() { return tp.ra_len; }
80 |
81 | /** Get the Regex at position i in this Transformer. */
82 | public Regex getRegexAt(int i) {
83 | if(i >= tp.ra_len)
84 | throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns());
85 | if(i < 0)
86 | throw new ArrayIndexOutOfBoundsException("i="+i+"< 0");
87 | return tp.ra[i];
88 | }
89 | /** Set the Regex at position i in this Transformer. */
90 | public void setRegexAt(Regex rx,int i) {
91 | if(i >= tp.ra_len)
92 | throw new ArrayIndexOutOfBoundsException("i="+i+">="+patterns());
93 | if(i < 0)
94 | throw new ArrayIndexOutOfBoundsException("i="+i+"< 0");
95 | tp.ra[i] = rx;
96 | }
97 |
98 | /** Add a new Regex by calling Regex.perlCode
99 | @see com.stevesoft.pat.Regex#perlCode(java.lang.String)
100 | */
101 | public void add(String rs) {
102 | Regex r = Regex.perlCode(rs);
103 | if(r == null) throw new NullPointerException("bad pattern to Regex.perlCode: "+rs);
104 | add(r);
105 | }
106 | /** Add an array of Strings (which will be converted to
107 | Regex's via the Regex.perlCode method.
108 | @see com.stevesoft.pat.Regex#perlCode(java.lang.String)
109 | */
110 | public void add(String[] array) {
111 | for(int i=0;i
19 | -i : ignore case
20 | -p : paragraph based matching
21 | -v : invert, print only lines that don't match
22 |
23 | */
24 | public class Grep {
25 |
26 | boolean iflag=false, pflag=false,vflag=false,verbose=false;
27 | Regex re=null;
28 | Vector v=new Vector();
29 | public static void main(String[] args) throws Exception {
30 | Grep g = new Grep();
31 | g.doArgs(args);
32 | }
33 |
34 | // -- BEGIN OPTIONS -- //
35 |
36 | /** The ignore case flag */
37 | public boolean getIFlag() {
38 | return iflag;
39 | }
40 | /** The ignore case flag */
41 | public void setIFlag(boolean b) {
42 | iflag = b;
43 | }
44 | /** The paragraph mode flag */
45 | public boolean getPFlag() {
46 | return pflag;
47 | }
48 | /** The paragraph mode flag */
49 | public void setPFlag(boolean b) {
50 | pflag = b;
51 | }
52 | /** If the vflag is true, then only lines not
53 | matching the supplied pattern will be printed. */
54 | public boolean getVFlag() {
55 | return vflag;
56 | }
57 | /** If the vflag is true, then only lines not
58 | matching the supplied pattern will be printed. */
59 | public void setVFlag(boolean b) {
60 | vflag = b;
61 | }
62 | /** Determine if file and line number info is written. */
63 | public boolean getVerbose() {
64 | return verbose;
65 | }
66 | /** Determine if file and line number info is written. */
67 | public void setVerbose(boolean b) {
68 | verbose = b;
69 | }
70 | /** The pattern to be searched for */
71 | public void setRegex(Regex r) {
72 | re = r;
73 | }
74 | /** The pattern to be searched for */
75 | public Regex getRegex() {
76 | return re;
77 | }
78 | int lineno=0;
79 | /** Line number info reported by verbose */
80 | void setLineno(int n) {
81 | lineno=n;
82 | }
83 | /** Line number info reported by verbose */
84 | int getLineno() {
85 | return lineno;
86 | }
87 | /** Line number info reported by verbose */
88 | void incLineno() {
89 | lineno++;
90 | }
91 | String _file = "";
92 | /** File name info reported by verbose */
93 | String getFile() { return _file; }
94 | /** File name info reported by verbose */
95 | void setFile(String s) { _file=s; }
96 |
97 | // -- END OPTIONS -- //
98 |
99 | void doArgs(String[] args) throws Exception {
100 | // Process command line arguments
101 | for(int i=0;i1)
136 | setVerbose(true);
137 |
138 | // Process files
139 | if(v.size()==0)
140 | doInputStream(System.in);
141 | for(int i=0;i= getl(v.elementAt(i))) {
68 | Pattern p2 = (Pattern)v.elementAt(i);
69 | char lo = min(getl(p),getl(p2));
70 | char hi = max(geth(p),geth(p2));
71 | nv.setElementAt(p=mkelem(lo,hi),nv.size()-1);
72 | } else {
73 | p = (Pattern)v.elementAt(i);
74 | nv.addElement(p);
75 | }
76 | }
77 |
78 | b.v = v = nv;
79 | } catch(RegSyntax e) {
80 | e.printStackTrace();
81 | }
82 |
83 | // We don't want these things to be empty.
84 | Vector negv = neg(v);
85 | if(v.size()==1) return b;
86 | if(negv.size()==1) {
87 | b.v = negv;
88 | b.neg = !b.neg;
89 | return b;
90 | }
91 |
92 | // Now consider if we can make a FastBracket.
93 | // Uses a BitSet to do a lookup.
94 | FastBracket fb = newbrack(v,b.neg);
95 | if(fb == null)
96 | fb = newbrack(negv,!b.neg);
97 | if(fb != null) {
98 | fb.parent = b.parent;
99 | fb.next = b.next;
100 | return fb;
101 | }
102 |
103 | // return the normal Bracket.
104 | return b;
105 | }
106 |
107 | // Build a FastBracket and set bits. If this can't
108 | // be done, return null.
109 | final static FastBracket newbrack(Vector v,boolean neg) {
110 | FastBracket fb = new FastBracket(neg);
111 | fb.v = v;
112 | if(v.size()==0) return null;
113 | fb.min = getl(v.elementAt(0));
114 | fb.max = geth(v.elementAt(v.size()-1));
115 | if(fb.max-fb.min <= 256) {
116 | fb.bs = new BitSet(fb.max-fb.min+1);
117 | for(int i=0;ib ? a : b;
165 | }
166 |
167 | // getl -- get lower value of Range object,
168 | // or get value of oneChar object.
169 | final static char getl(Object o) {
170 | Pattern p = (Pattern)o;
171 | if(p instanceof Range)
172 | return ((Range)p).lo;
173 | return ((oneChar)p).c;
174 | }
175 | // geth -- get higher value of Range object,
176 | // or get value of oneChar object.
177 | final static char geth(Object o) {
178 | Pattern p = (Pattern)o;
179 | if(p instanceof Range)
180 | return ((Range)p).hi;
181 | return ((oneChar)p).c;
182 | }
183 |
184 | // This is the easy part!
185 | public int matchInternal(int pos,Pthings pt) {
186 | if(pos >= pt.src.length() || Masked(pos,pt)) return -1;
187 | char c = pt.src.charAt(pos);
188 | return (neg ^ (c >= min && c <= max && bs.get(c-min)) ) ?
189 | nextMatch(pos+1,pt) : -1;
190 | }
191 | }
192 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/RegexWriter.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | import java.io.*;
11 | import com.stevesoft.pat.wrap.*;
12 |
13 | /** A basic extension of Writer that uses Transformer
14 | to make replacements in data as it is written out. It attempts
15 | to transform a string whenever the End-of-Line (EOL) character
16 | is written (which is, by default, the newline character '\n').
17 | Only the transformed portion of the line is written out, allowing
18 | the RegexWriter to wait until a complete pattern is present before
19 | attempting to write out info. Until a pattern completes, data is
20 | stored in a StringBuffer -- which can be accessed through the
21 | length() and charAt() methods of this class.
22 |
23 | Note a subtlety here -- while a Transformer normally matches
24 | at higher priority against the pattern added to it first, this
25 | will not necessarily be true when a multi-line match is in progress
26 | because one of the complete multi-line patterns may not be completely
27 | loaded in RegexWriter's buffer. For this reason, the Transformer
28 | class is equipped with a way to add a pattern and replacement rule
29 | in three pieces -- a beginning (once this matches, nothing else in
30 | the Transformer can match until the whole pattern matches), an
31 | ending (the whole pattern is a String formed by adding the beginning
32 | and ending), and a ReplaceRule.
33 |
34 | An illustration of this is given in this
35 | example.
36 | */
37 | public class RegexWriter extends Writer {
38 | Replacer repr; // obtained from the Transformer/Regex given to the constructor
39 | Writer w; // destination for text leaving the buffer
40 | WriterWrap ww; // wraps w; handed to repr as its output buffer
41 | StringBuffer sb = new StringBuffer(); // holds text that write() has not yet passed on to w
42 | PartialBuffer wrap = new PartialBuffer(sb); // view of sb that repr uses as its match source
43 | int pos, epos; // write() copies sb characters from pos up to epos and attempts matches at epos
44 | int interval = 128; // NOTE(review): not referenced in the visible code -- confirm where it is used
45 | int bufferSize = 2*1024; // write() compares pos against this value
46 |
47 | public RegexWriter(Transformer t,Writer w) { // replacement rules come from t's Replacer
48 | WriterWrap sink = new WriterWrap(w);
49 | Replacer replacer = t.getReplacer();
50 | replacer.setBuffer(new StringBufferLike(sink)); // replacer output is directed to w via the wrapper
51 | replacer.setSource(wrap); // matching reads from the internal buffer
52 | this.w = w; this.ww = sink; this.repr = replacer;
53 | }
54 | public RegexWriter(Regex r,Writer w) { // replacement rules come from r's Replacer
55 | WriterWrap sink = new WriterWrap(w);
56 | Replacer replacer = r.getReplacer();
57 | replacer.setBuffer(new StringBufferLike(sink)); // replacer output is directed to w via the wrapper
58 | replacer.setSource(wrap); // matching reads from the internal buffer
59 | this.w = w; this.ww = sink; this.repr = replacer;
60 | }
61 |
62 | char EOLchar = '\n'; // value exposed by getEOLchar/setEOLchar
63 | /** Returns the stored EOL character (initially '\n'). This method no longer serves any purpose.
64 | @deprecated
65 | */
66 | public char getEOLchar() {
67 | return EOLchar;
68 | }
69 | /** Stores c as the EOL character returned by getEOLchar. This method no longer serves any purpose.
70 | @deprecated
71 | */
72 | public void setEOLchar(char c) {
73 | EOLchar = c;
74 | }
75 |
76 | int max_lines=2; // value exposed by getMaxLines/setMaxLines
77 | /** Returns the stored max_lines value (initially 2). This method no longer serves any purpose.
78 | @deprecated
79 | */
80 | public int getMaxLines() { return max_lines; }
81 | /** Stores ml as the value returned by getMaxLines. This method no longer serves any purpose.
82 | @deprecated
83 | */
84 | public void setMaxLines(int ml) { max_lines = ml; }
85 |
86 | void write() throws IOException {
87 | Regex rex = repr.getRegex();
88 | int eposOld = epos;
89 | if(rex.matchAt(wrap,epos) && !wrap.overRun) {
90 | while(pos < epos)
91 | w.write(sb.charAt(pos++));
92 | int to = rex.matchedTo();
93 | repr.setPos(to);
94 | repr.apply(rex,rex.getReplaceRule());
95 | epos = pos = to;
96 | if(epos == eposOld && epos < sb.length())
97 | epos++;
98 | } else if(!wrap.overRun && epos < sb.length()) {
99 | epos++;
100 | }
101 | while(pos < epos)
102 | w.write(sb.charAt(pos++));
103 | if(epos == sb.length()) {
104 | sb.setLength(1);
105 | pos = epos = 1;
106 | } else if(pos > bufferSize) {
107 | for(int i=bufferSize;iCopyright 2001, Steven R. Brandt
13 | */ /**
14 | Class Pattern is the base class on which all the other pattern
15 | elements are built. */
16 |
17 | public abstract class Pattern {
18 | /** The ESC character, the user can provide his own value
19 | for the escape character through regex.esc */
20 | public final static char ESC = '\\';
21 | final static String PROTECT_THESE = "[]{}(),$,-\"^.";
22 |
23 | /** The internal match function, it must be provided by any
24 | class which wishes to extend Pattern. Return -1 when the element does not match at i. */
25 | public abstract int matchInternal(int i,Pthings p);
26 | public abstract String toString();
27 |
28 | // Class Pattern is a singly linked list
29 | // chained together by member next. The member
30 | // parent is used so that sub patterns can access
31 | // the chain they are branching from.
32 | Pattern next=null,parent=null;
33 |
34 | /** Returns the pattern element to try after this one.
35 | When this element ends a subchain (next is null),
36 | the search continues with the parent's successor;
37 | null is returned when there is no parent either. */
38 | public Pattern getNext() {
39 | if(next != null) return next;
40 | return parent == null ? null : parent.getNext();
41 | }
42 | /** Records p as the parent of the chain that starts here.
43 | The request is handed down the next links, so only the
44 | last element of the chain stores p. Call this after adding
45 | a sub pattern to an element that takes one (such as Or). */
46 | public void setParent(Pattern p) {
47 | if(next == null) parent = p;
48 | else next.setParent(p);
49 | }
50 | /** Checks whether the remainder of the Pattern
51 | matches starting at i. From within matchInternal,
52 | "return nextMatch(...)" when the current element
53 | matches; otherwise return -1. */
54 | public int nextMatch(int i,Pthings pt) {
55 | Pattern following = getNext();
56 | // No element left: the match ends at i.
57 | if(following == null) return i;
58 | return following.matchInternal(i,pt);
59 | }
60 | /** Returns toString() of the element that follows
61 | this one, or "" when this is the last element of
62 | its chain. Intended to be called from within an
63 | overriding toString(). */
64 | public String nextString() {
65 | return next == null ?
66 | "" : next.toString();
67 | }
68 |
69 | /** a method to detect whether char c is in String s */
70 | final static boolean inString(char c,String s) {
71 | int i;
72 | for(i=0;iCopyright 2001, Steven R. Brandt
14 | */ /**
15 | This class is used to store a result from Regex */
16 | public class RegRes implements Cloneable {
17 | protected int[] marks = null; // marks[i+numSubs_] is the end of backreference i (see matchedTo(int)); starts appear to be kept in marks[i]
18 | protected boolean didMatch_ = false; // reported by didMatch()
19 | protected StringLike src=null; // the text that was matched against
20 |
21 | /** Obtain the text String that was matched against, i.e. src.toString(). */
22 | public String getString() { return src.toString(); }
23 | /** Obtain the source StringLike object that was matched against, without converting it. */
24 | public StringLike getStringLike() { return src; }
25 | protected int charsMatched_=0,matchFrom_=0,numSubs_=0;
26 | public String toString() {
27 | StringBuffer sb = new StringBuffer();
28 | sb.append("match="+matchedFrom()+":"+charsMatched());
29 | if(!didMatch()) return sb.toString();
30 | for(int i=0;inumSubs_) return -1;
87 | //Integer in=(Integer)marks.get("left"+i);
88 | //return in == null ? -1 : in.intValue();
89 | return marks[i];
90 | }
91 | /** Returns the number of characters matched by backreference i, or -1 if
92 | there are no marks, i exceeds numSubs(), the match failed, or i did not match. */
93 | public int charsMatched(int i) {
94 | if(!(marks!=null && i<=numSubs_ && didMatch_)) return -1;
95 | // marks[i+numSubs_] holds the end position of
96 | // backreference i (see matchedTo(int)).
97 | int start = matchedFrom(i);
98 | return start >= 0 ? marks[i+numSubs_]-matchedFrom(i) : -1;
99 | }
100 | /** This is either equal to matchedFrom(i)+charsMatched(i) if the match
101 | was successful, or -1 if it was not. */
102 | public int matchedTo(int i) {
103 | if(marks==null||i>numSubs_||!didMatch_) return -1;
104 | return marks[i+numSubs_];
105 | }
106 | /** Returns the substring matched by the ith set
107 | of parentheses in the pattern (see numSubs()),
108 | or null if the match failed or the ith
109 | backreference did not match. */
110 | public String stringMatched(int i) {
111 | int start = matchedFrom(i), length = charsMatched(i);
112 | if(!didMatch_ || start<0 || length<0) return null;
113 | return src.substring(start,start+length);
114 | }
115 | /** Returns the part of the string that precedes the match,
116 | or null if the match failed. */
117 | public String left() {
118 | int start = matchedFrom();
119 | return (didMatch_ && start>=0) ? src.substring(0,start) : null;
120 | }
121 | /** Returns the part of the string that precedes the ith
122 | backreference, or null if the backreference did not match. */
123 | public String left(int i) {
124 | int start = matchedFrom(i);
125 | return (didMatch_ && start>=0) ? src.substring(0,start) : null;
126 | }
127 | /** Returns the part of the string that follows the match,
128 | or null if the match failed. */
129 | public String right() {
130 | int start = matchedFrom(), length = charsMatched();
131 | if(!didMatch_ || start<0 || length<0) return null;
132 | return src.substring(start+length,src.length());
133 | }
134 | /** Returns the part of the string to the right of the ith backreference,
135 | or null if the backreference did not match. */
136 | public String right(int i) {
137 | int start = matchedFrom(i), length = charsMatched(i);
138 | if(!didMatch_ || start<0 || length<0) return null;
139 | return src.substring(start+length,src.length());
140 | }
141 | /** Returns the location of the first matching character
142 | after a successful match, or -1 if the match failed. */
143 | public int matchedFrom() { return didMatch_ ? matchFrom_ : -1; }
144 | /** Returns the number of characters in the match after a successful
145 | match, or -1 if the match failed or the start position is negative. */
146 | public int charsMatched() { return (didMatch_ && matchFrom_>=0) ? charsMatched_ : -1; }
147 | /** Returns matchFrom_+charsMatched_ (the position just past the match)
148 | after a successful match, or -1 otherwise. */
149 | public int matchedTo() { return didMatch_ ? matchFrom_+charsMatched_ : -1;}
150 | /** This returns the number of
151 | backreferences (parentheses) in the pattern,
152 | i.e. the pattern "(ab)" has
153 | one, the pattern "(a)(b)" has two, etc. The value is read from numSubs_. */
154 | public int numSubs() { return numSubs_; }
155 | /** Returns true if the last match was successful (the didMatch_ flag). */
156 | public boolean didMatch() { return didMatch_; }
157 |
158 | /** An older name for matchedFrom(); simply delegates to it. */
159 | public int matchFrom() { return matchedFrom(); }
160 | /** An older name for stringMatched(); simply delegates to it. */
161 | public String substring() { return stringMatched(); }
162 | /** An older name for matchedFrom(int); simply delegates to it for backreference i. */
163 | public int matchFrom(int i) { return matchedFrom(i); }
164 | /** An older name for stringMatched(int); simply delegates to it for backreference i. */
165 | public String substring(int i) { return stringMatched(i); }
166 | }
167 |
--------------------------------------------------------------------------------
/com/stevesoft/pat/RegexReader.java:
--------------------------------------------------------------------------------
1 | //
2 | // This software is now distributed according to
3 | // the Lesser Gnu Public License. Please see
4 | // http://www.gnu.org/copyleft/lesser.txt for
5 | // the details.
6 | // -- Happy Computing!
7 | //
8 | package com.stevesoft.pat;
9 |
10 | import java.io.*;
11 | import com.stevesoft.pat.wrap.*;
12 |
13 | /** This class allows you to replace the text in strings
14 | as you read them in. Be careful what you do with
15 | this freedom... using Regex.perlCode("s{.*}{x}s")
16 | as your pattern will result in loading the entire
17 | contents of the Reader into memory.
18 | */
19 | public class RegexReader extends Reader {
20 | RBuffer rb = new RBuffer(new StringBuffer()); // buffer that characters are currently served from
21 | PartialBuffer wrap = new PartialBuffer(rb.sb); // view of rb.sb handed to the regex for matching
22 | boolean moreToRead = true; // cleared by readData() once r yields no characters at all
23 | Reader r; // underlying source of characters
24 | Replacer rp; // supplies the Regex and applies its ReplaceRule
25 |
26 | // the buffer size: readData() stops shortly after reading nmax characters in one call
27 | int nmax = 2*1024;
28 |
29 | public RegexReader(Regex rex,Reader r) { // reads from r, replacing text with rex's Replacer
30 | this.r = r;
31 | rp = rex.getReplacer();
32 | }
33 | public RegexReader(Transformer tex,Reader r) { // reads from r, replacing text with tex's Replacer
34 | this.r = r;
35 | rp = tex.getReplacer();
36 | }
37 | public void reset() throws IOException { // resets the underlying reader and discards all buffered text
38 | r.reset(); // may throw IOException; in that case the state below is left untouched
39 | rb = new RBuffer(new StringBuffer());
40 | wrap = new PartialBuffer(rb.sb); // the match view must point at the new buffer
41 | moreToRead = true;
42 | }
43 | void readData() throws IOException { // appends the next batch of characters from r to rb.sb
44 | int c;
45 | int n = 0; // number of characters appended by this call
46 | while( (c = r.read()) != -1) {
47 | rb.sb.append((char)c);
48 | if(n++ > nmax) // post-increment: the loop stops after nmax+2 characters
49 | break;
50 | }
51 | if(c == -1 && n == 0) { // end of input reached without reading anything in this call
52 | moreToRead = false;
53 | wrap.allowOverRun = false; // NOTE(review): presumably stops matches from waiting for more input -- confirm in PartialBuffer
54 | }
55 | }
56 | void getMoreData() throws IOException {
57 | while(rb.pos >= rb.epos) {
58 | wrap.overRun = false;
59 | if(rb.next != null) {
60 | rb = rb.next;
61 | } else if(rb.done) {
62 | break;
63 | } else if(rb.epos >= rb.sb.length()
64 | && rb.epos > nmax) {
65 | rb.pos = 1;
66 | rb.epos = 1;
67 | rb.sb.setLength(1);
68 | readData();
69 | } else if(rb.epos >= rb.sb.length()
70 | && moreToRead) {
71 | readData();
72 | } else if(rp.getRegex().matchAt(wrap,rb.epos)) {
73 | if(wrap.overRun) {
74 | readData();
75 | } else {
76 | StringBufferWrap sbw = new StringBufferWrap();
77 | StringBufferLike sbl = new StringBufferLike(sbw);
78 | /*
79 | ReplaceRule rr = rex.getReplaceRule();
80 | while(rr != null) {
81 | rr.apply(sbl,rex);
82 | rr = rr.next;
83 | }
84 | */
85 | Regex rex = rp.getRegex();
86 | int npos = rex.matchedTo();
87 | rp.setBuffer(sbl);
88 | rp.setSource(wrap);
89 | rp.setPos(npos);
90 | rp.apply(rex,rex.getReplaceRule());
91 | int opos = rb.epos;
92 | RBuffer rb2 = new RBuffer((StringBuffer)sbw.unwrap());
93 | rb2.epos = rb2.sb.length();
94 | RBuffer rb3 = new RBuffer(rb.sb);
95 |
96 | rb.next = rb2;
97 | rb2.next = rb3;
98 |
99 | if(npos == opos) {
100 | rb3.epos = npos+1;
101 | if(rb3.epos > rb3.sb.length()) {
102 | if(rb.pos >= rb.epos)
103 | rb = rb.next;
104 | rb3.pos = rb3.epos = 0;
105 | rb3.done = true;
106 | //break;
107 | }
108 | rb3.pos = npos;
109 | } else {
110 | rb3.pos = rb3.epos = npos;
111 | }
112 |
113 | }
114 | } else {
115 | if(wrap.overRun) {
116 | readData();
117 | } else if(rb.epos= rb.epos) {
127 | getMoreData();
128 | if(rb.pos >= rb.epos)
129 | return -1;
130 | }
131 | //System.out.println(rb);
132 | return rb.sb.charAt(rb.pos++);
133 | }
134 | public int read(char[] buf,int off,int len)
135 | throws IOException
136 | {
137 | int c = -1;
138 | int end = off+len;
139 | for(int i=off;i
2 |
3 |
4 | Tutorial for Regular Expressions in Java, Part 3
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | | |
14 |
15 | Regular Expressions in Java
16 |
17 | Package com.stevesoft.pat version 1.5
18 |
19 | |
20 | |
22 |
23 |
24 |
38 | Online help...
39 | Quick Start
40 | Tutorial Part 1
41 | Tutorial Part 2
42 | Tutorial Part 3
43 | Tutorial Part 4
44 | Tutorial Part 5
45 | Tutorial Part 6
46 |
58 |  |
59 |
60 |
61 |
62 | Tutorial Part 3
63 | Pattern Elements
64 | \A, \Z, ^, $, \b, \B
65 |
66 | The pattern elements "^" and "\A" match the beginning of a String.
67 | Regex r = new Regex("^.....");
68 |
69 | r.search("Hello world.");
70 | System.out.println(r.stringMatched());
71 | // Prints "Hello"
72 |
73 | r.search(" Hello world.");
74 | System.out.println(r.didMatch());
75 | // Prints "false"
76 | |
77 |
78 | Likewise, the pattern element "$" or "\Z" matches the end of
79 | a String.
80 | Regex r = new Regex(".......$");
81 |
82 | r.search("Say goodbye");
83 | System.out.println(r.stringMatched());
84 | // Prints "goodbye"
85 | |
86 |
87 | You may, however, be interested in matching on a
88 | different sort of boundary, a "word boundary." This
89 | is the sort of thing you search for when you select
90 | a "match whole word" option from a search dialog
91 | box in a word processor. For example, suppose you
92 | wish to match on the word "some" but not words like
93 | "somehow" or "twosome."
94 | Regex r = new Regex("\\bsome\\b");
95 |
96 | r.search("somehow");
97 | System.out.println(""+r.didMatch());
98 | // Prints "false"
99 |
100 | r.search("twosome");
101 | System.out.println(""+r.didMatch());
102 | // Prints "false"
103 |
104 | r.search("some");
105 | System.out.println(""+r.didMatch());
106 | // Prints "true"
107 | |
108 | The "\\b" pattern element matches on the space
109 | between a word character ("\\w" or "[a-zA-Z0-9_]")
110 | and a non-word character ("\\W" or "[^a-zA-Z0-9_]").
111 | It will also match on the beginning or end of a
112 | String.
113 |
114 | There is also a "\\B" that will fail to match on a
115 | boundary that is a word boundary. In this example
116 | we want to match on a word that begins with
117 | "some" but is not the word "some" itself.
118 | Regex r = new Regex("\\bsome\\B");
119 |
120 | r.search("somehow");
121 | System.out.println(""+r.didMatch());
122 | // Prints "true"
123 |
124 | r.search("twosome");
125 | System.out.println(""+r.didMatch());
126 | // Prints "false"
127 |
128 | r.search("some");
129 | System.out.println(""+r.didMatch());
130 | // Prints "false"
131 | |
132 |
133 | Previous
134 | Next
135 |
136 | |
137 | |
138 |  |
141 |
142 |
143 |