Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members

FXRex.h
Go to the documentation of this file.
1 /********************************************************************************
2 * *
3 * R e g u l a r E x p r e s s i o n C l a s s *
4 * *
5 *********************************************************************************
6 * Copyright (C) 1999,2006 by Jeroen van der Zijp. All Rights Reserved. *
7 *********************************************************************************
8 * This library is free software; you can redistribute it and/or *
9 * modify it under the terms of the GNU Lesser General Public *
10 * License as published by the Free Software Foundation; either *
11 * version 2.1 of the License, or (at your option) any later version. *
12 * *
13 * This library is distributed in the hope that it will be useful, *
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU *
16 * Lesser General Public License for more details. *
17 * *
18 * You should have received a copy of the GNU Lesser General Public *
19 * License along with this library; if not, write to the Free Software *
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. *
21 *********************************************************************************
22 * $Id: FXRex.h,v 1.53 2006/01/22 17:58:09 fox Exp $ *
23 ********************************************************************************/
24 #ifndef FXREX_H
25 #define FXREX_H
26 
27 
28 namespace FX {
29 
30 
31 /// Regular expression error codes
32 enum FXRexError {
33  REGERR_OK, ///< No error (first enumerator, value 0)
34  REGERR_EMPTY, ///< Empty pattern
35  REGERR_PAREN, ///< Unmatched parenthesis
36  REGERR_BRACK, ///< Unmatched bracket
37  REGERR_BRACE, ///< Unmatched brace
38  REGERR_RANGE, ///< Bad character range
39  REGERR_ESC, ///< Bad escape sequence
40  REGERR_COUNT, ///< Bad counted repeat
41  REGERR_NOATOM, ///< No atom preceding repetition
42  REGERR_REPEAT, ///< Repeat following repeat
43  REGERR_BACKREF, ///< Bad backward reference
44  REGERR_CLASS, ///< Bad character class
45  REGERR_COMPLEX, ///< Expression too complex
46  REGERR_MEMORY, ///< Out of memory
47  REGERR_TOKEN ///< Illegal token
48  };
49 
50 
51 /// Regular expression parse flags
52 enum {
53  REX_NORMAL = 0, ///< Normal mode
54  REX_CAPTURE = 1, ///< Perform capturing parentheses
55  REX_ICASE = 2, ///< Case independent matching
56  REX_NEWLINE = 4, ///< Match-any operators match newline too
57  REX_VERBATIM = 8, ///< Disable interpretation of magic characters
58  REX_SYNTAX = 16 ///< Perform syntax check only
59  };
60 
61 
62 /// Regular expression match flags
63 enum {
64  REX_FORWARD = 0, ///< Match scanning forward from offset
65  REX_BACKWARD = 32, ///< Match scanning backward from offset
66  REX_NOT_BOL = 64, ///< Start of string is NOT begin of line
67  REX_NOT_EOL = 128, ///< End of string is NOT end of line
68  REX_NOT_EMPTY = 256 ///< Do not match empty
69  };
70 
71 
72 /**
73 * FXRex is a regular expression class implementing an NFA matcher.
74 * It supports capturing parentheses, non-capturing parentheses,
75 * positive or negative lookahead, backreferences, case-insensitive
76 * matching, counted repetitions, lazy or greedy matches, and
77 * PERL-like matching operators.
78 * The subject string may be scanned forwards or backwards, and may
79 * contain any of 256 possible character values.
80 *
81 * When parsing a regular expression pattern, the mode parameter is
82 * the bitwise OR of a set of flags and affects the match algorithm.
83 * Passing the flag REX_CAPTURE enables capturing parentheses
84 * and back references. The flag REX_ICASE enables case-insensitive
85 * matching. When the flag REX_NEWLINE is passed, newlines are treated
86 * like normal characters; otherwise, newline is NOT matched
87 * except when explicitly part of a character class. The flag
88 * REX_VERBATIM disables all special character interpretation.
89 *
90 * When matching a compiled pattern, the mode parameter is the
91 * bitwise OR of a set of flags that affects how the match is
92 * performed. Passing the flag REX_BACKWARD causes the match
93 * to proceed backwards through the subject string. Passing the
94 * flags REX_NOT_BOL and/or REX_NOT_EOL causes the begin and
95 * end of the subject string NOT to be considered a line start
96 * or line end. The flag REX_NOT_EMPTY causes a match to fail if
97 * the empty string was matched.
98 */
99 class FXAPI FXRex {
100 private:
101  FXint *code; ///< Compiled pattern data; equals fallback while the object is empty()
102 private:
103  static const FXchar *const errors[]; ///< Table indexed by FXRexError; see getError()
104  static const FXint fallback[]; ///< Shared data that code points at in a default-constructed object
105 public:
106 
107  /// Construct empty regular expression object; code is set to point at fallback
108  FXRex():code((FXint*)fallback){}
109 
110  /// Copy regular expression object
111  FXRex(const FXRex& orig);
112 
113  /// Compile expression from C-string pattern; if error is not NULL, the error code is returned through it
114  FXRex(const FXchar* pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
115 
116  /// Compile expression from FXString pattern; if error is not NULL, the error code is returned through it
117  FXRex(const FXString& pattern,FXint mode=REX_NORMAL,FXRexError* error=NULL);
118 
119  /// Assign another regular expression to this one
120  FXRex& operator=(const FXRex& orig);
121 
122  /**
123  * See if regular expression is empty (code still points at fallback);
124  * the regular expression will be empty when it is unable to parse a
125  * pattern due to a syntax error.
126  */
127  bool empty() const { return (code==fallback); }
128 
129  /// Parse C-string pattern, return error code if syntax error is found
130  FXRexError parse(const FXchar* pattern,FXint mode=REX_NORMAL);
131 
132  /// Parse FXString pattern, return error code if syntax error is found
133  FXRexError parse(const FXString& pattern,FXint mode=REX_NORMAL);
134 
135  /**
136  * Match a subject string of length len, returning true if a match is found
137  * and false otherwise. The entire pattern is captured in beg[0] and end[0],
138  * where beg[0] refers to the position of the first matched character and end[0]
139  * refers to the position after the last matched character.
140  * Sub expressions from capturing parenthesis i are returned in beg[i] and end[i].
141  */
142  bool match(const FXchar* string,FXint len,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
143 
144  /// Search for match in an FXString; same parameters as the overload above, without the explicit length
145  bool match(const FXString& string,FXint* beg=NULL,FXint* end=NULL,FXint mode=REX_FORWARD,FXint npar=1,FXint fm=0,FXint to=2147483647) const;
146 
147  /**
148  * After performing a regular expression match with capturing parentheses,
149  * a substitution string is built from the replace string, where "&"
150  * is replaced by the entire matched pattern, and "\1" through "\9" are
151  * replaced by captured expressions. The original source string and its
152  * length, and the match arrays beg and end must be passed.
153  */
154  static FXString substitute(const FXchar* string,FXint len,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
155 
156  /// Return substitution string; FXString variant of the overload above, without the explicit length
157  static FXString substitute(const FXString& string,FXint* beg,FXint* end,const FXString& replace,FXint npar=1);
158 
159  /// Returns the errors[] table entry (a C string) for the given error code; err is used as the index without a range check
160  static const FXchar* getError(FXRexError err){ return errors[err]; }
161 
162  /// Comparison operators
163  bool operator==(const FXRex& rex) const;
164  bool operator!=(const FXRex& rex) const;
165 
166  /// Saving and loading
167  friend FXAPI FXStream& operator<<(FXStream& store,const FXRex& s);
168  friend FXAPI FXStream& operator>>(FXStream& store,FXRex& s);
169 
170  /// Delete
171  ~FXRex();
172  };
173 
174 
175 extern FXAPI FXStream& operator<<(FXStream& store,const FXRex& s); // Saving: namespace-scope declaration of the FXRex friend operator
176 extern FXAPI FXStream& operator>>(FXStream& store,FXRex& s); // Loading: namespace-scope declaration of the FXRex friend operator
177 
178 }
179 
180 #endif
Bad escape sequence.
Definition: FXRex.h:42
Unmatched bracket.
Definition: FXRex.h:39
FXStream & operator>>(FXStream &store, FXDate &d)
Match-any operators match newline too.
Definition: FXRex.h:63
char FXchar
Definition: fxdefs.h:387
No atom preceding repetition.
Definition: FXRex.h:44
Definition: FXRex.h:36
Perform capturing parentheses.
Definition: FXRex.h:61
#define FXAPI
Definition: fxdefs.h:122
Expression too complex.
Definition: FXRex.h:48
FXRex is a regular expression class implementing a NFA matcher.
Definition: FXRex.h:116
Repeat following repeat.
Definition: FXRex.h:45
#define NULL
Definition: fxdefs.h:41
FXRexError
Regular expression error codes.
Definition: FXRex.h:35
A stream is a way to serialize data and objects into a byte stream.
Definition: FXStream.h:99
Start of string is NOT begin of line.
Definition: FXRex.h:77
Match scanning backward from offset.
Definition: FXRex.h:76
Bad counted repeat.
Definition: FXRex.h:43
bool match(const FXString &pattern, const FXString &file, FXuint flags=(FILEMATCH_NOESCAPE|FILEMATCH_FILE_NAME))
Perform wildcard match of a filename against a wildcard pattern.
End of string is NOT end of line.
Definition: FXRex.h:78
int FXint
Definition: fxdefs.h:397
Unmatched parenthesis.
Definition: FXRex.h:38
Bad backward reference.
Definition: FXRex.h:46
Illegal token.
Definition: FXRex.h:50
Normal mode.
Definition: FXRex.h:60
Do not match empty.
Definition: FXRex.h:79
Bad character class.
Definition: FXRex.h:47
Match scanning forward from offset.
Definition: FXRex.h:75
FXStream & operator<<(FXStream &store, const FXDate &d)
Bad character range.
Definition: FXRex.h:41
Disable interpretation of magic characters.
Definition: FXRex.h:64
Unmatched brace.
Definition: FXRex.h:40
bool operator!=(const FXString &s1, const FXString &s2)
Out of memory.
Definition: FXRex.h:49
Empty pattern.
Definition: FXRex.h:37
Case independent matching.
Definition: FXRex.h:62
bool operator==(const FXString &s1, const FXString &s2)
FXString provides essential string manipulation capabilities.
Definition: FXString.h:33
Perform syntax check only.
Definition: FXRex.h:65

Copyright © 1997-2005 Jeroen van der Zijp