[Gd-chatter] r11582 - in trunk/libraries/regular-expressions: . tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Wed Dec 26 13:08:45 CET 2007
Author: cgay
Date: Wed Dec 26 13:08:44 2007
New Revision: 11582
Modified:
trunk/libraries/regular-expressions/interface.dylan
trunk/libraries/regular-expressions/od-library.dylan
trunk/libraries/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt
trunk/libraries/regular-expressions/tests/pcre.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
Log:
Bug: 7357
Renamed a few things.
Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan (original)
+++ trunk/libraries/regular-expressions/interface.dylan Wed Dec 26 13:08:44 2007
@@ -132,7 +132,7 @@
(regex :: <string>, parse-info :: <parse-info>)
=> (parsed-regex :: <parsed-regex>, last-group :: <integer>);
let key = make(<cache-key>, regex-string: regex,
- character-set-type: parse-info.set-type);
+ character-set-type: parse-info.character-set-type);
let cached-value = element(*regex-cache*, key, default: #f);
if (cached-value)
values(cached-value.parse-tree, cached-value.last-group);
Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan (original)
+++ trunk/libraries/regular-expressions/od-library.dylan Wed Dec 26 13:08:44 2007
@@ -61,7 +61,6 @@
regex-group-count,
regex-position,
make-regex-positioner,
- regex-match, // todo -- rename to regex-search-strings?
regex-replace,
make-regex-replacer,
<regex-error>,
Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Wed Dec 26 13:08:44 2007
@@ -178,21 +178,24 @@
// being parsed.
//
define class <parse-info> (<object>)
- // Whether or not the function includes \1, \2, etc in the regex.
- // Name this has-backreferences, for consistency with the other slots.
- // Add ? to all the has-* slots. --cgay
- // Also, not sure why anyone cares about these three things.
- slot backreference-used :: <boolean> = #f;
- slot has-alternatives :: <boolean> = #f;
- slot has-quantifiers :: <boolean> = #f;
+ slot has-back-references? :: <boolean> = #f;
+ slot has-alternatives? :: <boolean> = #f;
+ slot has-quantifiers? :: <boolean> = #f;
slot current-group-number :: <integer> = 0;
constant slot group-number-to-name :: <table> = make(<table>);
- constant slot set-type :: <class>,
+ constant slot character-set-type :: <class>,
required-init-keyword: #"set-type";
+
+ // If true then . matches \n. (?s) /s
slot dot-matches-all? :: <boolean>,
required-init-keyword: #"dot-matches-all";
- slot verbose? :: <boolean>,
+
+ // Ignore whitespace and comments within a regex pattern. (?x) /x
+ slot extended? :: <boolean>,
required-init-keyword: #"verbose";
+
+ // If multi-line? is true then ^ and $ match at \n boundaries as well as
+ // at the beginning and end of the subject string. (?m) /m
slot multi-line? :: <boolean>,
required-init-keyword: #"multi-line";
end class <parse-info>;
@@ -201,8 +204,8 @@
// them via subpatterns like (?i). Until then, this prevents warnings.
begin
dot-matches-all?-setter;
- verbose?;
- verbose?-setter;
+ extended?;
+ extended?-setter;
multi-line?;
multi-line?-setter;
end;
@@ -253,9 +256,9 @@
else
values(optimized-regex,
parse-info.current-group-number,
- parse-info.backreference-used,
- parse-info.has-alternatives,
- parse-info.has-quantifiers);
+ parse-info.has-back-references?,
+ parse-info.has-alternatives?,
+ parse-info.has-quantifiers?);
end if
end method parse;
@@ -266,7 +269,7 @@
if (~alternative)
parse-error(str.parse-string, "");
elseif (lookahead(str) = '|')
- info.has-alternatives := #t;
+ info.has-alternatives? := #t;
make(<union>, left: alternative, right: parse-regex(consume(str), info))
else
alternative
@@ -291,22 +294,22 @@
let char = lookahead(str);
select (char by \=)
'*' =>
- info.has-quantifiers := #t;
+ info.has-quantifiers? := #t;
consume(str);
make(<quantified-atom>, min: 0, atom: atom);
'+' =>
- info.has-quantifiers := #t;
+ info.has-quantifiers? := #t;
consume(str);
make(<quantified-atom>, min: 1, atom: atom);
'?' =>
- info.has-quantifiers := #t;
+ info.has-quantifiers? := #t;
consume(str);
make(<quantified-atom>, min: 0, max: 1, atom: atom);
'{' =>
- info.has-quantifiers := #t;
+ info.has-quantifiers? := #t;
consume(str);
parse-minmax-quantifier(atom, str);
@@ -564,7 +567,7 @@
end select
end for;
end block;
- make(<parsed-set>, set: make(info.set-type, description: set-string))
+ make(<parsed-set>, set: make(info.character-set-type, description: set-string))
end function parse-character-set;
// This only handles escaped characters *outside* of a character
@@ -581,10 +584,12 @@
end;
consume(str);
select (next-char)
- '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
- info.backreference-used := #t;
- make(<parsed-backreference>, group: digit-to-integer(next-char));
-
+ // not yet
+ //'0' =>
+ // parse-octal-escape(str, info);
+ '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
+ parse-back-reference(str, info);
+
// Hmm. Why would you write \\n in your regex instead of \n? It has the
// same effect. Also, what about the rest of the Dylan character escapes?
// --cgay
@@ -611,6 +616,34 @@
end select;
end method parse-escaped-character;
+define function parse-back-reference
+ (str :: <parse-string>, info :: <parse-info>)
+ => (backref :: <parsed-backreference>)
+ // We've just parsed '\' and a digit between '1' and '9'.
+ let start-index = str.parse-index - 1; // include the char we already read
+ let end-index = str.parse-index;
+ while (member?(lookahead(str), $digit-chars))
+ end-index := end-index + 1;
+ consume(str);
+ end;
+ let group = string-to-integer(copy-sequence(str.parse-string,
+ start: start-index,
+ end: end-index));
+ // todo -- If I understand the PCRE doc correctly it's not an error for a
+ // backref to be a forward ref if it is between 1 and 9, which implies we
+ // need to do a separate pass AFTER the entire parse is done, so we can
+ // verify that there are enough groups. Does a forward reference ever work???
+ // Maybe only if numbering the groups the way perl 6 does it rather than
+ // strictly left to right?
+ if (group >= info.current-group-number)
+ parse-error(str.parse-string,
+ "Invalid back reference at index %d",
+ str.parse-index);
+ end if;
+ info.has-back-references? := #t;
+ make(<parsed-backreference>, group: group)
+end function parse-back-reference;
+
define method is-anchored? (regex :: <parsed-regex>)
=> (result :: <boolean>);
select (regex by instance?)
Modified: trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt (original)
+++ trunk/libraries/regular-expressions/tests/pcre-testoutput1.txt Wed Dec 26 13:08:44 2007
@@ -4270,10 +4270,6 @@
No match
b
No match
-
-
-/a*/
-
/([abc])*d/
abbbcd
@@ -4830,9 +4826,6 @@
/abc/i
-/a*/i
-
-
/([abc])*d/i
ABBBCD
0: ABBBCD
Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan (original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan Wed Dec 26 13:08:44 2007
@@ -164,9 +164,9 @@
values(pattern, flags)
end method read-pattern-and-flags;
let (pattern, flags) = read-pattern-and-flags();
- //test-output("pattern: %s (flags = %s)\n", pattern, flags);
+ //test-output("pattern: '%s' (flags = '%s')\n", pattern, flags);
for (flag in flags)
- check-true(sprintf("For regex %s, flag %s is recognized", pattern, flag),
+ check-true(sprintf("For regex '%s', flag '%s' is recognized", pattern, flag),
member?(flag, "ixms"));
end for;
block ()
@@ -180,7 +180,7 @@
// <invalid-character-set-description> which isn't related to <regex-error>
// (and isn't even exported).
exception (ex :: <error>)
- check-true(sprintf("can compile regex %s", pattern), #f);
+ check-true(sprintf("can compile regex '%s'", pattern), #f);
//test-output(" ERROR: %s\n", ex);
#f
end block
@@ -198,7 +198,7 @@
pcre-groups :: <sequence>)
=> ()
if (match)
- check-equal(sprintf("Match %s against %s -- same # of groups",
+ check-equal(sprintf("Match '%s' against regex '%s' -- same # of groups",
test-string, pattern),
size(match-groups(match)),
pcre-groups.size);
@@ -210,13 +210,13 @@
let our-group = /* if (group-number < size(match-groups(match))) */
match-group(match, group-number)
/* end */;
- check-equal(sprintf("Match %s against %s -- group %d is the same",
+ check-equal(sprintf("Match '%s' against regex '%s' -- group %d is the same",
test-string, pattern, group-number),
our-group,
pcre-group);
end;
else
- check-equal(sprintf("Pattern %s doesn't match test string %s",
+ check-equal(sprintf("Regex '%s' doesn't match test string '%s'",
pattern, test-string),
0,
pcre-groups.size);
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan Wed Dec 26 13:08:44 2007
@@ -52,15 +52,18 @@
end;
end function check-matches;
-// These are to cover the basics, as I add new features to the code.
-// The PCRE tests should cover a lot of the more esoteric cases, I hope.
+// These are to cover the basics, as I add new features to the code or
+// read through the pcrepattern docs. The PCRE tests should cover a lot
+// of the more esoteric cases, I hope.
//
define test ad-hoc-regex-test ()
+ //args: check-matches(regex, string, group1, group2, ..., flag1: x, flag2: y, ...)
check-matches("", "abc", "");
check-matches("a()b", "ab", "ab", "");
check-matches("a(?#blah)b", "ab", "ab"); // comments shouldn't create a group
check-matches(".", "x", "x");
check-matches(".", "\n", "\n", dot-matches-all: #t);
+ check-matches("[a-]", "-", "-");
end test ad-hoc-regex-test;
// All these regexes should signal <invalid-regex> on compilation.
@@ -68,7 +71,8 @@
define test invalid-regex-test ()
let patterns = #(
"(?P<name>x)(?P<name>y)", // can't use same name twice
- "(?@abc)" // invalid extended character '@'
+ "(?@abc)", // invalid extended character '@'
+ "(a)\\2" // invalid back reference
);
for (pattern in patterns)
check-condition(sprintf("Compiling '%s' gets an error", pattern),
More information about the chatter
mailing list