[Gd-chatter] r11533 - in trunk/libraries/regular-expressions: . tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Tue Dec 11 21:29:31 CET 2007
Author: cgay
Date: Tue Dec 11 21:29:29 2007
New Revision: 11533
Modified:
trunk/libraries/regular-expressions/interface.dylan
trunk/libraries/regular-expressions/match.dylan
trunk/libraries/regular-expressions/od-library.dylan
trunk/libraries/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/regex.dylan
trunk/libraries/regular-expressions/tests/app-library.dylan
trunk/libraries/regular-expressions/tests/library.dylan
trunk/libraries/regular-expressions/tests/pcre.dylan
trunk/libraries/regular-expressions/tests/regression-tests.txt
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
Log:
Bug: 7357
Fixed "", (), (?#...), ".", and changed order of args in regex-search.
The regex now comes first.
Committing now so bug 7364 can reference this revision.
Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan (original)
+++ trunk/libraries/regular-expressions/interface.dylan Tue Dec 11 21:29:29 2007
@@ -153,7 +153,7 @@
// of marks.
//
define function regex-position
- (big :: <string>, regex :: <string>, #key start: big-start = 0,
+ (regex :: <string>, big :: <string>, #key start: big-start = 0,
end: big-end = #f, case-sensitive = #f)
=> (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>));
let substring = make(<substring>, string: big, start: big-start,
@@ -193,16 +193,17 @@
end: big-end = #f)
=> (regex-start :: false-or(<integer>),
#rest marks :: false-or(<integer>));
- regex-position(big, regex, case-sensitive: case-sensitive,
+ regex-position(regex, big, case-sensitive: case-sensitive,
start: big-start, end: big-end);
end method;
end function make-regex-positioner;
// returns #f if no match, the matching string on match, and another string or #f
// for each group in the regex.
+/*
define method regex-match
(big :: <string>, regex :: <string>) => (#rest results);
- let (#rest marks) = regex-position(big, regex);
+ let (#rest marks) = regex-position(regex, big);
let result = make(<stretchy-vector>);
if(marks[0])
@@ -218,6 +219,7 @@
end;
apply(values, result)
end;
+*/
// #if (have-free-time)
/*
Modified: trunk/libraries/regular-expressions/match.dylan
==============================================================================
--- trunk/libraries/regular-expressions/match.dylan (original)
+++ trunk/libraries/regular-expressions/match.dylan Tue Dec 11 21:29:29 2007
@@ -437,13 +437,13 @@
#"end-of-string" => index >= target.end-index;
#"word-boundary" =>
index = 0 | index >= target.end-index
- | (member?(target.entire-string[index], word-chars)
- ~== member?(target.entire-string[index - 1], word-chars));
+ | (member?(target.entire-string[index], $word-chars)
+ ~== member?(target.entire-string[index - 1], $word-chars));
#"not-word-boundary" =>
index ~== 0 & index < target.end-index
- & (member?(target.entire-string[index], word-chars)
- == member?(target.entire-string[index - 1], word-chars));
+ & (member?(target.entire-string[index], $word-chars)
+ == member?(target.entire-string[index - 1], $word-chars));
otherwise =>
error("Unknown assertion %=", assertion);
Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan (original)
+++ trunk/libraries/regular-expressions/od-library.dylan Tue Dec 11 21:29:29 2007
@@ -36,6 +36,8 @@
define library regular-expressions
use common-dylan;
use string-extensions;
+ use io,
+ import: { format-out }; // for debugging only
export
regular-expressions;
end;
@@ -45,6 +47,7 @@
exclude: {
split // todo -- just add a method to this one
};
+ use format-out; // for debugging only
use string-conversions;
use character-type;
use string-hacking;
Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Tue Dec 11 21:29:29 2007
@@ -148,6 +148,32 @@
format-arguments: list(thing)));
end;
+define constant $empty-string :: <parsed-string>
+ = make(<parsed-string>, string: "");
+
+// The useful definitions of all these are in as(<character-set>).
+//
+define constant $any-char
+ = make(<case-sensitive-character-set>, description: "[\<00>-\<FF>]");
+define constant $any-char-except-newline
+ = make(<case-sensitive-character-set>, description: "^\n");
+define constant $digit-chars
+ = make(<case-sensitive-character-set>, description: "\\d");
+define constant $not-digit-chars
+ = make(<case-sensitive-character-set>, description: "^\\d");
+define constant $word-chars
+ = make(<case-sensitive-character-set>, description: "\\w");
+define constant $not-word-chars
+ = make(<case-sensitive-character-set>, description: "^\\w");
+define constant $whitespace-chars
+ = make(<case-sensitive-character-set>, description: "\\s");
+define constant $not-whitespace-chars
+ = make(<case-sensitive-character-set>, description: "^\\s");
+define constant $dot
+ = make(<parsed-set>, set: $any-char-except-newline);
+define constant
+ $dot-all = make(<parsed-set>, set: $any-char);
+
// <parse-info> contains some information about the current regex
// being parsed.
//
@@ -156,14 +182,31 @@
// Name this has-backreferences, for consistency with the other slots.
// Add ? to all the has-* slots. --cgay
// Also, not sure why anyone cares about these three things.
- slot backreference-used :: <boolean>, init-value: #f;
- slot has-alternatives :: <boolean>, init-value: #f;
- slot has-quantifiers :: <boolean>, init-value: #f;
- slot current-group-number :: <integer>, init-value: 0;
- constant slot group-number-to-name :: <table>, init-value: make(<table>);
- constant slot set-type :: <class>, required-init-keyword: #"set-type";
+ slot backreference-used :: <boolean> = #f;
+ slot has-alternatives :: <boolean> = #f;
+ slot has-quantifiers :: <boolean> = #f;
+ slot current-group-number :: <integer> = 0;
+ constant slot group-number-to-name :: <table> = make(<table>);
+ constant slot set-type :: <class>,
+ required-init-keyword: #"set-type";
+ slot dot-matches-all? :: <boolean>,
+ required-init-keyword: #"dot-matches-all";
+ slot verbose? :: <boolean>,
+ required-init-keyword: #"verbose";
+ slot multi-line? :: <boolean>,
+ required-init-keyword: #"multi-line";
end class <parse-info>;
+// These setters will be used eventually, when we implement the ability to change
+// them via subpatterns like (?i). Until then, this prevents warnings.
+begin
+ dot-matches-all?-setter;
+ verbose?;
+ verbose?-setter;
+ multi-line?;
+ multi-line?-setter;
+end;
+
define function make-parse-info
(#key case-sensitive :: <boolean> = #t,
verbose :: <boolean> = #f,
@@ -172,14 +215,16 @@
=> (info :: <parse-info>)
verbose & not-yet-implemented("'verbose' option");
multi-line & not-yet-implemented("'multi-line' option");
- dot-matches-all & not-yet-implemented("'dot-matches-all' option");
- let char-set-type
- = if (case-sensitive)
- <case-sensitive-character-set>
- else
- <case-insensitive-character-set>
- end;
- make(<parse-info>, set-type: char-set-type)
+ let char-set-type = if (case-sensitive)
+ <case-sensitive-character-set>
+ else
+ <case-insensitive-character-set>
+ end;
+ make(<parse-info>,
+ set-type: char-set-type,
+ verbose: verbose,
+ multi-line: multi-line,
+ dot-matches-all: dot-matches-all)
end function make-parse-info;
define method has-named-group?
@@ -189,7 +234,7 @@
define method parse
(regex :: <string>, parse-info :: <parse-info>)
- => (parsed-regex :: <parsed-regex>,
+ => (regex :: <parsed-regex>,
last-group :: <integer>,
backrefs? :: <boolean>,
alternatives? :: <boolean>,
@@ -211,60 +256,59 @@
parse-info.backreference-used,
parse-info.has-alternatives,
parse-info.has-quantifiers);
- end if;
+ end if
end method parse;
-define method parse-regex (s :: <parse-string>, info :: <parse-info>)
- => parsed-regex :: <parsed-regex>;
- let alternative = parse-alternative(s, info);
+define method parse-regex
+ (str :: <parse-string>, info :: <parse-info>)
+ => (regex :: <parsed-regex>)
+ let alternative = parse-alternative(str, info);
if (~alternative)
- parse-error(s.parse-string, "");
- elseif (lookahead(s) = '|')
+ parse-error(str.parse-string, "");
+ elseif (lookahead(str) = '|')
info.has-alternatives := #t;
- make(<union>, left: alternative, right: parse-regex(consume(s), info));
+ make(<union>, left: alternative, right: parse-regex(consume(str), info))
else
- alternative;
- end if;
+ alternative
+ end if
end method parse-regex;
define method parse-alternative
- (s :: <parse-string>, info :: <parse-info>)
+ (str :: <parse-string>, info :: <parse-info>)
=> (re :: false-or(<parsed-regex>))
- let term = parse-quantified-atom(s, info);
- if (member?(lookahead(s), #(#f, '|', ')')))
- term;
+ let term = parse-quantified-atom(str, info);
+ if (member?(lookahead(str), #(#f, '|', ')')))
+ term
else
- make(<alternative>, left: term, right: parse-alternative(s, info));
- end if;
+ make(<alternative>, left: term, right: parse-alternative(str, info))
+ end
end method parse-alternative;
-define method parse-quantified-atom (s :: <parse-string>, info :: <parse-info>)
+define method parse-quantified-atom
+ (str :: <parse-string>, info :: <parse-info>)
=> (result :: false-or(<parsed-regex>))
- // I think this breaks when parse-atom returns #f and then we quantify that.
- // I added some regexes to regression-tests.txt starting with /a()b/ that I
- // hope will exercise that case. --cgay
- let atom = parse-atom(s, info);
- let char = lookahead(s);
+ let atom = parse-atom(str, info);
+ let char = lookahead(str);
select (char by \=)
'*' =>
info.has-quantifiers := #t;
- consume(s);
+ consume(str);
make(<quantified-atom>, min: 0, atom: atom);
'+' =>
info.has-quantifiers := #t;
- consume(s);
+ consume(str);
make(<quantified-atom>, min: 1, atom: atom);
'?' =>
info.has-quantifiers := #t;
- consume(s);
+ consume(str);
make(<quantified-atom>, min: 0, max: 1, atom: atom);
'{' =>
info.has-quantifiers := #t;
- consume(s);
- parse-minmax-quantifier(atom, s);
+ consume(str);
+ parse-minmax-quantifier(atom, str);
otherwise =>
atom;
@@ -274,75 +318,83 @@
// {m,n}, {m,}, {,n}, {m}, {}, and {,} are all valid.
// m defaults to 0 and n defaults to #f (unlimited).
define method parse-minmax-quantifier
- (atom :: <parsed-regex>, s :: <parse-string>) => (qatom :: <quantified-atom>)
+ (atom :: <parsed-regex>, str :: <parse-string>)
+ => (qatom :: <quantified-atom>)
local method parse-integer () => (int :: false-or(<integer>))
let digits = make(<deque>);
- while (lookahead(s) & digit?(lookahead(s)))
- push-last(digits, lookahead(s));
- consume(s);
+ while (lookahead(str) & digit?(lookahead(str)))
+ push-last(digits, lookahead(str));
+ consume(str);
end;
~empty?(digits) & string-to-integer(as(<byte-string>, digits));
end method parse-integer;
let qmin = parse-integer() | 0;
let qmax = #f;
- if (lookahead(s) = ',')
- consume(s);
+ if (lookahead(str) = ',')
+ consume(str);
qmax := parse-integer();
else
qmax := qmin;
end;
- if (lookahead(s) ~= '}')
- parse-error(s.parse-string,
+ if (lookahead(str) ~= '}')
+ parse-error(str.parse-string,
"Close brace expected in {m,n} quantifier (index = %s).",
- s.parse-index);
+ str.parse-index);
end;
- consume(s);
+ consume(str);
make(<quantified-atom>, atom: atom, min: qmin, max: qmax);
end method parse-minmax-quantifier;
-define method parse-atom (s :: <parse-string>, info :: <parse-info>)
+define method parse-atom (str :: <parse-string>, info :: <parse-info>)
=> (regex :: false-or(<parsed-regex>))
- let char = lookahead(s);
+ let char = lookahead(str);
select (char)
'(' =>
- consume(s); // Consume beginning paren
- parse-group(s, info);
-
- ')' =>
- #f; // Need something to terminate upon seeing a close paren
+ consume(str); // Consume beginning paren
+ // If parse-group returns #f it's because it parsed a (?#...) comment,
+ // so just ignore the comment and try to parse another atom (if there's
+ // anything left in str). This means comments can't immediately precede
+ // |, ), or EOI. Should fix this.
+ parse-group(str, info)
+ | (lookahead(str) & parse-atom(str, info));
#f =>
- #f; // Signal error? (end of stream)
+ // Handle valid patterns like "" and "a|"
+ $empty-string;
- '*', '|', '+' =>
+ '*', '|', '+', ')' =>
#f;
'\\' =>
- consume(s); // Consume the backslash
+ consume(str); // Consume the backslash
// Perhaps add support for a different escape character to aid readability.
// The escape character could be specified in the <parse-info>. --cgay
- parse-escaped-character(s, info);
+ parse-escaped-character(str, info);
'[' =>
- consume(s); // Eat the opening brace
- parse-character-set(s, info);
+ consume(str); // Eat the opening brace
+ parse-character-set(str, info);
'.' =>
- consume(s);
- dot;
+ consume(str);
+ if (info.dot-matches-all?)
+ $dot-all
+ else
+ $dot
+ end;
'^' =>
- consume(s);
+ consume(str);
make(<parsed-assertion>, assertion: #"beginning-of-string");
'$' =>
- consume(s);
+ consume(str);
make(<parsed-assertion>, assertion: #"end-of-string");
// Insert more special characters here
otherwise =>
- consume(s);
+ consume(str);
make(<parsed-character>, character: char);
end select;
end method parse-atom;
@@ -390,18 +442,21 @@
while (lookahead(str) & lookahead(str) ~== ')')
consume(str);
end;
- if (~ lookahead(str))
- parse-error(str.parse-string, "Unterminated subpattern commend (?#....");
- else
+ if (lookahead(str) == ')')
+ consume(str);
#f
+ else
+ parse-error(str.parse-string, "Unterminated subpattern comment (?#....");
end;
otherwise =>
// See the Python re docs for what all these do.
+ // See "INTERNAL OPTION SETTING" in pcre.txt doc as well.
if (member?(char, "iLmsux=!<("))
not-yet-implemented("'(?%c' subpattern construct", char);
else
- parse-error(str.parse-string, "Invalid subpattern construct (?%c...) at index %s.",
+ parse-error(str.parse-string,
+ "Invalid subpattern construct (?%c...) at index %s.",
char, str.parse-index);
end;
end select
@@ -420,13 +475,14 @@
consume(str);
copy-sequence(str.parse-string, start: start-index, end: str.parse-index - 1)
else
- parse-error(str.parse-string, "Unterminated named group name at index %s.",
+ parse-error(str.parse-string,
+ "Unterminated named group name at index %s.",
str.parse-index);
end
end function parse-group-name;
-// Parse a group/subpattern, possibly after having already parsed any
-// options given via "(?...".
+// Parse a group/subpattern. The open paren and any subpattern options given
+// via "(?..." have already been consumed.
//
define inline function parse-simple-group
(str :: <parse-string>,
@@ -437,27 +493,30 @@
if (save-group?)
info.current-group-number := info.current-group-number + 1;
end;
- let regex = parse-regex(str, info);
- if (lookahead(str) ~== ')')
- parse-error(str.parse-string, "Unbalanced parens in regex (index = %s).",
- str.parse-index);
+ let regex = if (lookahead(str) == ')')
+ consume(str);
+ $empty-string
+ else
+ let re = parse-regex(str, info);
+ if (lookahead(str) == ')')
+ consume(str)
+ end;
+ re
+ end;
+ if (~save-group?)
+ regex
else
- consume(str);
- if (~ save-group?)
- regex
- else
- if (group-name)
- if (has-named-group?(info, group-name))
- parse-error(str.parse-string,
- "Duplicate group name (%s) at index %s.",
- group-name, str.parse-index);
- else
- info.group-number-to-name[info.current-group-number] := group-name;
- end;
+ if (group-name)
+ if (has-named-group?(info, group-name))
+ parse-error(str.parse-string,
+ "Duplicate group name (%s) at index %s.",
+ group-name, str.parse-index);
+ else
+ info.group-number-to-name[info.current-group-number] := group-name;
end;
- make(<mark>, child: regex, group: info.current-group-number)
- end
- end
+ end;
+ make(<mark>, child: regex, group: info.current-group-number)
+ end if
end function parse-simple-group;
// This just does a quick scan to find the closing ] and then lets
@@ -508,43 +567,19 @@
make(<parsed-set>, set: make(info.set-type, description: set-string))
end function parse-character-set;
-define constant any-char
- = make(<case-sensitive-character-set>, description: "^\n");
-
-// The useful definitions of all these are in as(<character-set>).
-//
-define constant digit-chars
- = make(<case-sensitive-character-set>, description: "\\d");
-define constant not-digit-chars
- = make(<case-sensitive-character-set>, description: "^\\d");
-define constant word-chars
- = make(<case-sensitive-character-set>, description: "\\w");
-define constant not-word-chars
- = make(<case-sensitive-character-set>, description: "^\\w");
-define constant whitespace-chars
- = make(<case-sensitive-character-set>, description: "\\s");
-define constant not-whitespace-chars
- = make(<case-sensitive-character-set>, description: "^\\s");
-
-define constant dot = make(<parsed-set>, set: any-char);
-/* KJP: Not used.
-define constant dot-star = make(<quantified-atom>, min: 0, max: #f,
- atom: dot);
-*/
-
// This only handles escaped characters *outside* of a character
// set. Inside of a character set is a whole different story.
//
define method parse-escaped-character
- (s :: <parse-string>, info :: <parse-info>)
+ (str :: <parse-string>, info :: <parse-info>)
=> parsed-regex :: <parsed-regex>;
- let next-char = lookahead(s);
+ let next-char = lookahead(str);
if (~next-char)
- parse-error(s.parse-string,
+ parse-error(str.parse-string,
"Unterminated escape sequence at index %d.",
- s.parse-index - 1)
+ str.parse-index - 1)
end;
- consume(s);
+ consume(str);
select (next-char)
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
info.backreference-used := #t;
@@ -560,14 +595,14 @@
'b' => make(<parsed-assertion>, assertion: #"word-boundary");
'B' => make(<parsed-assertion>, assertion: #"not-word-boundary");
- // Beginning and end of string are not escaped
+ // Beginning and end of string are not escaped
- 'd' => make(<parsed-set>, set: digit-chars);
- 'D' => make(<parsed-set>, set: not-digit-chars);
- 'w' => make(<parsed-set>, set: word-chars);
- 'W' => make(<parsed-set>, set: not-word-chars);
- 's' => make(<parsed-set>, set: whitespace-chars);
- 'S' => make(<parsed-set>, set: not-whitespace-chars);
+ 'd' => make(<parsed-set>, set: $digit-chars);
+ 'D' => make(<parsed-set>, set: $not-digit-chars);
+ 'w' => make(<parsed-set>, set: $word-chars);
+ 'W' => make(<parsed-set>, set: $not-word-chars);
+ 's' => make(<parsed-set>, set: $whitespace-chars);
+ 'S' => make(<parsed-set>, set: $not-whitespace-chars);
// Insert more escaped characters here
Modified: trunk/libraries/regular-expressions/regex.dylan
==============================================================================
--- trunk/libraries/regular-expressions/regex.dylan (original)
+++ trunk/libraries/regular-expressions/regex.dylan Tue Dec 11 21:29:29 2007
@@ -29,7 +29,7 @@
// This function signals <invalid-regex> if the regular expression is invalid.
//
define sealed generic compile-regex
- (string :: <string>,
+ (pattern :: <string>,
#key case-sensitive :: <boolean> = #t,
verbose :: <boolean> = #f,
multi-line :: <boolean> = #f,
@@ -37,13 +37,13 @@
=> (regex :: <regex>);
define method compile-regex
- (string :: <string>,
+ (pattern :: <string>,
#key case-sensitive :: <boolean> = #t,
verbose :: <boolean> = #f,
multi-line :: <boolean> = #f,
dot-matches-all :: <boolean> = #f)
=> (regex :: <regex>)
- parse(string,
+ parse(pattern,
make-parse-info(case-sensitive: case-sensitive,
verbose: verbose,
multi-line: multi-line,
@@ -68,33 +68,33 @@
// todo -- Should $ anchor at the provided end position or at the end of the string?
//
define sealed generic regex-search
- (big :: <string>, pattern :: <object>,
+ (pattern :: <object>, string :: <string>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
end: _end :: <integer> = big.size)
=> (match :: false-or(<regex-match>));
define method regex-search
- (big :: <string>, pattern :: <string>,
+ (pattern :: <string>, string :: <string>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
- end: _end :: <integer> = big.size)
+ end: _end :: <integer> = string.size)
=> (match :: false-or(<regex-match>))
- regex-search(big, compile-regex(pattern),
- anchored: anchored, start: start, end: _end)
+ regex-search(compile-regex(pattern),
+ string,
+ anchored: anchored, start: start, end: _end)
end method regex-search;
define method regex-search
- (big :: <string>, pattern :: <regex>,
+ (pattern :: <regex>, string :: <string>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
- end: _end :: <integer> = big.size)
+ end: _end :: <integer> = string.size)
=> (match :: false-or(<regex-match>))
- // Copied from regex-position with some mods to match our interface.
// Unlike regex-position there is no caching. If you don't want to
// recompile your regex each time, compile it explicitly with compile-regex
// and save it.
- let substring = make(<substring>, string: big, start: start, end: _end);
+ let substring = make(<substring>, string: string, start: start, end: _end);
let case-sensitive? = #t;
let num-groups = pattern.regex-group-count;
let (matched?, marks)
Modified: trunk/libraries/regular-expressions/tests/app-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/app-library.dylan (original)
+++ trunk/libraries/regular-expressions/tests/app-library.dylan Tue Dec 11 21:29:29 2007
@@ -3,11 +3,13 @@
Synopsis: An application library for regular-expressions-test-suite
define library regular-expressions-test-suite-app
+ use common-dylan;
use regular-expressions-test-suite;
use testworks;
end;
define module regular-expressions-test-suite-app
+ use common-dylan;
use regular-expressions-test-suite;
use testworks;
end;
Modified: trunk/libraries/regular-expressions/tests/library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/library.dylan (original)
+++ trunk/libraries/regular-expressions/tests/library.dylan Tue Dec 11 21:29:29 2007
@@ -4,6 +4,7 @@
use common-dylan;
use io,
import: {
+ format-out, // for debugging only
streams
};
use regular-expressions;
@@ -21,6 +22,9 @@
define module regular-expressions-test-suite
use common-dylan,
+ rename: {
+ format-to-string => sprintf // to long for 80 chars per line
+ },
exclude: {
split
};
@@ -37,6 +41,7 @@
environment-variable
};
use testworks;
+ use format-out; // for debugging only
use streams;
use strings,
import: {
Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan (original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan Tue Dec 11 21:29:29 2007
@@ -20,14 +20,14 @@
// Some multi-line regular expression patterns have empty lines and we
// don't want to think that's the end of the section.
if (lines.size > 0
- & regex-search(lines[lines.size - 1], $group-regex))
+ & regex-search($group-regex, lines[lines.size - 1]))
check-pcre-section(make(<section>,
lines: lines,
start-line-number: line-number - lines.size));
lines := make(<stretchy-vector>);
elseif (lines.size < 3)
- // A section must have a regex, a test string and one group result to make
- // any sense. Note we don't add the line to the section here.
+ // A section must have a regex, a test string and one group result to
+ // make any sense. Note we don't add the line to the section here.
test-output("Line %d: empty line in first 3 lines of a section.\n",
line-number);
else
@@ -41,8 +41,10 @@
end function run-pcre-checks;
define class <section> (<object>)
- constant slot section-lines :: <sequence>, required-init-keyword: #"lines";
- constant slot start-line-number :: <integer>, required-init-keyword: #"start-line-number";
+ constant slot section-lines :: <sequence>,
+ required-init-keyword: #"lines";
+ constant slot start-line-number :: <integer>,
+ required-init-keyword: #"start-line-number";
slot %index :: <integer> = 0;
end class <section>;
@@ -91,10 +93,11 @@
block (done-with-this-test-string)
while (#t)
let line = peek-line(section);
- let match = line & regex-search(line, $group-regex);
+ let match = line & regex-search($group-regex, line);
if (match)
consume-line(section);
- let group-text = match-group(match, $group-index-of-what-pcre-matched);
+ let group-text = match-group(match,
+ $group-index-of-what-pcre-matched);
//test-output(" pcre group: %s\n", group-text | "No match");
if (group-text)
add!(group-strings, group-text);
@@ -109,16 +112,17 @@
end;
end;
if (regex)
- check-no-errors(format-to-string("search for %s in %s",
- test-string, regex.regex-pattern),
- regex-search(test-string, regex));
+ check-no-errors(sprintf("search for %s in %s",
+ test-string, regex.regex-pattern),
+ regex-search(regex, test-string));
let match = block ()
- regex-search(test-string, regex)
+ regex-search(regex, test-string)
exception (ex :: <error>)
#f
end;
if (match)
- compare-to-pcre-results(regex.regex-pattern, test-string, match, group-strings);
+ compare-to-pcre-results(regex.regex-pattern, test-string, match,
+ group-strings);
end;
end if;
end while;
@@ -162,8 +166,7 @@
let (pattern, flags) = read-pattern-and-flags();
//test-output("pattern: %s (flags = %s)\n", pattern, flags);
for (flag in flags)
- check-true(format-to-string("For regex %s, flag %s is recognized",
- pattern, flag),
+ check-true(sprintf("For regex %s, flag %s is recognized", pattern, flag),
member?(flag, "ixms"));
end for;
block ()
@@ -173,10 +176,11 @@
multi-line: member?('m', flags),
dot-matches-all: member?('s', flags))
// Unfortunately we can't catch <regex-error> here because the charset
- // parser is in string-extensions and signals <invalid-character-set-description>
- // which isn't related to <regex-error> (and isn't even exported).
+ // parser is in string-extensions and signals
+ // <invalid-character-set-description> which isn't related to <regex-error>
+ // (and isn't even exported).
exception (ex :: <error>)
- check-true(format-to-string("can compile regex %s", pattern), #f);
+ check-true(sprintf("can compile regex %s", pattern), #f);
//test-output(" ERROR: %s\n", ex);
#f
end block
@@ -194,8 +198,8 @@
pcre-groups :: <sequence>)
=> ()
if (match)
- check-equal(format-to-string("Match %s against %s -- same # of groups",
- test-string, pattern),
+ check-equal(sprintf("Match %s against %s -- same # of groups",
+ test-string, pattern),
size(match-groups(match)),
pcre-groups.size);
for (group-number from 0,
@@ -206,14 +210,14 @@
let our-group = /* if (group-number < size(match-groups(match))) */
match-group(match, group-number)
/* end */;
- check-equal(format-to-string("Match %s against %s -- group %d is the same",
- test-string, pattern, group-number),
+ check-equal(sprintf("Match %s against %s -- group %d is the same",
+ test-string, pattern, group-number),
our-group,
pcre-group);
end;
else
- check-equal(format-to-string("Pattern %s doesn't match test string %s",
- pattern, test-string),
+ check-equal(sprintf("Pattern %s doesn't match test string %s",
+ pattern, test-string),
0,
pcre-groups.size);
end if;
Modified: trunk/libraries/regular-expressions/tests/regression-tests.txt
==============================================================================
--- trunk/libraries/regular-expressions/tests/regression-tests.txt (original)
+++ trunk/libraries/regular-expressions/tests/regression-tests.txt Tue Dec 11 21:29:29 2007
@@ -20,6 +20,18 @@
b]b
No match
+/./
+ a
+ 0: a
+ \n
+No match
+
+/./s
+ a
+ 0: a
+ \n
+ 0: \n
+
/a()b/
/a()+b/
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan Tue Dec 11 21:29:29 2007
@@ -1,3 +1,9 @@
Module: regular-expressions-test-suite-app
-run-test-application(regular-expressions-test-suite);
+define function main
+ () => ()
+ run-test-application(regular-expressions-test-suite);
+end;
+
+main();
+
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan Tue Dec 11 21:29:29 2007
@@ -1,64 +1,81 @@
Module: regular-expressions-test-suite
Author: Carl Gay
-define function re/position (string, pattern, #rest args)
- let (#rest marks) = apply(regex-position, string, pattern, args);
+define function re/position (pattern, string, #rest args)
+ let (#rest marks) = apply(regex-position, pattern, string, args);
marks
end function re/position;
define test atom-test ()
- // In current code the empty string is an illegal regex, but Python
- // (and probably perl?) allow it, so I think we should consider that
- // a bug. --cgay
check-no-errors("atom-0", re/position("", ""));
check-equal("atom-1", re/position("a", "a"), #[0, 1]);
- check-equal("atom-2", re/position("a", "[a]"), #[0, 1]);
- check-equal("atom-3", re/position("ab", "(a)b"), #[0, 2, 0, 1]);
- check-equal("atom-4", re/position("a", "\\w"), #[0, 1]);
- check-equal("atom-5", re/position("a", "."), #[0, 1]);
- check-equal("atom-6", re/position("a", "a{0}"), #[0, 0]);
- check-equal("atom-7", re/position("aa", "a{2}"), #[0, 2]);
- check-equal("atom-8", re/position("aa", "a{1,}"), #[0, 2]);
- check-equal("atom-9", re/position("aaa", "a{1,8}"), #[0, 3]);
- check-equal("atom-A", re/position("", "a{,}"), #[0, 0]);
- check-equal("atom-A1", re/position("aaaaaa", "a{,}"), #[0, 6]);
- check-condition("atom-B", <invalid-regex>, re/position("", "a{m,n}"));
- check-condition("atom-C", <invalid-regex>, re/position("", "a{m,}"));
- check-condition("atom-D", <invalid-regex>, re/position("", "a{,n}"));
- check-condition("atom-E", <invalid-regex>, re/position("", "a{m}"));
- check-condition("atom-F", <invalid-regex>, re/position("", "a{,"));
- check-condition("atom-G", <invalid-regex>, re/position("", "[a"));
- check-condition("atom-H", <invalid-regex>, re/position("", "\\"));
- //check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
+ check-equal("atom-2", re/position("[a]", "a"), #[0, 1]);
+ check-equal("atom-3", re/position("(a)b", "ab"), #[0, 2, 0, 1]);
+ check-equal("atom-4", re/position("\\w", "a"), #[0, 1]);
+ check-equal("atom-5", re/position(".", "a"), #[0, 1]);
+ check-equal("atom-6", re/position("a{0}", "a"), #[0, 0]);
+ check-equal("atom-7", re/position("a{2}", "aa"), #[0, 2]);
+ check-equal("atom-8", re/position("a{1,}", "aa"), #[0, 2]);
+ check-equal("atom-9", re/position("a{1,8}", "aaa"), #[0, 3]);
+ check-equal("atom-A", re/position("a{,}", ""), #[0, 0]);
+ check-equal("atom-A1", re/position("a{,}", "aaaaaa"), #[0, 6]);
+ check-condition("atom-B", <invalid-regex>, re/position("a{m,n}", ""));
+ check-condition("atom-C", <invalid-regex>, re/position("a{m,}", ""));
+ check-condition("atom-D", <invalid-regex>, re/position("a{,n}", ""));
+ check-condition("atom-E", <invalid-regex>, re/position("a{m}", ""));
+ check-condition("atom-F", <invalid-regex>, re/position("a{,", ""));
+ check-condition("atom-G", <invalid-regex>, re/position("[a", ""));
+ check-condition("atom-H", <invalid-regex>, re/position("\\", ""));
+ check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
end;
-// These should all compile.
-//
-define test good-regex-test ()
- let patterns = #(
- "",
- "a()b",
- "a(?#blah)b"
- );
- for (pattern in patterns)
- check-no-errors(format-to-string("Regex '%s' compiles", pattern),
- compile-regex(pattern));
+define function check-matches
+ (pattern, input-string, #rest groups-and-flags) => ()
+ let string? = rcurry(instance?, <string>);
+ let groups = choose(string?, groups-and-flags);
+ let flags = choose(complement(string?), groups-and-flags);
+ let regex = apply(compile-regex, pattern, flags);
+ let match = regex-search(regex, input-string);
+ if (empty?(groups))
+ check-false(sprintf("Regex '%s' matches '%s'", pattern, input-string),
+ match);
+ else
+ for (group in groups, i from 0)
+ check-equal(sprintf("Regex '%s' group %d is '%s'", pattern, i, group),
+ group,
+ if (match)
+ match-group(match, i)
+ else
+ "{no such group}"
+ end);
+ end;
end;
-end test good-regex-test;
+end function check-matches;
+
+// These are to cover the basics, as I add new features to the code.
+// The PCRE tests should cover a lot of the more esoteric cases, I hope.
+//
+define test ad-hoc-regex-test ()
+ check-matches("", "abc", "");
+ check-matches("a()b", "ab", "ab", "");
+ check-matches("a(?#blah)b", "ab", "ab"); // comments shouldn't create a group
+ check-matches(".", "x", "x");
+ check-matches(".", "\n", "\n", dot-matches-all: #t);
+end test ad-hoc-regex-test;
// All these regexes should signal <invalid-regex> on compilation.
//
-define test bad-regex-test ()
+define test invalid-regex-test ()
let patterns = #(
"(?P<name>x)(?P<name>y)", // can't use same name twice
"(?@abc)" // invalid extended character '@'
);
for (pattern in patterns)
- check-condition(format-to-string("Compiling '%s' gets an error", pattern),
+ check-condition(sprintf("Compiling '%s' gets an error", pattern),
<invalid-regex>,
compile-regex(pattern));
end;
-end test bad-regex-test;
+end test invalid-regex-test;
define test pcre-testoutput1 ()
run-pcre-checks(make-pcre-locator("pcre-testoutput1.txt"));
@@ -74,11 +91,10 @@
define suite regular-expressions-test-suite ()
test atom-test;
- test good-regex-test;
- test bad-regex-test;
+ test ad-hoc-regex-test;
+ test invalid-regex-test;
test regressions-test;
- // For now leaving out the PCRE tests because they're too verbose.
- // Once more basic stuff is working will start including them again.
- // suite pcre-test-suite;
+ // It's sometimes useful to use -ignore-suite to skip this one because it's so noisy.
+ suite pcre-test-suite;
end;
More information about the chatter
mailing list