[Gd-chatter] r11504 - in trunk: fundev/sources/lib/string-extensions libraries/regular-expressions libraries/regular-expressions/tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Tue Nov 27 06:35:47 CET 2007
Author: cgay
Date: Tue Nov 27 06:35:45 2007
New Revision: 11504
Modified:
trunk/fundev/sources/lib/string-extensions/string-hacking.dylan
trunk/libraries/regular-expressions/od-library.dylan
trunk/libraries/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/regex.dylan
trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan
Log:
Job: 7357
Fixed [a-] and []...] character classes.
Improved parsing of PCRE test output file a bit:
Ran 2071 checks: 1856 passed (89.7%), 161 failed, 0 not executed, 54 crashed
Modified: trunk/fundev/sources/lib/string-extensions/string-hacking.dylan
==============================================================================
--- trunk/fundev/sources/lib/string-extensions/string-hacking.dylan (original)
+++ trunk/fundev/sources/lib/string-extensions/string-hacking.dylan Tue Nov 27 06:35:45 2007
@@ -269,19 +269,26 @@
define method parse-description (string :: <sequence>);
let s = make(<parse-string>, string: string);
let negated = (lookahead(s) == '^');
- if (negated) consume(s) end;
+ if (negated)
+ consume(s)
+ end;
let char-list = #();
let range-list = #();
-
until (lookahead(s) = #f) // until end of string
let char = lookahead(s);
consume(s);
if (lookahead(s) = '-')
consume(s);
let second-char = lookahead(s);
- consume(s);
- range-list := add!(range-list, pair(char, second-char));
+ if (second-char)
+ consume(s);
+ range-list := add!(range-list, pair(char, second-char));
+ else
+ // e.g., [a-] is the set containing 'a' and '-'.
+ char-list := add!(char-list, char);
+ char-list := add!(char-list, '-');
+ end;
elseif (char = '\\')
let escaped-char = lookahead(s);
consume(s);
Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan (original)
+++ trunk/libraries/regular-expressions/od-library.dylan Tue Nov 27 06:35:45 2007
@@ -56,8 +56,6 @@
group-start,
group-end,
group-text,
- groups-by-position,
- groups-by-name,
<invalid-match-group>;
end module regexp;
Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Tue Nov 27 06:35:45 2007
@@ -316,7 +316,6 @@
// Insert more special characters here
otherwise =>
- let char = lookahead(s);
consume(s);
make(<parsed-character>, character: char);
end select;
@@ -343,30 +342,39 @@
define inline function parse-character-set
(s :: <parse-string>, info :: <parse-info>)
=> (set :: <parsed-set>)
- let set-string = make(<deque>); // Need something that'll
- // preserve the right ordering
+ let set-string = make(<deque>);
let start-index = s.parse-index;
- for (char = lookahead(s) then lookahead(s),
- until: char == ']')
- consume(s); // eat char
- if (~char)
- parse-error(s.parse-string,
- "Unterminated character set at index %d.",start-index);
- elseif (char ~== '\\')
- push-last(set-string, char);
- else
- let char2 = lookahead(s);
- consume(s); // Eat escaped char
- if (char2 == ']')
- push-last(set-string, ']');
+ local method peek ()
+ lookahead(s)
+ | parse-error(s.parse-string,
+ "Unterminated character set starting at at index %d.",
+ start-index);
+ end;
+ block (done)
+ for (char = peek() then peek(),
+ charset-index from 0)
+ consume(s);
+ if (char == ']')
+ if (charset-index == 0)
+ push-last(set-string, char); // e.g., []] is the set containing ']'.
+ else
+ done();
+ end;
+ elseif (char == '\\')
+ let char2 = peek();
+ consume(s); // Eat escaped char
+ if (char2 == ']')
+ push-last(set-string, ']');
+ else
+ push-last(set-string, '\\');
+ push-last(set-string, char2);
+ end if;
else
- push-last(set-string, '\\');
- push-last(set-string, char2);
+ push-last(set-string, char);
end if;
- end if;
- end for;
- consume(s); // Eat ending brace
- make(<parsed-set>, set: make(info.set-type, description: set-string));
+ end for;
+ end block;
+ make(<parsed-set>, set: make(info.set-type, description: set-string))
end function parse-character-set;
define constant any-char
Modified: trunk/libraries/regular-expressions/regex.dylan
==============================================================================
--- trunk/libraries/regular-expressions/regex.dylan (original)
+++ trunk/libraries/regular-expressions/regex.dylan Tue Nov 27 06:35:45 2007
@@ -2,8 +2,11 @@
Author: Carl Gay
Synopsis: A new API for the regular-expressions library
+// todo -- Improve error message for <invalid-match-group> errors.
+// Make %s and %= display the regex elided if it's too long.
+
+
-// Rename a few things...
define constant <invalid-regexp> = <illegal-regexp>;
@@ -191,7 +194,7 @@
else
let ng = match.groups-by-position.size;
signal(make(<invalid-match-group>,
- format-string: "Group index %d is out of bounds for regex %s match. %s",
+ format-string: "Group number %d is out of bounds for regex %s match. %s",
format-arguments: list(group-number,
match.regular-expression.regexp-pattern,
if (ng == 1)
Modified: trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan (original)
+++ trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan Tue Nov 27 06:35:45 2007
@@ -25,12 +25,12 @@
lines: lines,
start-line-number: line-number - lines.size));
lines := make(<stretchy-vector>);
- elseif (lines.size > 0) // a section never begins with a blank line
- test-output("Line %d: lines.size = %d, matches = %s, prev = %s\n",
- line-number,
- lines.size,
- regexp-search(lines[lines.size - 1], $group-regexp),
- lines[lines.size - 1]);
+ elseif (lines.size < 3)
+ // A section must have a regex, a test string and one group result to make
+ // any sense. Note we don't add the line to the section here.
+ test-output("Line %d: empty line in first 3 lines of a section.\n",
+ line-number);
+ else
add!(lines, line);
end;
else
@@ -56,6 +56,11 @@
end
end method consume-line;
+define method line-number
+ (section :: <section>) => (line-number :: <integer>)
+ section.start-line-number + section.%index
+end method line-number;
+
define method peek-line
(section :: <section>) => (line :: false-or(<string>))
if (section.%index < section.section-lines.size)
@@ -74,44 +79,48 @@
define function check-pcre-section
(section :: <section>)
let regexp = parse-pcre-regexp(section);
- while (peek-line(section))
- let test-string = consume-line(section);
- //test-output(" test string: %s\n", test-string);
- let group-strings = make(<stretchy-vector>);
- block (done-with-this-test-string)
- while (#t)
- let line = peek-line(section);
- let match = line & regexp-search(line, $group-regexp);
- if (match)
- consume-line(section);
- let group-text = regexp-match-group(match, $group-index-of-what-pcre-matched);
- //test-output(" pcre group: %s\n", group-text | "No match");
- if (group-text)
- add!(group-strings, group-text);
+ // If the section has fewer than 3 lines (a regex, a test string and at least
+ // one group result) then all we do is try to compile it (above).
+ if (section.section-lines.size >= 3)
+ while (peek-line(section))
+ let test-string = consume-line(section);
+ //test-output(" test string: %s\n", test-string);
+ let group-strings = make(<stretchy-vector>);
+ block (done-with-this-test-string)
+ while (#t)
+ let line = peek-line(section);
+ let match = line & regexp-search(line, $group-regexp);
+ if (match)
+ consume-line(section);
+ let group-text = regexp-match-group(match, $group-index-of-what-pcre-matched);
+ //test-output(" pcre group: %s\n", group-text | "No match");
+ if (group-text)
+ add!(group-strings, group-text);
+ else
+ assert(regexp-match-group(match, 0) = "No match",
+ "previous line was 'No match'");
+ done-with-this-test-string();
+ end;
else
- assert(regexp-match-group(match, 0) = "No match",
- "previous line was 'No match'");
done-with-this-test-string();
end;
- else
- done-with-this-test-string();
end;
end;
- end;
- if (regexp)
- check-no-errors(format-to-string("search for %s in %s",
- test-string, regexp.regexp-pattern),
- regexp-search(test-string, regexp));
- let match = block ()
- regexp-search(test-string, regexp)
- exception (ex :: <error>)
- #f
- end;
- if (match)
- compare-to-pcre-results(regexp.regexp-pattern, test-string, match, group-strings);
- end;
- end if;
- end while;
+ if (regexp)
+ check-no-errors(format-to-string("search for %s in %s",
+ test-string, regexp.regexp-pattern),
+ regexp-search(test-string, regexp));
+ let match = block ()
+ regexp-search(test-string, regexp)
+ exception (ex :: <error>)
+ #f
+ end;
+ if (match)
+ compare-to-pcre-results(regexp.regexp-pattern, test-string, match, group-strings);
+ end;
+ end if;
+ end while;
+ end if;
end function check-pcre-section;
define function parse-pcre-regexp
@@ -185,11 +194,16 @@
if (match)
check-equal(format-to-string("Match %s against %s -- same # of groups",
test-string, pattern),
- match.groups-by-position.size,
+ size(regexp-match-groups(match)),
pcre-groups.size);
for (group-number from 0,
pcre-group in pcre-groups)
- let our-group = regexp-match-group(match, group-number);
+ // Adding block/exception here causes an infinite loop.
+ // Could it be related to using the Visual Studio 8 linker?
+ // The if also causes an infinite loop. Hmmm.
+ let our-group = /* if (group-number < size(regexp-match-groups(match))) */
+ regexp-match-group(match, group-number)
+ /* end */;
check-equal(format-to-string("Match %s against %s -- group %d is the same",
test-string, pattern, group-number),
our-group,
@@ -202,7 +216,7 @@
pcre-groups.size);
end if;
end function compare-to-pcre-results;
-
+
define test pcre-test ()
let source-directory = environment-variable("OPEN_DYLAN_USER_SOURCES");
if (source-directory)
More information about the chatter
mailing list