[Gd-chatter] r11471 - in trunk/libraries/regular-expressions: . tests
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Sat Oct 27 22:33:02 CEST 2007
Author: cgay
Date: Sat Oct 27 22:33:00 2007
New Revision: 11471
Added:
trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan (contents, props changed)
trunk/libraries/regular-expressions/tests/old-api-test-suite.dylan (contents, props changed)
Modified:
trunk/libraries/regular-expressions/interface.dylan
trunk/libraries/regular-expressions/match.dylan
trunk/libraries/regular-expressions/od-library.dylan
trunk/libraries/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/regex.dylan
trunk/libraries/regular-expressions/tests/library.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
Log:
job: 7357
A little more progress on regular expressions library...
* Switched from "regex" to "regexp" nomenclature.
* Added a <regexp> class which is now the type returned by "parse" and
contains the original pattern plus the group count. The matcher wants
to use the latter.
* Fixed bugs in regexp-search.
Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan (original)
+++ trunk/libraries/regular-expressions/interface.dylan Sat Oct 27 22:33:00 2007
@@ -106,8 +106,8 @@
=> (equal? :: <function>, hash :: <function>);
values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
=> res :: <boolean>;
- key1.regexp-string == key2.regexp-string
- & key1.character-set-type == key2.character-set-type;
+ key1.regexp-string = key2.regexp-string
+ & key1.character-set-type == key2.character-set-type;
end method,
method (key :: <cache-key>, initial-state) => (id :: <integer>, state); // hash()
let (string-id, string-state) = object-hash(key.regexp-string, initial-state);
Modified: trunk/libraries/regular-expressions/match.dylan
==============================================================================
--- trunk/libraries/regular-expressions/match.dylan (original)
+++ trunk/libraries/regular-expressions/match.dylan Sat Oct 27 22:33:00 2007
@@ -108,7 +108,7 @@
descend-re(re, target, case-sensitive?, index,
marks, fail, list(up-proc));
error("A regexp should either match or not match. Why did it "
- "reach this piece of code?");
+ "reach this piece of code?");
end block;
end for;
values(#f); // Failure
Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan (original)
+++ trunk/libraries/regular-expressions/od-library.dylan Sat Oct 27 22:33:00 2007
@@ -38,37 +38,38 @@
use common-dylan;
use string-extensions;
export
- regex, // new API
+ regexp, // new API
regular-expressions; // old API
end library regular-expressions;
-define module regex // new API module
+define module regexp // new API module
create
- compile-regex,
- regex-search,
- <regex>,
- <invalid-regex>,
- invalid-regex-pattern,
- <regex-match>, // results of a successful search
- regex-match-group,
- regex-match-group-count,
+ compile-regexp,
+ regexp-search,
+ <regexp>,
+ <invalid-regexp>,
+ invalid-regexp-pattern,
+ <regexp-match>, // results of a successful search
+ regexp-match-group,
+ regexp-match-groups,
group-start,
group-end,
group-text,
<invalid-match-group>;
-end module regex;
+end module regexp;
define module regular-expressions // old API module
create
regexp-position, make-regexp-positioner,
regexp-match,
regexp-replace, make-regexp-replacer,
+ regexp-group-count,
translate, make-translator,
split, make-splitter,
join,
<illegal-regexp>,
- regexp-pattern;
-
+ regexp-pattern,
+ <regexp-error>;
create
split-string;
end module regular-expressions;
@@ -82,6 +83,7 @@
use %do-replacement;
use %parse-string;
use substring-search;
- use regular-expressions; // API module
- use regex;
+ use regular-expressions; // old API module
+ use regexp; // new API module
end module regular-expressions-impl;
+
Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Sat Oct 27 22:33:00 2007
@@ -57,6 +57,12 @@
constant slot group-number :: <integer>, required-init-keyword: #"group";
end class <mark>;
+// The root of the parsed regexp
+define class <regexp> (<mark>)
+ constant slot regexp-pattern :: <string>, required-init-keyword: #"pattern";
+ constant slot regexp-group-count :: <integer>, required-init-keyword: #"group-count";
+end class <regexp>;
+
define class <union> (<parsed-regexp>) // |
slot left :: <parsed-regexp>, required-init-keyword: #"left";
slot right :: <parsed-regexp>, required-init-keyword: #"right";
@@ -99,10 +105,10 @@
end class <parsed-backreference>;
// Note: I'm pretty sure <simple-error> won't work in GD. --cgay
-define class <regex-error> (<simple-error>)
-end class <regex-error>;
+define class <regexp-error> (<simple-error>)
+end class <regexp-error>;
-define class <illegal-regexp> (<regex-error>)
+define class <illegal-regexp> (<regexp-error>)
constant slot regexp-pattern :: <string>,
required-init-keyword: #"pattern";
end class <illegal-regexp>;
@@ -143,7 +149,7 @@
dot-matches-all :: <boolean> = #f)
=> (info :: <parse-info>)
local method nyi (option-name)
- signal(make(<regex-error>,
+ signal(make(<regexp-error>,
format-string: "The '%s' option is not yet implemented.",
format-arguments: list(option-name)));
end;
@@ -167,8 +173,12 @@
alternatives? :: <boolean>,
quantifiers? :: <boolean>)
let parse-string = make(<parse-string>, string: regexp);
- let parse-tree = make(<mark>, group: 0,
- child: parse-regexp(parse-string, parse-info));
+ let child = parse-regexp(parse-string, parse-info);
+ let parse-tree = make(<regexp>,
+ pattern: regexp,
+ group-count: parse-info.current-group-number + 1,
+ group: 0,
+ child: child);
let optimized-regexp = optimize(parse-tree);
if (optimized-regexp.pathological?)
parse-error(regexp, "A sub-regexp that matches the empty string was quantified.");
Modified: trunk/libraries/regular-expressions/regex.dylan
==============================================================================
--- trunk/libraries/regular-expressions/regex.dylan (original)
+++ trunk/libraries/regular-expressions/regex.dylan Sat Oct 27 22:33:00 2007
@@ -4,9 +4,8 @@
// Rename a few things...
-define constant <regex> = <parsed-regexp>;
-define constant <invalid-regex> = <illegal-regexp>;
-define constant invalid-regex-pattern = regexp-pattern;
+define constant <invalid-regexp> = <illegal-regexp>;
+define constant invalid-regexp-pattern = regexp-pattern;
// Compile the given string into an optimized regular expression.
@@ -25,118 +24,121 @@
// @param dot-matches-newline -- Normally '.' matches any character except for
// newline. If this parameter is true '.' matches newline as well.
//
-// This function signals <invalid-regex> if the regular expression is invalid.
+// This function signals <invalid-regexp> if the regular expression is invalid.
//
-define sealed generic compile-regex
+define sealed generic compile-regexp
(string :: <string>,
#key case-sensitive :: <boolean> = #t,
verbose :: <boolean> = #f,
multi-line :: <boolean> = #f,
dot-matches-all :: <boolean> = #f)
- => (regex :: <regex>);
+ => (regexp :: <regexp>);
-define method compile-regex
+define method compile-regexp
(string :: <string>,
#key case-sensitive :: <boolean> = #t,
verbose :: <boolean> = #f,
multi-line :: <boolean> = #f,
dot-matches-all :: <boolean> = #f)
- => (regex :: <regex>)
+ => (regexp :: <regexp>)
parse(string,
make-parse-info(case-sensitive: case-sensitive,
verbose: verbose,
multi-line: multi-line,
dot-matches-all: dot-matches-all))
-end;
+end method compile-regexp;
-// Returns a <regex-match> containing info about a successful match, or #f if
+// Returns a <regexp-match> containing info about a successful match, or #f if
// no match was found.
//
// @param big -- The string in which to search.
-// @param pattern -- The pattern to search for. If not a <regex>, it will be
-// compiled first with compile-regex (implying that <invalid-regex> may be
+// @param pattern -- The pattern to search for. If not a <regexp>, it will be
+// compiled first with compile-regexp (implying that <invalid-regexp> may be
// signalled), using the defaults for the keyword arguments. If you wish
-// to override them, call compile-regex directly.
+// to override them, call compile-regexp directly.
// @param anchored -- Whether or not the search should be anchored at the start
// position. This is useful because "^..." will only match at the beginning
-// of a string, or after \n if the regex was compiled with multi-line = #t.
+// of a string, or after \n if the regexp was compiled with multi-line = #t.
// @param start -- Where to begin the search.
// @param end -- Where to stop searching.
//
// todo -- Should $ anchor at the provided end position or at the end of the string?
//
-define sealed generic regex-search
+define sealed generic regexp-search
(big :: <string>, pattern :: <object>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
end: _end :: <integer> = big.size)
- => (match :: false-or(<regex-match>));
+ => (match :: false-or(<regexp-match>));
-define method regex-search
+define method regexp-search
(big :: <string>, pattern :: <string>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
end: _end :: <integer> = big.size)
- => (match :: false-or(<regex-match>))
- regex-search(big, compile-regex(pattern),
+ => (match :: false-or(<regexp-match>))
+ regexp-search(big, compile-regexp(pattern),
anchored: anchored, start: start, end: _end)
-end method regex-search;
+end method regexp-search;
-define method regex-search
- (big :: <string>, pattern :: <regex>,
+define method regexp-search
+ (big :: <string>, pattern :: <regexp>,
#key anchored :: <boolean> = #f,
start :: <integer> = 0,
end: _end :: <integer> = big.size)
- => (match :: false-or(<regex-match>))
+ => (match :: false-or(<regexp-match>))
// Copied from regexp-position with some mods to match our interface.
// Unlike regexp-position there is no caching. If you don't want to
- // recompile your regex each time, compile it explicitly with compile-regex
+ // recompile your regexp each time, compile it explicitly with compile-regexp
// and save it.
let substring = make(<substring>, string: big, start: start, end: _end);
let case-sensitive? = #t;
+ let num-groups = pattern.regexp-group-count;
let (matched?, marks)
= if (pattern.is-anchored?)
- anchored-match-root?(pattern, substring, case-sensitive?, last-group + 1, #f);
+ anchored-match-root?(pattern, substring, case-sensitive?, num-groups, #f);
else
let initial = pattern.initial-substring;
let searcher = ~initial.empty?
& make-substring-positioner(initial, case-sensitive: case-sensitive?);
- match-root?(pattern, substring, case-sensitive?, last-group + 1, searcher);
+ match-root?(pattern, substring, case-sensitive?, num-groups, searcher);
end if;
if (matched?)
- let regex-match = make(<regex-match>);
- for (index from 0 by 2)
+ let regexp-match = make(<regexp-match>);
+ for (index from 0 below marks.size by 2)
let bpos = marks[index];
let epos = marks[index + 1];
// It would be nice to make <substring> a real sequence, and possibly unify
// it with the substring implementation in Koala.
let text = copy-sequence(substring.entire-string,
start: substring.start-index + bpos,
- end: substring.end-index + epos);
- add-group(regex-match, make(<match-group>, text: text, start: bpos, end: epos));
+ end: substring.start-index + epos);
+ add-group(regexp-match, make(<match-group>, text: text, start: bpos, end: epos));
end;
- regex-match
+ regexp-match
else
#f
end
-end method regex-search;
+end method regexp-search;
// This has methods for group :: <string> and group :: <integer>.
// Group zero is always the entire match.
-define sealed generic regex-match-group
- (match :: <regex-match>, group :: <object>)
+define sealed generic regexp-match-group
+ (match :: <regexp-match>, group :: <object>)
=> (text :: false-or(<string>),
start-index :: false-or(<integer>),
end-index :: false-or(<integer>));
-// How many groups matched? There will always be at least one; the entire match.
-// (Maybe better to provide a way to iterate over the groups instead, but this
-// should be rarely used since you generally know what your groups are.)
+// Get the groups for the match. There will always be at least one; the entire match.
//
-define sealed generic regex-match-group-count
- (match :: <regex-match>) => (count :: <integer>);
+define sealed generic regexp-match-groups
+ (match :: <regexp-match>) => (groups :: <sequence>);
+define method regexp-match-groups
+ (match :: <regexp-match>) => (groups :: <sequence>)
+ map-as(<simple-object-vector>, identity, match.groups-by-position)
+end;
define sealed class <match-group> (<object>)
constant slot group-text :: <string>,
@@ -147,53 +149,52 @@
required-init-keyword: end:;
end class <match-group>;
-
-define sealed class <regex-match> (<object>)
+define sealed class <regexp-match> (<object>)
// Groups by position. Zero is the entire match.
- constant slot group-vector :: <stretchy-vector> = make(<stretchy-vector>);
- // Maps group names to positions.
- constant slot group-table :: <string-table> = make(<string-table>);
-end class <regex-match>;
+ constant slot groups-by-position :: <stretchy-vector> = make(<stretchy-vector>);
+ constant slot groups-by-name :: <string-table> = make(<string-table>);
+end class <regexp-match>;
define method add-group
- (match :: <regex-match>, group :: <match-group>,
+ (match :: <regexp-match>, group :: <match-group>,
#key name :: false-or(<string>))
- => (match :: <regex-match>)
- add!(match.group-vector, group);
+ => (match :: <regexp-match>)
+ add!(match.groups-by-position, group);
if (name)
- match.group-table[name] := group;
+ match.groups-by-name[name] := group;
end;
match
end;
-define sealed class <invalid-match-group> (<regex-error>)
+define sealed class <invalid-match-group> (<regexp-error>)
end class <invalid-match-group>;
-define method regex-match-group
- (match :: <regex-match>, group :: <integer>)
+define method regexp-match-group
+ (match :: <regexp-match>, group-number :: <integer>)
=> (text :: false-or(<string>),
start-index :: false-or(<integer>),
end-index :: false-or(<integer>))
- if (0 <= group < match.group-vector.size)
- match.group-vector[group]
+ if (0 <= group-number & group-number < match.groups-by-position.size)
+ let group = match.groups-by-position[group-number];
+ values(group.group-text, group.group-start, group.group-end)
else
signal(make(<invalid-match-group>,
format-string: "Match group index %d out of bounds. Max group index is %d.",
- format-arguments: list(group, match.group-vector.size - 1)));
+ format-arguments: list(group-number, match.groups-by-position.size - 1)));
end;
-end method regex-match-group;
+end method regexp-match-group;
-define method regex-match-group
- (match :: <regex-match>, group :: <string>)
+define method regexp-match-group
+ (match :: <regexp-match>, group :: <string>)
=> (text :: false-or(<string>),
start-index :: false-or(<integer>),
end-index :: false-or(<integer>))
- let index = element(match.group-table, group, default: #f);
+ let index :: <integer> = element(match.groups-by-name, group, default: #f);
if (index)
- regex-match-group(match, index)
+ regexp-match-group(match, index)
else
signal(make(<invalid-match-group>,
format-string: "There is no group named %=.",
format-arguments: list(group)));
end;
-end method regex-match-group;
+end method regexp-match-group;
Modified: trunk/libraries/regular-expressions/tests/library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/library.dylan (original)
+++ trunk/libraries/regular-expressions/tests/library.dylan Sat Oct 27 22:33:00 2007
@@ -3,18 +3,32 @@
define library regular-expressions-test-suite
use common-dylan;
use testworks;
- use regular-expressions;
-
+ use regular-expressions,
+ import: { regular-expressions, regexp };
export
regular-expressions-test-suite;
end;
-define module regular-expressions-test-suite
+define module old-api-test-suite
use common-dylan, exclude: { split };
use regular-expressions;
use testworks;
-
export
//pcre-test-suite,
- regular-expressions-test-suite;
+ old-api-test-suite;
+end;
+
+define module new-api-test-suite
+ use common-dylan, exclude: { split };
+ use regexp;
+ use testworks;
+ export
+ new-api-test-suite;
+end;
+
+define module regular-expressions-test-suite
+ use testworks;
+ use old-api-test-suite;
+ use new-api-test-suite;
+ export regular-expressions-test-suite;
end;
Added: trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/tests/new-api-test-suite.dylan Sat Oct 27 22:33:00 2007
@@ -0,0 +1,24 @@
+Module: new-api-test-suite
+Author: Carl Gay
+
+
+define test groups-test ()
+ let text = "My dog has fleas.";
+ let match = regexp-search(text, "My (dog)");
+
+ // The entire match is a group as well, so 2 groups.
+ check-equal("number of groups.", 2, regexp-match-groups(match).size);
+
+ let (txt, bpos, epos) = regexp-match-group(match, 1);
+ check-equal("group text", "dog", txt);
+ check-equal("group start", 3, bpos);
+ check-equal("group end", 6, epos);
+end test groups-test;
+
+define suite new-api-test-suite ()
+ test groups-test;
+// test and-test;
+// test or-test;
+// test anchoring-test;
+// test quantifier-test;
+end suite new-api-test-suite;
Added: trunk/libraries/regular-expressions/tests/old-api-test-suite.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/tests/old-api-test-suite.dylan Sat Oct 27 22:33:00 2007
@@ -0,0 +1,43 @@
+Module: old-api-test-suite
+Author: Carl Gay
+
+
+define function re/position (string, pattern, #rest args)
+ let (#rest marks) = apply(regexp-position, string, pattern, args);
+ marks
+end function re/position;
+
+define test atom-test ()
+ // In current code the empty string is an illegal regex, but Python
+ // (and probably perl?) allow it, so I think we should consider that
+ // a bug. --cgay
+ check-no-errors("atom-0", re/position("", ""));
+ check-equal("atom-1", re/position("a", "a"), #[0, 1]);
+ check-equal("atom-2", re/position("a", "[a]"), #[0, 1]);
+ check-equal("atom-3", re/position("ab", "(a)b"), #[0, 2, 0, 1]);
+ check-equal("atom-4", re/position("a", "\\w"), #[0, 1]);
+ check-equal("atom-5", re/position("a", "."), #[0, 1]);
+ check-equal("atom-6", re/position("a", "a{0}"), #[0, 0]);
+ check-equal("atom-7", re/position("aa", "a{2}"), #[0, 2]);
+ check-equal("atom-8", re/position("aa", "a{1,}"), #[0, 2]);
+ check-equal("atom-9", re/position("aaa", "a{1,8}"), #[0, 3]);
+ check-equal("atom-A", re/position("", "a{,}"), #[0, 0]);
+ check-equal("atom-A1", re/position("aaaaaa", "a{,}"), #[0, 6]);
+ check-condition("atom-B", <illegal-regexp>, re/position("", "a{m,n}"));
+ check-condition("atom-C", <illegal-regexp>, re/position("", "a{m,}"));
+ check-condition("atom-D", <illegal-regexp>, re/position("", "a{,n}"));
+ check-condition("atom-E", <illegal-regexp>, re/position("", "a{m}"));
+ check-condition("atom-F", <illegal-regexp>, re/position("", "a{,"));
+ check-condition("atom-G", <illegal-regexp>, re/position("", "[a"));
+ check-condition("atom-H", <illegal-regexp>, re/position("", "\\"));
+ //check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
+end;
+
+
+define suite old-api-test-suite ()
+ test atom-test;
+// test and-test;
+// test or-test;
+// test anchoring-test;
+// test quantifier-test;
+end;
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan Sat Oct 27 22:33:00 2007
@@ -2,42 +2,7 @@
Author: Carl Gay
-define function re/position (string, pattern, #rest args)
- let (#rest marks) = apply(regexp-position, string, pattern, args);
- marks
-end function re/position;
-
-define test atom-test ()
- // In current code the empty string is an illegal regex, but Python
- // (and probably perl?) allow it, so I think we should consider that
- // a bug. --cgay
- check-no-errors("atom-0", re/position("", ""));
- check-equal("atom-1", re/position("a", "a"), #[0, 1]);
- check-equal("atom-2", re/position("a", "[a]"), #[0, 1]);
- check-equal("atom-3", re/position("ab", "(a)b"), #[0, 2, 0, 1]);
- check-equal("atom-4", re/position("a", "\\w"), #[0, 1]);
- check-equal("atom-5", re/position("a", "."), #[0, 1]);
- check-equal("atom-6", re/position("a", "a{0}"), #[0, 0]);
- check-equal("atom-7", re/position("aa", "a{2}"), #[0, 2]);
- check-equal("atom-8", re/position("aa", "a{1,}"), #[0, 2]);
- check-equal("atom-9", re/position("aaa", "a{1,8}"), #[0, 3]);
- check-equal("atom-A", re/position("", "a{,}"), #[0, 0]);
- check-equal("atom-A1", re/position("aaaaaa", "a{,}"), #[0, 6]);
- check-condition("atom-B", <illegal-regexp>, re/position("", "a{m,n}"));
- check-condition("atom-C", <illegal-regexp>, re/position("", "a{m,}"));
- check-condition("atom-D", <illegal-regexp>, re/position("", "a{,n}"));
- check-condition("atom-E", <illegal-regexp>, re/position("", "a{m}"));
- check-condition("atom-F", <illegal-regexp>, re/position("", "a{,"));
- check-condition("atom-G", <illegal-regexp>, re/position("", "[a"));
- check-condition("atom-H", <illegal-regexp>, re/position("", "\\"));
- //check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
-end;
-
-
define suite regular-expressions-test-suite ()
- test atom-test;
-// test and-test;
-// test or-test;
-// test anchoring-test;
-// test quantifier-test;
+ suite old-api-test-suite;
+ suite new-api-test-suite;
end;
Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid (original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid Sat Oct 27 22:33:00 2007
@@ -1,3 +1,5 @@
library: regular-expressions-test-suite
files: library
regular-expressions-test-suite
+ old-api-test-suite
+ new-api-test-suite
More information about the chatter
mailing list