[Gd-chatter] r11686 - in trunk/libraries/regular-expressions: . tests

cgay at gwydiondylan.org cgay at gwydiondylan.org
Tue Feb 19 00:54:49 CET 2008


Author: cgay
Date: Tue Feb 19 00:54:48 2008
New Revision: 11686

Added:
   trunk/libraries/regular-expressions/tests/api.dylan   (contents, props changed)
Removed:
   trunk/libraries/regular-expressions/regex.dylan
Modified:
   trunk/libraries/regular-expressions/gd-library.dylan
   trunk/libraries/regular-expressions/gd-regular-expressions.lid
   trunk/libraries/regular-expressions/interface.dylan
   trunk/libraries/regular-expressions/match.dylan
   trunk/libraries/regular-expressions/od-library.dylan
   trunk/libraries/regular-expressions/od-regular-expressions.lid
   trunk/libraries/regular-expressions/parse.dylan
   trunk/libraries/regular-expressions/tests/library.dylan
   trunk/libraries/regular-expressions/tests/pcre.dylan
   trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
   trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
Log:
job: 7357
Cleanup interface and use new common-dylan split implementation.
Removed match-groups, which has been replaced by groups-by-position.

Modified: trunk/libraries/regular-expressions/gd-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/gd-library.dylan	(original)
+++ trunk/libraries/regular-expressions/gd-library.dylan	Tue Feb 19 00:54:48 2008
@@ -31,59 +31,53 @@
 //
 //======================================================================
 
-
-// Added regex module with new API.  --cgay, June 2007
-
 define library regular-expressions
   use common-dylan;
   use string-extensions;
   use table-extensions;
   export
-    regex,                                             // new API
-    regular-expressions;                               // old API
+    regular-expressions,
+    regex-implementation;
 end library regular-expressions;
 
-define module regex                  // new API module
+define module regular-expressions
   create
-    compile-regex,
-    regex-search,
-    regex-search-strings,
     <regex>,
+    <regex-match>,
+    <match-group>,
+    <regex-error>,
     <invalid-regex>,
-      invalid-regex-pattern,
-    <regex-match>,                   // results of a successful search
-      regex-match-group,
-      regex-match-group-count,
-      group-start,
-      group-end,
-      group-text,
-      <invalid-match-group>;
-end module regex;
+    <invalid-match-group>,
 
-define module regular-expressions    // old API module
-  create
-    regexp-position, make-regexp-positioner,
-    regexp-match,
-    regexp-replace, make-regexp-replacer,
-    translate, make-translator,
-    split, make-splitter,
-    join,
-    <illegal-regexp>,
-      regexp-pattern;
+    // Compiling and accessing regex info
+    compile-regex,
+    regex-group-count,
+    regex-pattern,
 
-  create
-    split-string;
+    // Search and replace
+    regex-search,
+    regex-search-strings,
+    regex-position,
+    regex-replace,
+
+    // Accessing match groups and individual group info
+    match-group,
+    match-groups,
+    group-text,
+    group-start,
+    group-end;
 end module regular-expressions;
 
-define module regular-expressions-impl
-  use common-dylan,
-    exclude: { split };
+define module regex-implementation
+  use common-dylan;
   use string-conversions;
   use character-type;
   use string-hacking;
   use %do-replacement;
   use %parse-string;
   use substring-search;
-  use regular-expressions;                             // API module
-  use regex;
-end module regular-expressions-impl;
+  use regular-expressions,
+    export: all;
+  export
+    <mark>;
+end module regex-implementation;

Modified: trunk/libraries/regular-expressions/gd-regular-expressions.lid
==============================================================================
--- trunk/libraries/regular-expressions/gd-regular-expressions.lid	(original)
+++ trunk/libraries/regular-expressions/gd-regular-expressions.lid	Tue Feb 19 00:54:48 2008
@@ -4,4 +4,4 @@
        match
        parse
        interface
-       regex
+

Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan	(original)
+++ trunk/libraries/regular-expressions/interface.dylan	Tue Feb 19 00:54:48 2008
@@ -1,14 +1,13 @@
-module:   regular-expressions
+module:   regex-implementation
 author:   Nick Kramer (nkramer at cs.cmu.edu)
-synopsis: This provides a useable interface for users. Functions 
-	  defined outside this file are really too strange and quirky 
-          to be of use to people.
+          Carl Gay (changed everything except regex-position)
+synopsis: The regular-expressions API, insofar as it can be separated into one file.
 copyright: see below
 
 //======================================================================
 //
 // Copyright (c) 1994  Carnegie Mellon University
-// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
+// Copyright (c) 1998-2008  Gwydion Dylan Maintainers
 // All rights reserved.
 // 
 // Use and copying of this software and preparation of derivative
@@ -32,302 +31,447 @@
 //
 //======================================================================
 
-// There are quite a few make-fooer functions hanging around.  Now
-// that regex-position does caching, these are basically useless, but
-// we've kept them around for backwards compatibility.  Unfortunately,
-// internally most of the functions are implemented in terms of
-// make-regex-positioner.  To minimize the amount of rewriting, I've
-// liberally applied seals and inline declarations so that
-// make-regex-positioner won't clobber all type information.  The
-// downside, of course, is that everything's sealed, but hey, no one
-// ever subclassed regex-position anyway.
+//// Caching
 
-
-// Caching
-//
 // Parsing a regex is not cheap, so we cache the parsed regexs and
 // only parse a string if we haven't seen it before.  Because in
 // practice almost all regex strings are string literals, we're free
 // to choose == or = depending on whatever's fastest.  However,
-// because a string is parsed differently depending on whether the
-// search is case sensitive or not, we also have to keep track of that
-// information as well.  (The case dependent parse boils down to the
-// parse creating a <character-set>, which must be either case
-// sensitive or case insensitive)
-//
-
-// This caching scheme fails if we later introduce the ability to change
-// attributes such as case-sensitivity mid-parse, the way (I believe) perl
-// does?  --cgay
-
-// ### Currently, only regex-position uses this cache, because the
-// other functions are still using make-regex-positioner.  With
-// caching, that make-regex-whatever stuff should probably go.
-
-// <cache-key> -- internal
-//
-// What we use for keys in the *regex-cache*.
-//
-define class <cache-key> (<object>)
-  constant slot regex-string :: <string>, 
-    required-init-keyword: #"regex-string";
-  constant slot character-set-type :: <class>, 
-    required-init-keyword: #"character-set-type";
-end class <cache-key>;
-
-// <cache-element> -- internal
-//
-// What we use for elements in a *regex-cache*
+// because a string is parsed differently depending on the arguments
+// passed to compile-regex, we also have to keep track of that
+// information as well.
 //
-define class <cache-element> (<object>)
-  constant slot parse-tree :: <parsed-regex>,
-    required-init-keyword: #"parse-tree";
-  constant slot last-group :: <integer>,
-    required-init-keyword: #"last-group";
-end class <cache-element>;
 
-// <regex-cache> -- internal
-//
-// Maps <cache-key> to <cache-element>.  ### Ideally, we'd be using
-// weak pointers to these strings.  In practice, however, most of the
-// regex strings are literals, so this isn't usually a drawback.
-//
-// This used to compare strings with == rather than =, but this leaks
-// lots of memory
-// 
 define class <regex-cache> (<table>) end;
 
-// table-protocol{<regex-cache>} -- method on imported G.F.
-//
-define method table-protocol (table :: <regex-cache>) 
- => (equal? :: <function>, hash :: <function>);
-  values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
-	  => res :: <boolean>;
-           key1.regex-string = key2.regex-string
-             & key1.character-set-type == key2.character-set-type;
-	 end method,
-	 method (key :: <cache-key>, initial-state) => (id :: <integer>, state); // hash()
-	   let (string-id, string-state) = object-hash(key.regex-string, initial-state);
-	   let (set-type-id, set-type-state) 
-	     = object-hash(key.character-set-type, string-state);
-	   values(merge-hash-ids(string-id, set-type-id, ordered: #t), set-type-state);
-	 end method);
+define method table-protocol
+    (table :: <regex-cache>)
+ => (equal? :: <function>, hash :: <function>)
+ local method hash (key :: <list>, initial-state)
+                => (id :: <integer>, state)
+         let (id, state) = string-hash(head(key), initial-state);
+         for (boolean in tail(key))
+           let (next-id, next-state) = object-hash(boolean, state);
+           id := merge-hash-ids(id, next-id, ordered: #t);
+           state := next-state;
+         end;
+         values(id, state)
+       end;
+  values(\=, hash)
 end method table-protocol;
 
-// *regex-cache* -- internal
-//
-// The only instance of <regex-cache>.  ### Not threadsafe.
-// 
 // Technically not thread safe, but does it matter?  Worst case seems to
 // be a duplicated regex parse.  --cgay
 //
 define constant *regex-cache* = make(<regex-cache>);
 
-// parse-or-use-cached -- internal
+// Compile the given string into an optimized regular expression.
 //
-// Tries to use the cached version of the regex, and if not possible,
-// parses it and adds it to the cache.
-//
-define inline function parse-or-use-cached 
-    (regex :: <string>, parse-info :: <parse-info>)
- => (parsed-regex :: <parsed-regex>, last-group :: <integer>);
-  let key = make(<cache-key>, regex-string: regex, 
-		 character-set-type: parse-info.character-set-type); 
-  let cached-value = element(*regex-cache*, key, default: #f);
-  if (cached-value)
-    values(cached-value.parse-tree, cached-value.last-group);
+// @param case-sensitive -- Whether to be case sensitive when matching character
+//   sets (e.g., [a-z]).  This does not affect other character/string matching yet.
+//   TODO -- but it should
+//
+// @param verbose -- If true, allows you to write regular expressions that
+//   are easier to read by including whitespace and comments in them that
+//   will be ignored.
+//
+// @param multi-line -- If true, '^' matches at the beginning of the string and
+//   at the beginning of each line (immediately following each newline); and '$'
+//   matches at the end of the string and at the end of each line (immediately
+//   preceding each newline). By default, "^" matches only at the beginning of
+//   the string, and "$" only at the end of the string.
+//
+// @param dot-matches-all -- Normally '.' matches any character except for
+//   newline.  If this parameter is true '.' matches newline as well.
+//
+// @param use-cache -- If true then check for a regex in the cache matching
+//   the given set of arguments.  If not found in the cache, compile it and
+//   then add it to the cache (and return it).
+//
+// This function signals <invalid-regex> if the regular expression is invalid.
+//
+define sealed generic compile-regex
+    (pattern :: <string>,
+     #key case-sensitive :: <boolean> = #t,
+          verbose :: <boolean> = #f,
+          multi-line :: <boolean> = #f,
+          dot-matches-all :: <boolean> = #f,
+          use-cache :: <boolean> = #t)
+ => (regex :: <regex>);
+
+define method compile-regex
+    (pattern :: <string>,
+     #key case-sensitive  :: <boolean> = #t,
+          verbose         :: <boolean> = #f,
+          multi-line      :: <boolean> = #f,
+          dot-matches-all :: <boolean> = #f,
+          use-cache       :: <boolean> = #t)
+ => (regex :: <regex>)
+  if (use-cache)
+    let cache-key = list(pattern, case-sensitive, verbose, multi-line,
+                         dot-matches-all);
+    element(*regex-cache*, cache-key, default: #f)
+    | begin
+        *regex-cache*[cache-key]
+          := compile-regex(pattern,
+                           case-sensitive: case-sensitive,
+                           verbose: verbose,
+                           dot-matches-all: dot-matches-all,
+                           use-cache: #f);
+       end
   else
-    let (parsed-regex, last-group) = parse(regex, parse-info);
-    *regex-cache*[key] := make(<cache-element>,
-                                parse-tree: parsed-regex,
-				last-group: last-group);
-    values(parsed-regex, last-group);
-  end if;
-end function parse-or-use-cached;
-
-
-// regex positioner stuff
+    parse(pattern,
+          make-parse-info(case-sensitive: case-sensitive,
+                          verbose: verbose,
+                          multi-line: multi-line,
+                          dot-matches-all: dot-matches-all))
+  end
+end method compile-regex;
 
 // Find the position of a regular expression inside a string.  If the
 // regex is not found, return #f, otherwise return a variable number
-// of marks.
-//
-define function regex-position
-    (regex :: <string>, big :: <string>, #key start: big-start = 0,
-     end: big-end = #f, case-sensitive = #f)
- => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>));
-  let substring = make(<substring>, string: big, start: big-start,
-		       end: big-end | big.size);
-  let (parsed-regex, last-group) 
-    = parse-or-use-cached(regex, make-parse-info(case-sensitive: case-sensitive));
-
+// of marks.  This is a low-level API, returning indices marking the
+// start and end of groups.  Use regex-search if you want to get a
+// <regex-match> object back.
+//
+define generic regex-position
+    (regex :: <object>, big :: <string>,
+     #key start :: <integer>,
+          end: epos :: <integer>,
+          case-sensitive :: <boolean>)
+ => (regex-start :: false-or(<integer>), #rest marks);
+
+define method regex-position
+    (pattern :: <string>, string :: <string>,
+     #key start :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>))
+  regex-position(compile-regex(pattern), string, start: start, end: epos,
+                 case-sensitive: case-sensitive)
+end method regex-position;
+
+define method regex-position
+    (regex :: <regex>, string :: <string>,
+     #key start :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>))
+  let substring = make(<substring>, string: string, start: start, end: epos);
   let (matched, marks)
-    = if (parsed-regex.is-anchored?)
-	anchored-match-root?(parsed-regex, substring, case-sensitive,
-			     last-group + 1, #f);
+    = if (regex.is-anchored?)
+        let searcher = #f;
+	anchored-match-root?(regex, substring, case-sensitive,
+			     regex.regex-group-count, searcher);
       else
-	let initial = parsed-regex.initial-substring;
+	let initial = regex.initial-substring;
 	let searcher = ~initial.empty?
 	  & make-substring-positioner(initial, case-sensitive: case-sensitive);
-	match-root?(parsed-regex, substring, case-sensitive, last-group + 1,
+	match-root?(regex, substring, case-sensitive, regex.regex-group-count,
 		    searcher);
       end if;
   if (matched)  
-    apply(values, marks);
+    apply(values, marks)
   else
-    #f  
-  end if;
-end function regex-position;
-
-// Once upon a time, this was how you interfaced to the NFA stuff
-// (maximum-compile: #t).  That's gone.  Now it's just here for
-// backwards compatibility.  All keywords except case-sensitive are
-// now ignored.
+    #f
+  end
+end method regex-position;
+
+// Deprecated.  Use curry(regex-position, regex) or a local method instead.
 //
 define inline function make-regex-positioner
-    (regex :: <string>, 
-     #key byte-characters-only = #f, need-marks = #t, maximum-compile = #f,
-     case-sensitive = #f)
- => regex-positioner :: <function>;
-  method (big :: <string>, #key start: big-start = 0,
-	  end: big-end = #f)
+    (regex :: type-union(<string>, <regex>), 
+     #key case-sensitive :: <boolean> = #t)
+ => (regex-positioner :: <function>)
+  method (string :: <string>,
+          #key start :: <integer> = 0,
+               end: epos :: <integer> = string.size)
    => (regex-start :: false-or(<integer>), 
-       #rest marks :: false-or(<integer>));
-    regex-position(regex, big, case-sensitive: case-sensitive, 
-		    start: big-start, end: big-end);
+       #rest marks :: false-or(<integer>))
+    regex-position(regex, string,
+                   case-sensitive: case-sensitive, 
+                   start: start,
+                   end: epos);
   end method;
 end function make-regex-positioner;
 
-
-// Functions based on regex-position
-
-define function regex-replace
-    (input :: <string>, regex :: <string>, new-substring :: <string>,
-     #key count = #f, case-sensitive = #f, start = 0, end: input-end = #f)
- => changed-string :: <string>;
+define generic regex-replace
+    (regex :: <object>, big :: <string>, new-substring :: <string>,
+     #key start :: <integer>,
+          end: epos :: <integer>,
+          count :: false-or(<integer>),
+          case-sensitive :: <boolean>)
+ => (new-string :: <string>);
+
+define method regex-replace
+    (regex :: <string>, big :: <string>, new-substring :: <string>,
+     #key count :: false-or(<integer>),
+          start :: <integer> = 0,
+          end: epos :: <integer> = big.size,
+          case-sensitive :: <boolean> = #t)
+ => (new-string :: <string>)
+  regex-replace(compile-regex(regex), big, new-substring,
+                start: start,
+                end: epos,
+                count: count,
+                case-sensitive: case-sensitive)
+end method regex-replace;
+
+define method regex-replace
+    (regex :: <regex>, big :: <string>, new-substring :: <string>,
+     #key count :: false-or(<integer>),
+          start :: <integer> = 0,
+          end: epos :: <integer> = big.size,
+          case-sensitive :: <boolean> = #t)
+ => (new-string :: <string>)
   let positioner
     = make-regex-positioner(regex, case-sensitive: case-sensitive);
-  do-replacement(positioner, new-substring, input, start, 
-		 input-end, count, #t);
-end function regex-replace;
-
-define inline function make-regex-replacer 
-    (regex :: <string>, #key replace-with, case-sensitive = #f)
- => replacer :: <function>;
-  let positioner
-    = make-regex-positioner(regex, case-sensitive: case-sensitive);
-  if (replace-with)
-    method (input :: <string>, #key count: count, 
-	    start = 0, end: input-end = #f)
-     => string :: <string>;
-      do-replacement(positioner, replace-with, input, start, 
-		     input-end, count, #t);
-    end method;
-  else
-    method (input :: <string>, new-substring :: <string>, 
-	    #key count = #f, start = 0, end: input-end = #f)
-     => string :: <string>;
-      do-replacement(positioner, new-substring, input, 
-		     start, input-end, count, #t);
-    end method;
-  end if;
-end function make-regex-replacer;
-
-// Like Perl's split function
-//
-define function split
-    (input :: <string>, pattern :: <string>, 
-     #key count = #f, remove-empty-items = #t, start = 0, end: input-end = #f)
- => (strings :: <sequence>);
-  let positioner = make-regex-positioner(pattern);
-  split-string(positioner, input, start, input-end | size(input),
-	       count, remove-empty-items);
-end function split;
-
-define inline function make-splitter
-    (pattern :: <string>) => splitter :: <function>;
-  let positioner = make-regex-positioner(pattern);
-  method (string :: <string>, #key count = #f,
-	  remove-empty-items = #t, start = 0, end: input-end = #f)
-   => (#rest whole-bunch-of-strings :: <string>);
-    split-string(positioner, string, start, input-end | size(string), 
-		 count, remove-empty-items);
-  end method;
-end function make-splitter;
+  do-replacement(positioner, new-substring, big, start,
+		 epos, count, #t);
+end method regex-replace;
+
+// todo -- Improve error message for <invalid-match-group> errors.
+//         Make %s and %= display the regex elided if it's too long.
 
-// Used by split.  Not exported.  (Yes it is.  --cgay)
-//
-define function split-string
-    (positioner :: <function>, input :: <string>, start :: <integer>, 
-     input-end :: <integer>, count :: false-or(<integer>), 
-     remove-empty-items :: <object>)
- => (strings :: <sequence>);
-  let strings = make(<deque>);
-  block (done)
-    let end-of-last-match = 0;
-    let start-of-where-to-look = start;
-    let string-number = 1;    // Since count: starts at 1, so 
-                              // should string-number
-    while (#t)
-      let (substring-start, substring-end)
-	= positioner(input, start: start-of-where-to-look, end: input-end);
-      if (~substring-start | (count & (count <= string-number)))
-	push-last(strings, copy-sequence(input, start: end-of-last-match));
-	done(); 
-      elseif ((substring-start = start-of-where-to-look)
-		&  remove-empty-items)
-	      // delimited item is empty
-	end-of-last-match := substring-end;
-	start-of-where-to-look := end-of-last-match;
+
+
+// Returns a <regex-match> containing info about a successful match, or #f if
+// no match was found.
+//
+// @param big -- The string in which to search.
+// @param pattern -- The pattern to search for.  If not a <regex>, it will be
+//   compiled first with compile-regex (implying that <invalid-regex> may be
+//   signalled), using the defaults for the keyword arguments.  If you wish
+//   to override them, call compile-regex directly.
+// @param anchored -- Whether or not the search should be anchored at the start
+//   position.  This is useful because "^..." will only match at the beginning
+//   of a string, or after \n if the regex was compiled with multi-line = #t.
+// @param start -- Where to begin the search.
+// @param end -- Where to stop searching.
+// @param case-sensitive -- Whether to be case-sensitive while matching.  Default
+//   is #t.  (I don't believe this affects character set (e.g., [a-z]) matching.
+//   Check it.)
+//
+// todo -- Should $ anchor at the provided end position or at the end of the string?
+//
+define sealed generic regex-search
+    (pattern :: <object>, string :: <string>,
+     #key anchored :: <boolean>,
+          start :: <integer>,
+          end: epos :: <integer>,
+          case-sensitive :: <boolean>)
+ => (match :: false-or(<regex-match>));
+
+define method regex-search
+    (pattern :: <string>, string :: <string>,
+     #key anchored  :: <boolean> = #f,
+          start     :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (match :: false-or(<regex-match>))
+  regex-search(compile-regex(pattern), string,
+               anchored: anchored,
+               start: start,
+               end: epos,
+               case-sensitive: case-sensitive)
+end method regex-search;
+
+define method regex-search
+    (pattern :: <regex>, string :: <string>,
+     #key anchored :: <boolean> = #f,
+          start    :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (match :: false-or(<regex-match>))
+  let substring = make(<substring>, string: string, start: start, end: epos);
+  let num-groups = pattern.regex-group-count;
+  let (matched?, marks)
+    = if (pattern.is-anchored?)
+        anchored-match-root?(pattern, substring, case-sensitive, num-groups, #f);
       else
-	let new-string = copy-sequence(input, start: end-of-last-match, 
-				       end: substring-start);
-	if (~new-string.empty? | ~remove-empty-items)
-	  push-last(strings, new-string);
-	  string-number := string-number + 1;
-	  end-of-last-match := substring-end;
-	  start-of-where-to-look := end-of-last-match;
-	end if;
+        let initial = pattern.initial-substring;
+        let searcher = ~initial.empty?
+          & make-substring-positioner(initial, case-sensitive: case-sensitive);
+        match-root?(pattern, substring, case-sensitive, num-groups, searcher);
       end if;
-    end while;
-  end block;
-  if (remove-empty-items)
-    remove!(strings, #f, test: method (a, b) a.empty? end);
+  if (matched?)
+    let regex-match = make(<regex-match>, regular-expression: pattern);
+    let group-number-to-name :: <table> = pattern.group-number-to-name;
+    for (index from 0 below marks.size by 2)
+      let group-number = floor/(index, 2);
+      let group-name = element(group-number-to-name, group-number, default: #f);
+      let bpos = marks[index];
+      let epos = marks[index + 1];
+      if (bpos & epos)
+        add-group(regex-match,
+                  make(<match-group>,
+                       text: copy-sequence(string, start: bpos, end: epos),
+                       start: bpos,
+                       end: epos),
+                  group-name);
+      else
+        // This group wasn't matched.
+        add-group(regex-match, #f, group-name);
+      end;
+    end;
+    regex-match
   else
-    strings
-  end if;
-end function split-string;
-
-// join--like Perl's join
-//
-// This is not really any more efficient than concatenate-as, but it's
-// more convenient.
-//
-define function join (delimiter :: <byte-string>, #rest strings)
- => big-string :: <byte-string>;
-  let length = max(0, (strings.size - 1 ) * delimiter.size);
-  for (string in strings)
-    length := length + string.size;
-  end for;
-  let big-string = make(<byte-string>, size: length);
-  let big-index = 0;
-  for (i from 0 to strings.size - 2)  // Don't iterate over the last string
-    let string = strings[i];
-    let new-index = big-index + string.size;
-    big-string := replace-subsequence!(big-string, string, 
-				       start: big-index, end: new-index);
-    big-index := new-index;
-    let new-index = big-index + delimiter.size;
-    big-string := replace-subsequence!(big-string, delimiter, 
-				       start: big-index, end: new-index);
-    big-index := new-index;
-  end for;
-  if (strings.size > 0)
-    big-string 
-      := replace-subsequence!(big-string, strings.last, 
-			      start: big-index, end: big-string.size);
-  end if;
-  big-string;
-end function join;
-
+    #f
+  end
+end method regex-search;
+
+// Like regex-search, but returns a string or #f for each group in the regular
+// expression, instead of a <regex-match>.
+define sealed generic regex-search-strings
+    (pattern :: <object>, string :: <string>,
+     #key anchored :: <boolean>,
+          start :: <integer>,
+          end: epos :: <integer>,
+          case-sensitive :: <boolean>)
+ => (#rest strings);
+
+define method regex-search-strings
+    (pattern :: <string>, string :: <string>,
+     #key anchored  :: <boolean> = #f,
+          start     :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (#rest strings)
+  regex-search-strings(compile-regex(pattern), string,
+		       anchored: anchored,
+                       start: start,
+                       end: epos,
+                       case-sensitive: case-sensitive)
+end method regex-search-strings;
+
+define method regex-search-strings
+    (pattern :: <regex>, string :: <string>,
+     #key anchored :: <boolean> = #f,
+          start    :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          case-sensitive :: <boolean> = #t)
+ => (#rest strings)
+  let match = regex-search(pattern, string,
+			   anchored: anchored,
+                           start: start,
+                           end: epos,
+                           case-sensitive: case-sensitive);
+  if (match)
+    apply(values, map(method (group) group & group.group-text end,
+                      match.groups-by-position))
+  else
+    #f
+  end
+end method regex-search-strings;
+
+define sealed class <match-group> (<object>)
+  constant slot group-text :: <string>,
+    required-init-keyword: text:;
+  constant slot group-start :: <integer>,
+    required-init-keyword: start:;
+  constant slot group-end :: <integer>,
+    required-init-keyword: end:;
+end class <match-group>;
+
+define sealed class <regex-match> (<object>)
+  // Groups by position.  Zero is the entire match.
+  constant slot groups-by-position :: <stretchy-vector> = make(<stretchy-vector>);
+  // Named groups, if any.  Initial size 0 on the assumption that most regular
+  // expressions won't use named groups.
+  constant slot groups-by-name :: <string-table> = make(<string-table>, size: 0);
+  constant slot regular-expression :: <regex>, required-init-keyword: regular-expression:;
+end class <regex-match>;
+
+define method add-group
+    (match :: <regex-match>,
+     group :: false-or(<match-group>),
+     name :: false-or(<string>))
+ => (match :: <regex-match>)
+  add!(match.groups-by-position, group);
+  if (name)
+    match.groups-by-name[name] := group;
+  end;
+  match
+end;
+
+define sealed class <invalid-match-group> (<regex-error>)
+end class <invalid-match-group>;
+
+// This has methods for group :: <string> and group :: <integer>.
+// Group zero is always the entire match.
+//
+define sealed generic match-group
+    (match :: <regex-match>, group :: <object>)
+ => (text :: false-or(<string>),
+     start-index :: false-or(<integer>),
+     end-index :: false-or(<integer>));
+
+define method match-group
+    (match :: <regex-match>, group-number :: <integer>)
+ => (text :: false-or(<string>),
+     start-index :: false-or(<integer>),
+     end-index :: false-or(<integer>))
+  if (0 <= group-number & group-number < match.groups-by-position.size)
+    let group = match.groups-by-position[group-number];
+    if (group)
+      values(group.group-text, group.group-start, group.group-end)
+    else
+      values(#f, #f, #f)
+    end
+  else
+    let ng = match.groups-by-position.size;
+    signal(make(<invalid-match-group>,
+                format-string: "Group number %d is out of bounds for regex %s match.  %s",
+                format-arguments: list(group-number,
+                                       match.regular-expression.regex-pattern,
+                                       if (ng == 1)
+                                         "There is only 1 group."
+                                       else
+                                         format-to-string("There are %d groups.", ng)
+                                       end)));
+  end;
+end method match-group;
+
+define method match-group
+    (match :: <regex-match>, group :: <string>)
+ => (text :: false-or(<string>),
+     start-index :: false-or(<integer>),
+     end-index :: false-or(<integer>))
+  let group = element(match.groups-by-name, group, default: #f);
+  if (group)
+    values(group.group-text, group.group-start, group.group-end)
+  else
+    signal(make(<invalid-match-group>,
+                format-string: "There is no group named %=.",
+                format-arguments: list(group)));
+  end
+end method match-group;
+
+
+//// Utilities
+
+// The split method is exported from the common-dylan module.
+//
+define method split
+    (string :: <string>, separator :: <regex>,
+     #key start :: <integer> = 0,
+          end: epos :: <integer> = string.size,
+          count :: <integer> = epos + 1,
+          case-sensitive :: <boolean> = #t,
+          remove-if-empty :: <boolean> = #f)
+ => (parts :: <sequence>)
+  local method find-regex (string :: <string>,
+                           bpos :: <integer>,
+                           epos :: false-or(<integer>))
+          let match = regex-search(separator, string, start: bpos, end: epos);
+          if (match)
+            let (ignore, match-start, match-end) = match-group(match, 0);
+            values(match-start, match-end)
+          else
+            #f
+          end
+        end method find-regex;
+  split(string, find-regex, start: start, end: epos, count: count,
+        remove-if-empty: remove-if-empty)
+end method split;
 

Modified: trunk/libraries/regular-expressions/match.dylan
==============================================================================
--- trunk/libraries/regular-expressions/match.dylan	(original)
+++ trunk/libraries/regular-expressions/match.dylan	Tue Feb 19 00:54:48 2008
@@ -1,4 +1,4 @@
-module:   regular-expressions
+module:   regex-implementation
 author:   Nick Kramer (nkramer at cs.cmu.edu)
 synopsis: This takes a parsed regular expression and tries to find a match
           for it.

Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan	(original)
+++ trunk/libraries/regular-expressions/od-library.dylan	Tue Feb 19 00:54:48 2008
@@ -31,57 +31,58 @@
 //
 //======================================================================
 
-// Revamped.  --cgay Dec 2007
-
 define library regular-expressions
+  use dylan;
   use common-dylan;
   use string-extensions;
-  use io,
-    import: { format-out };  // for debugging only
   export
-    regular-expressions;
-end;
+    regular-expressions,
+    regex-implementation;
+end library regular-expressions;
 
 define module regular-expressions
-  use common-dylan,
-    exclude: {
-      split    // todo -- just add a method to this one
-    };
-  use format-out;  // for debugging only
+  create
+    <regex>,
+    <regex-match>,
+    <match-group>,
+    <regex-error>,
+    <invalid-regex>,
+    <invalid-match-group>,
+
+    // Compiling and accessing regex info
+    compile-regex,
+    regex-group-count,
+    regex-pattern,
+
+    // Search and replace
+    regex-search,
+    regex-search-strings,
+    regex-position,
+    regex-replace,
+
+    // Accessing match groups and individual group info
+    match-group,
+    groups-by-position,
+    groups-by-name,
+    group-text,
+    group-start,
+    group-end;
+end module regular-expressions;
+
+define module regex-implementation
+  use common-dylan;
+  use dylan-extensions,
+    import: { values-hash, string-hash, gefiltafishk };
   use string-conversions;
   use character-type;
   use string-hacking;
   use %do-replacement;
   use %parse-string;
   use substring-search;
+  use regular-expressions,
+    export: all;
   export
-    compile-regex,
-    <regex>,
-      regex-search,
-      regex-search-strings,
-      regex-group-count,
-      regex-position,
-      make-regex-positioner,
-      regex-replace,
-      make-regex-replacer,
-    <regex-error>,
-    <invalid-regex>,
-      regex-pattern,
-    <regex-match>,                   // results of a successful search
-      <match-group>,
-        groups-by-position,
-        groups-by-name,
-      match-group,
-      match-groups,
-      group-start,
-      group-end,
-      group-text,
-      <invalid-match-group>,
-
-    split,
-    make-splitter,
-    join;
-  export
-    split-string;   // ???
-end module regular-expressions;
-
+    // extra exports for the test suite to use
+    <mark>,
+    *regex-cache*;
+end module regex-implementation;

Modified: trunk/libraries/regular-expressions/od-regular-expressions.lid
==============================================================================
--- trunk/libraries/regular-expressions/od-regular-expressions.lid	(original)
+++ trunk/libraries/regular-expressions/od-regular-expressions.lid	Tue Feb 19 00:54:48 2008
@@ -3,4 +3,4 @@
  match.dylan
  parse.dylan
  interface.dylan
- regex.dylan
+

Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan	(original)
+++ trunk/libraries/regular-expressions/parse.dylan	Tue Feb 19 00:54:48 2008
@@ -1,4 +1,4 @@
-module: regular-expressions
+module: regex-implementation
 author: Nick Kramer (nkramer at cs.cmu.edu)
 copyright: see below
 
@@ -119,18 +119,18 @@
 define class <regex-error> (<format-string-condition>, <error>)
 end class <regex-error>;
 
-define class <illegal-regex> (<regex-error>)
+define class <invalid-regex> (<regex-error>)
   constant slot regex-pattern :: <string>, 
     required-init-keyword: #"pattern";
-end class <illegal-regex>;
+end class <invalid-regex>;
 
-define sealed domain make (singleton(<illegal-regex>));
-define sealed domain initialize (<illegal-regex>);
+define sealed domain make (singleton(<invalid-regex>));
+define sealed domain initialize (<invalid-regex>);
 
 define function parse-error
     (pattern :: <string>, format-string :: <string>, #rest format-args)
   let msg = apply(format-to-string, format-string, format-args);
-  signal(make(<illegal-regex>,
+  signal(make(<invalid-regex>,
               format-string: "Invalid regular expression: %=.  %s",
               format-arguments: list(pattern, msg),
               pattern: pattern));
@@ -182,8 +182,12 @@
   slot has-quantifiers? :: <boolean> = #f;
   slot current-group-number :: <integer> = 0;
   constant slot group-number-to-name :: <table> = make(<table>);
-  constant slot character-set-type :: <class>,
-    required-init-keyword: #"set-type";
+
+  // Currently this is only used for character sets (e.g., [a-zA-z]).
+  // It could also be used to generate case-insensitive parsed-characters
+  // and parsed-strings, but right now you get that by passing
+  // case-insensitive: #t to the match function.
+  slot case-sensitive? :: <boolean> = #t;
 
   // If true then . matches \n.  (?s) /s
   slot dot-matches-all? :: <boolean>,
@@ -202,6 +206,7 @@
 // These setters will be used eventually, when we implement the ability to change
 // them via subpatterns like (?i).  Until then, this prevents warnings.
 begin
+  case-sensitive?-setter;
   dot-matches-all?-setter;
   extended?;
   extended?-setter;
@@ -217,18 +222,22 @@
  => (info :: <parse-info>)
   verbose & not-yet-implemented("'verbose' option");
   multi-line & not-yet-implemented("'multi-line' option");
-  let char-set-type = if (case-sensitive)
-                        <case-sensitive-character-set>
-                      else
-                        <case-insensitive-character-set>
-                      end;
   make(<parse-info>,
-       set-type: char-set-type,
+       case-sensitive: case-sensitive,
        verbose: verbose,
        multi-line: multi-line,
        dot-matches-all: dot-matches-all)
 end function make-parse-info;
 
+define inline method character-set-type
+    (info :: <parse-info>) => (set-type :: subclass(<character-set>))
+  if (info.case-sensitive?)
+    <case-sensitive-character-set>
+  else
+    <case-insensitive-character-set>
+  end
+end method character-set-type;
+
 define method has-named-group?
     (info :: <parse-info>, name :: <string>)
   member?(name, info.group-number-to-name, test: \=)

Added: trunk/libraries/regular-expressions/tests/api.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/tests/api.dylan	Tue Feb 19 00:54:48 2008
@@ -0,0 +1,259 @@
+Module: regular-expressions-test-suite
+
+define library-spec regular-expressions-api ()
+  module regular-expressions;
+end library-spec regular-expressions-api;
+
+define module-spec regular-expressions ()
+  sealed instantiable class <regex> (<mark>);
+  sealed instantiable class <regex-error> (<format-string-condition>, <error>);
+  sealed instantiable class <invalid-regex> (<regex-error>);
+  sealed instantiable class <invalid-match-group> (<regex-error>);
+  sealed instantiable class <match-group> (<object>);
+  sealed instantiable class <regex-match> (<object>);
+
+  // Compiling and accessing regex info
+  sealed generic-function compile-regex
+      (<string>, #"key", #"case-sensitive", #"dot-matches-all", #"verbose", #"multi-line")
+      => (<regex>);
+  sealed generic-function regex-pattern (<regex>) => (<string>);
+  sealed generic-function regex-group-count
+      (<regex>) => (<integer>);
+
+  // Search and replace
+  sealed generic-function regex-position
+      (<object>, <string>, #"key", #"start", #"end", #"case-sensitive")
+      => (false-or(<string>), #"rest");
+  sealed generic-function regex-replace
+      (<object>, <string>, <string>, #"key", #"start", #"end", #"case-sensitive", #"count")
+      => (<string>);
+  sealed generic-function regex-search
+      (<object>, <string>, #"key", #"anchored", #"start", #"end")
+      => (false-or(<regex-match>));
+  sealed generic-function regex-search-strings
+      (<object>, <string>, #"key", #"anchored", #"start", #"end")
+      => (false-or(<regex-match>));
+
+  // Accessing match groups
+  sealed generic-function groups-by-position
+      (<regex-match>) => (<sequence>);
+  sealed generic-function groups-by-name
+      (<regex-match>) => (<sequence>);
+  sealed generic-function match-group
+      (<regex-match>) => (false-or(<string>), false-or(<integer>), false-or(<integer>));
+
+  // Accessing individual group data
+  sealed generic-function group-text
+      (<match-group>) => (false-or(<string>));
+  sealed generic-function group-end
+      (<match-group>) => (false-or(<integer>));
+  sealed generic-function group-start
+      (<match-group>) => (false-or(<integer>));
+end module-spec regular-expressions;
+
+define regular-expressions function-test regex-position ()
+  check-no-errors("regex-position with a string regex",
+                  regex-position("pattern", "pattern"));
+  check-no-errors("regex-position with a regex regex",
+                  regex-position(compile-regex("pattern"), "pattern"));
+  local method check-pos
+          (test-name :: <string>, regex :: <string>, big :: <string>,
+           positions :: <vector>, #rest args)
+    check-equal(test-name,
+                positions,
+                begin
+                  let (#rest marks) = apply(regex-position, regex, big, args);
+                  marks
+                end);
+  end method check-pos;
+
+  check-pos("pos test #1", "a*", "aaaaaaaaaa", #[0, 10]);
+  check-pos("pos test #2", "a*", "aaaaabaaaa", #[0, 5]);
+  check-pos("pos test #3", "ab*(cd|e)", "acd", #[0, 3, 1, 3]);
+  check-pos("pos test #4", "ab*(cd|e)", "abbbbe", #[0, 6, 5, 6]);
+  check-pos("pos test #5", "ab*(cd|e)", "ab", #[#f]);
+
+  check-pos("pos test #6", "^a$", "aaaaaaaaaaaaaa", #[#f]);
+  check-pos("pos test #7", "^a$", "a", #[0, 1]);
+  check-pos("pos test #8", "(^a$)|aba", "abba", #[#f]);
+  check-pos("pos test #9", "(^a$)|aba", "aba", #[0, 3, #f, #f]);
+
+  check-pos("pos test #a",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain in spain", 
+            #[0, 17, 9, 11]);
+  check-pos("pos test #b",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain spain",
+            #[#f]);
+  check-pos("pos test #c",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain ininin spain",
+            #[0, 21, 13, 15]);
+  check-pos("pos test #d",
+            "\\bthe rain (in){1,5} spain$", 
+            "bork the rain in spain",
+            #[5, 22, 14, 16]);
+  check-pos("pos test #e",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain in spainland",
+            #[#f]);
+  check-pos("pos test #f",
+            "\\bthe rain (in){1,5} spain$",
+            "blathe rain in spain",
+            #[#f]);
+  check-pos("pos test #g",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain ininininin spain",
+            #[0, 25, 17, 19]);
+  check-pos("pos test #h",
+            "\\bthe rain (in){1,5} spain$",
+            "the rain inininininin spain",
+            #[#f]);
+  check-pos("pos test #i", "a*", "aaaaa", #[0, 5]);
+  check-pos("pos test #j", "a*", "a", #[0, 1]);
+  check-pos("pos test #k", "a*", "", #[0, 0]);
+  check-pos("pos test #L", "bba*c", "bbc", #[0, 3]);
+  check-pos("pos test #m", "a", "bbbb", #[#f]);
+  check-pos("pos test #n", "a*", "aaaaa", #[3, 4], start: 3, end: 4);
+  check-pos("pos test #o", "^a*", "aaaaa", #[2, 5], start: 2);
+  check-pos("pos test #p", "^a*", "baaaaa", #[2, 6], start: 2);
+  check-pos("pos test #q", "^a+", "bbbaaaaa", #[#f], start: 2);
+  check-pos("pos test #r", "a+", "AAaAA", #[0, 5], case-sensitive: #f);
+  check-pos("pos test #s", "a+", "AAaAA", #[2, 3]);
+  check-pos("pos test #t", "[a-f]+", "SdFbIeNvI", #[1, 2]);
+  // This one is failing due to bug 7371
+  check-pos("pos test #u", "[a-f]+", "SdFbIeNvI", #[1, 4], case-sensitve: #f);
+  check-pos("pos test #v", "[\\s\\]]+", "blah[   \t]", #[5, 10]);
+
+  // test escaped characters
+  check-pos("pos test #w", "\\\"", "\\\"", #[1, 2]);
+  check-pos("pos test #x", "\\\\\"", "\\\"", #[0, 2]);
+  check-condition("pos test #y",
+                  <invalid-regex>,
+                  compile-regex("((a*)|(b*))*c"));
+end function-test regex-position;
+
+define regular-expressions function-test regex-replace ()
+  let big-string = "The rain in spain and some other text";
+  check-no-errors("regex-replace with regex pattern",
+                  regex-replace(compile-regex("the (.*) in (\\w*\\b)"),
+                                big-string,
+                                "\\2 has its \\1"));
+  check-equal("regex-replace #1",
+              regex-replace("the (.*) in (\\w*\\b)", big-string, "\\2 has its \\1"),
+              "spain has its rain and some other text");
+  check-equal("regex-replace #2",
+              regex-replace("in", big-string, "out"),
+              "The raout out spaout and some other text");
+  check-equal("regex-replace #3",
+              regex-replace("in", big-string, "out", count: 2),
+              "The raout out spain and some other text");
+  check-equal("regex-replace #4",
+              regex-replace("in", big-string, "out", start: 8, end: 15),
+              "The rain out spain and some other text");
+end function-test regex-replace;
+
+define regular-expressions function-test regex-group-count ()
+  //---*** Fill this in...
+end function-test regex-group-count;
+
+define regular-expressions function-test regex-search ()
+  // Test case-sensitive parameter
+  // See bug 7371
+  check-true("regex-search(..., case-sensitive: #f) works for character sets",
+             regex-search("[a-z]", "A", case-sensitive: #f));
+  check-true("regex-search(..., case-sensitive: #t) works on character sets",
+             regex-search("[a-z]", "A", case-sensitive: #f));
+  check-false("case-sensitive: #t works for regular strings",
+              regex-search("abc", "aBc", case-sensitive: #t));
+  check-true("case-sensitive: #f works for regular strings",
+             regex-search("abc", "ABC", case-sensitive: #f));
+end function-test regex-search;
+
+define regular-expressions function-test compile-regex ()
+  // Test caching
+  check-true("use-cache: #t uses the cache",
+             compile-regex("abc") == compile-regex("abc", use-cache: #t));
+  check-true("use-cache: #f doesn't use the cache",
+             compile-regex("abc") ~== compile-regex("abc", use-cache: #f));
+
+  // Test case-sensitive parameter
+  // Test verbose parameter
+  // Test multi-line parameter
+  // Test dot-matches-all parameter
+end function-test compile-regex;
+
+define regular-expressions class-test <regex> ()
+  //---*** Fill this in...
+end class-test <regex>;
+
+define sideways method make-test-instance
+    (class == <regex>) => (regex :: <regex>)
+  compile-regex("foo")
+end;
+
+define regular-expressions class-test <regex-error> ()
+  //---*** Fill this in...
+end class-test <regex-error>;
+
+define regular-expressions class-test <invalid-regex> ()
+  //---*** Fill this in...
+end class-test <invalid-regex>;
+
+define sideways method make-test-instance
+    (class == <invalid-regex>) => (error :: <invalid-regex>)
+  make(<invalid-regex>, pattern: "[unterminated")
+end;
+
+define regular-expressions function-test regex-search-strings ()
+  //---*** Fill this in...
+end function-test regex-search-strings;
+
+define regular-expressions class-test <invalid-match-group> ()
+  //---*** Fill this in...
+end class-test <invalid-match-group>;
+
+define regular-expressions function-test group-text ()
+  //---*** Fill this in...
+end function-test group-text;
+
+define regular-expressions function-test group-end ()
+  //---*** Fill this in...
+end function-test group-end;
+
+define regular-expressions function-test group-start ()
+  //---*** Fill this in...
+end function-test group-start;
+
+define regular-expressions function-test match-groups ()
+  //---*** Fill this in...
+end function-test match-groups;
+
+define regular-expressions function-test match-group ()
+  //---*** Fill this in...
+end function-test match-group;
+
+define regular-expressions class-test <match-group> ()
+  //---*** Fill this in...
+end class-test <match-group>;
+
+define sideways method make-test-instance
+    (class == <match-group>) => (group :: <match-group>)
+  make(<match-group>, text: "foo", start: 0, end: 3)
+end;
+
+define regular-expressions class-test <regex-match> ()
+  //---*** Fill this in...
+end class-test <regex-match>;
+
+define sideways method make-test-instance
+    (class == <regex-match>) => (match :: <regex-match>)
+  make(<regex-match>, regular-expression: compile-regex("foo"))
+end;
+
+define regular-expressions function-test regex-pattern ()
+  //---*** Fill this in...
+end function-test regex-pattern;
+
+

Modified: trunk/libraries/regular-expressions/tests/library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/library.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/library.dylan	Tue Feb 19 00:54:48 2008
@@ -3,50 +3,37 @@
 define library regular-expressions-test-suite
   use common-dylan;
   use io,
-    import: {
-      format-out,    // for debugging only
-      streams
-    };
-  use regular-expressions;
+    import: { streams };
   use system,
-    import: {
-      file-system,
-      locators,
-      operating-system
-    };
+    import: { file-system,
+              locators,
+              operating-system };
   use strings;
   use testworks;
+  use testworks-specs;
+  use regular-expressions,
+    import: { regex-implementation };
+
   export
     regular-expressions-test-suite;
 end library regular-expressions-test-suite;
 
 define module regular-expressions-test-suite
   use common-dylan,
-    rename: {
-      format-to-string => sprintf    // to long for 80 chars per line
-    },
-    exclude: {
-      split
-    };
-  use regular-expressions;
+    rename: { format-to-string => sprintf };
+  use regex-implementation;
   use file-system;
   use locators,
-    import: {
-      <directory-locator>,
-      <file-locator>,
-      subdirectory-locator
-    };
+    import: { <directory-locator>,
+              <file-locator>,
+              subdirectory-locator };
   use operating-system,
-    import: {
-      environment-variable
-    };
+    import: { environment-variable };
   use testworks;
-  use format-out;    // for debugging only
+  use testworks-specs;
   use streams;
   use strings,
-    import: {
-      trim
-    };
+    import: { trim };
   export regular-expressions-test-suite;
 end module regular-expressions-test-suite;
 

Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan	Tue Feb 19 00:54:48 2008
@@ -201,14 +201,14 @@
   if (match)
     check-equal(sprintf("Match '%s' against regex '%s' -- same # of groups",
                         test-string, pattern),
-                size(match-groups(match)),
+                size(groups-by-position(match)),
                 pcre-groups.size);
     for (group-number from 0,
          pcre-group in pcre-groups)
       // Adding block/exception here causes an infinite loop.
       // Could it be related to using the Visual Studio 8 linker?
       // The if also causes an infinite loop.  Hmmm.
-      let our-group = /* if (group-number < size(match-groups(match))) */
+      let our-group = /* if (group-number < size(groups-by-position(match))) */
                         match-group(match, group-number)
                       /* end */;
       check-equal(sprintf("Match '%s' against regex '%s' -- group %d is the same",

Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	Tue Feb 19 00:54:48 2008
@@ -1,34 +1,7 @@
 Module: regular-expressions-test-suite
 Author: Carl Gay
 
-define function re/position (pattern, string, #rest args)
-  let (#rest marks) = apply(regex-position, pattern, string, args);
-  marks
-end function re/position;
-
-define test atom-test ()
-  check-no-errors("atom-0", re/position("", ""));
-  check-equal("atom-1", re/position("a", "a"),      #[0, 1]);
-  check-equal("atom-2", re/position("[a]", "a"),    #[0, 1]);
-  check-equal("atom-3", re/position("(a)b", "ab"),  #[0, 2, 0, 1]);
-  check-equal("atom-4", re/position("\\w", "a"),    #[0, 1]);
-  check-equal("atom-5", re/position(".", "a"),      #[0, 1]);
-  check-equal("atom-6", re/position("a{0}", "a"),   #[0, 0]);
-  check-equal("atom-7", re/position("a{2}", "aa"),  #[0, 2]);
-  check-equal("atom-8", re/position("a{1,}", "aa"), #[0, 2]);
-  check-equal("atom-9", re/position("a{1,8}", "aaa"), #[0, 3]);
-  check-equal("atom-A", re/position("a{,}", ""),   #[0, 0]);
-  check-equal("atom-A1", re/position("a{,}", "aaaaaa"),   #[0, 6]);
-  check-condition("atom-B", <invalid-regex>, re/position("a{m,n}", ""));
-  check-condition("atom-C", <invalid-regex>, re/position("a{m,}", ""));
-  check-condition("atom-D", <invalid-regex>, re/position("a{,n}", ""));
-  check-condition("atom-E", <invalid-regex>, re/position("a{m}", ""));
-  check-condition("atom-F", <invalid-regex>, re/position("a{,", ""));
-  check-condition("atom-G", <invalid-regex>, re/position("[a", ""));
-  check-condition("atom-H", <invalid-regex>, re/position("\\", ""));
-  check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
-end;
-
+// Helper function, e.g., check-matches("a(b|c)", "abc", "ab", "b")
 // Note that flags must come at the end of groups-and-flags.
 define function check-matches
     (pattern, input-string, #rest groups-and-flags) => ()
@@ -58,6 +31,44 @@
   end;
 end function check-matches;
 
+define test split-test ()
+  let big-string = "The rain in spain and some other text";
+  check-equal("split #1",
+              split(big-string, compile-regex("\\s")),
+              #("The", "rain", "in", "spain", "and", "some", "other", "text"));
+  check-equal("split #2",
+              split(big-string, compile-regex("\\s"), count: 3),
+              #("The", "rain", "in spain and some other text"));
+  check-equal("split #3",
+              split(big-string, compile-regex("\\s"), start: 12),
+              #("spain", "and", "some", "other", "text"));
+  check-equal("split #4",
+              split(" Some   text with   lots of spaces  ",
+                    compile-regex("\\s"),
+                    count: 3),
+              #("", "Some", "  text with   lots of spaces  "));
+  check-equal("split #5",
+              split(" Some   text with   lots of spaces  ",
+                    compile-regex("\\s+")),
+              #("", "Some", "text", "with", "lots", "of", "spaces", ""));
+end test split-test;
+
+define test atom-test ()
+  check-matches("", "", "");
+  check-matches("a", "a", "a");
+  check-matches("[a]", "a", "a");
+  check-matches("(a)b", "ab", "ab", "a");
+  check-matches("\\w", "a", "a");
+  check-matches(".", "a", "a");
+  check-matches("a{0}", "a", "");
+  check-matches("a{2}", "aa", "aa");
+  check-matches("a{1,}", "aa", "aa");
+  check-matches("a{1,8}", "aaa", "aaa");
+  check-matches("a{1,2}", "aaa", "aa");
+  check-matches("a{,}", "", "");
+  check-matches("a{,}", "aaaaaa", "aaaaaa");
+end test atom-test;
+
 // These are to cover the basics, as I add new features to the code or
 // read through the pcrepattern docs.  The PCRE tests should cover a lot
 // of the more esoteric cases, I hope.
@@ -86,15 +97,25 @@
   check-equal("start: and end: work?",
               regex-search("a", "a b a", start: 1, end: 4),
               #f);
+  check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
 end test ad-hoc-regex-test;
 
 // All these regexes should signal <invalid-regex> on compilation.
 //
 define test invalid-regex-test ()
   let patterns = #(
-    "(?P<name>x)(?P<name>y)",         // can't use same name twice
+    "(?P<foo>x)(?P<foo>y)",           // can't use same group name twice
     "(?@abc)",                        // invalid extended character '@'
-    "(a)\\2"                          // invalid back reference
+    "(a)\\2",                         // invalid back reference
+    "a{m,n}",
+    "a{m,}",
+    "a{,n}",
+    "a{m}",
+    "a{,",
+    "[a",
+    "(",
+    "(()",
+    "((a)b|"
     );
   for (pattern in patterns)
     check-condition(sprintf("Compiling '%s' gets an error", pattern),
@@ -116,10 +137,18 @@
 end;
 
 define suite regular-expressions-test-suite ()
+  test split-test;
   test atom-test;
   test ad-hoc-regex-test;
   test invalid-regex-test;
   test regressions-test;
+
+  // I've changed lots of things that make the gdref documentation
+  // out-of-date, but it still might be useful to look it over for
+  // test ideas.
+  //test gdref-documentation-test;
+
+  suite regular-expressions-api-test-suite;
   // It's sometimes useful to use -ignore-suite to skip this one because it's so noisy.
   suite pcre-test-suite;
 end;

Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid	(original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.lid	Tue Feb 19 00:54:48 2008
@@ -1,4 +1,5 @@
 library: regular-expressions-test-suite
 files: library
+       api
        pcre
        regular-expressions-test-suite



More information about the chatter mailing list