[Gd-chatter] r11533 - in trunk/libraries/regular-expressions: . tests

cgay at gwydiondylan.org cgay at gwydiondylan.org
Tue Dec 11 21:29:31 CET 2007


Author: cgay
Date: Tue Dec 11 21:29:29 2007
New Revision: 11533

Modified:
   trunk/libraries/regular-expressions/interface.dylan
   trunk/libraries/regular-expressions/match.dylan
   trunk/libraries/regular-expressions/od-library.dylan
   trunk/libraries/regular-expressions/parse.dylan
   trunk/libraries/regular-expressions/regex.dylan
   trunk/libraries/regular-expressions/tests/app-library.dylan
   trunk/libraries/regular-expressions/tests/library.dylan
   trunk/libraries/regular-expressions/tests/pcre.dylan
   trunk/libraries/regular-expressions/tests/regression-tests.txt
   trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan
   trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
Log:
Bug: 7357
Fixed "", (), (?#...), ".", and changed order of args in regex-search.
The regex now comes first.
Committing now so bug 7364 can reference this revision.

Modified: trunk/libraries/regular-expressions/interface.dylan
==============================================================================
--- trunk/libraries/regular-expressions/interface.dylan	(original)
+++ trunk/libraries/regular-expressions/interface.dylan	Tue Dec 11 21:29:29 2007
@@ -153,7 +153,7 @@
 // of marks.
 //
 define function regex-position
-    (big :: <string>, regex :: <string>, #key start: big-start = 0,
+    (regex :: <string>, big :: <string>, #key start: big-start = 0,
      end: big-end = #f, case-sensitive = #f)
  => (regex-start :: false-or(<integer>), #rest marks :: false-or(<integer>));
   let substring = make(<substring>, string: big, start: big-start,
@@ -193,16 +193,17 @@
 	  end: big-end = #f)
    => (regex-start :: false-or(<integer>), 
        #rest marks :: false-or(<integer>));
-    regex-position(big, regex, case-sensitive: case-sensitive, 
+    regex-position(regex, big, case-sensitive: case-sensitive, 
 		    start: big-start, end: big-end);
   end method;
 end function make-regex-positioner;
 
 // returns #f if no match, the matching string on match, and another string or #f
 // for each group in the regex.
+/*
 define method regex-match
     (big :: <string>, regex :: <string>) => (#rest results);
-  let (#rest marks) = regex-position(big, regex);
+  let (#rest marks) = regex-position(regex, big);
   let result = make(<stretchy-vector>);
 
   if(marks[0])
@@ -218,6 +219,7 @@
   end;
   apply(values, result)
 end;
+*/
 
 // #if (have-free-time)
 /*

Modified: trunk/libraries/regular-expressions/match.dylan
==============================================================================
--- trunk/libraries/regular-expressions/match.dylan	(original)
+++ trunk/libraries/regular-expressions/match.dylan	Tue Dec 11 21:29:29 2007
@@ -437,13 +437,13 @@
     #"end-of-string"       => index >= target.end-index;
     #"word-boundary"       =>
       index = 0 | index >= target.end-index
-	| (member?(target.entire-string[index], word-chars) 
-	     ~== member?(target.entire-string[index - 1], word-chars));
+	| (member?(target.entire-string[index], $word-chars) 
+	     ~== member?(target.entire-string[index - 1], $word-chars));
 
     #"not-word-boundary"   =>
       index ~== 0 & index < target.end-index
-	& (member?(target.entire-string[index], word-chars)
-	     == member?(target.entire-string[index - 1], word-chars));
+	& (member?(target.entire-string[index], $word-chars)
+	     == member?(target.entire-string[index - 1], $word-chars));
 
     otherwise              => 
       error("Unknown assertion %=", assertion);

Modified: trunk/libraries/regular-expressions/od-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/od-library.dylan	(original)
+++ trunk/libraries/regular-expressions/od-library.dylan	Tue Dec 11 21:29:29 2007
@@ -36,6 +36,8 @@
 define library regular-expressions
   use common-dylan;
   use string-extensions;
+  use io,
+    import: { format-out };  // for debugging only
   export
     regular-expressions;
 end;
@@ -45,6 +47,7 @@
     exclude: {
       split    // todo -- just add a method to this one
     };
+  use format-out;  // for debugging only
   use string-conversions;
   use character-type;
   use string-hacking;

Modified: trunk/libraries/regular-expressions/parse.dylan
==============================================================================
--- trunk/libraries/regular-expressions/parse.dylan	(original)
+++ trunk/libraries/regular-expressions/parse.dylan	Tue Dec 11 21:29:29 2007
@@ -148,6 +148,32 @@
               format-arguments: list(thing)));
 end;
 
+define constant $empty-string :: <parsed-string>
+  = make(<parsed-string>, string: "");
+
+// The useful definitions of all these are in as(<character-set>).
+//
+define constant $any-char
+  = make(<case-sensitive-character-set>, description: "[\<00>-\<FF>]");
+define constant $any-char-except-newline
+  = make(<case-sensitive-character-set>, description: "^\n");
+define constant $digit-chars
+  = make(<case-sensitive-character-set>, description: "\\d");
+define constant $not-digit-chars
+  = make(<case-sensitive-character-set>, description: "^\\d");
+define constant $word-chars
+  = make(<case-sensitive-character-set>, description: "\\w");
+define constant $not-word-chars
+  = make(<case-sensitive-character-set>, description: "^\\w");
+define constant $whitespace-chars
+  = make(<case-sensitive-character-set>, description: "\\s");
+define constant $not-whitespace-chars
+  = make(<case-sensitive-character-set>, description: "^\\s");
+define constant $dot
+  = make(<parsed-set>, set: $any-char-except-newline);
+define constant
+    $dot-all = make(<parsed-set>, set: $any-char);
+
 // <parse-info> contains some information about the current regex
 // being parsed.
 //
@@ -156,14 +182,31 @@
   // Name this has-backreferences, for consistency with the other slots.
   // Add ? to all the has-* slots.  --cgay
   // Also, not sure why anyone cares about these three things.
-  slot backreference-used :: <boolean>, init-value: #f;
-  slot has-alternatives :: <boolean>, init-value: #f;
-  slot has-quantifiers :: <boolean>, init-value: #f;
-  slot current-group-number :: <integer>, init-value: 0;
-  constant slot group-number-to-name :: <table>, init-value: make(<table>);
-  constant slot set-type :: <class>, required-init-keyword: #"set-type";
+  slot backreference-used :: <boolean> = #f;
+  slot has-alternatives :: <boolean> = #f;
+  slot has-quantifiers :: <boolean> = #f;
+  slot current-group-number :: <integer> = 0;
+  constant slot group-number-to-name :: <table> = make(<table>);
+  constant slot set-type :: <class>,
+    required-init-keyword: #"set-type";
+  slot dot-matches-all? :: <boolean>,
+    required-init-keyword: #"dot-matches-all";
+  slot verbose? :: <boolean>,
+    required-init-keyword: #"verbose";
+  slot multi-line? :: <boolean>,
+    required-init-keyword: #"multi-line";
 end class <parse-info>;
 
+// These setters will be used eventually, when we implement the ability to change
+// them via subpatterns like (?i).  Until then, this prevents warnings.
+begin
+  dot-matches-all?-setter;
+  verbose?;
+  verbose?-setter;
+  multi-line?;
+  multi-line?-setter;
+end;
+
 define function make-parse-info
     (#key case-sensitive  :: <boolean> = #t,
           verbose         :: <boolean> = #f,
@@ -172,14 +215,16 @@
  => (info :: <parse-info>)
   verbose & not-yet-implemented("'verbose' option");
   multi-line & not-yet-implemented("'multi-line' option");
-  dot-matches-all & not-yet-implemented("'dot-matches-all' option");
-  let char-set-type
-   = if (case-sensitive)
-       <case-sensitive-character-set>
-     else
-       <case-insensitive-character-set>
-     end;
-  make(<parse-info>, set-type: char-set-type)
+  let char-set-type = if (case-sensitive)
+                        <case-sensitive-character-set>
+                      else
+                        <case-insensitive-character-set>
+                      end;
+  make(<parse-info>,
+       set-type: char-set-type,
+       verbose: verbose,
+       multi-line: multi-line,
+       dot-matches-all: dot-matches-all)
 end function make-parse-info;
 
 define method has-named-group?
@@ -189,7 +234,7 @@
 
 define method parse
     (regex :: <string>, parse-info :: <parse-info>)
- => (parsed-regex :: <parsed-regex>,
+ => (regex :: <parsed-regex>,
      last-group :: <integer>,
      backrefs? :: <boolean>,
      alternatives? :: <boolean>, 
@@ -211,60 +256,59 @@
 	   parse-info.backreference-used,
 	   parse-info.has-alternatives,
 	   parse-info.has-quantifiers);
-  end if;
+  end if
 end method parse;
 
-define method parse-regex (s :: <parse-string>, info :: <parse-info>)
- => parsed-regex :: <parsed-regex>;
-  let alternative = parse-alternative(s, info);
+define method parse-regex
+    (str :: <parse-string>, info :: <parse-info>)
+ => (regex :: <parsed-regex>)
+  let alternative = parse-alternative(str, info);
   if (~alternative)
-    parse-error(s.parse-string, "");
-  elseif (lookahead(s) = '|')
+    parse-error(str.parse-string, "");
+  elseif (lookahead(str) = '|')
     info.has-alternatives := #t;
-    make(<union>, left: alternative, right: parse-regex(consume(s), info));
+    make(<union>, left: alternative, right: parse-regex(consume(str), info))
   else
-    alternative;
-  end if;
+    alternative
+  end if
 end method parse-regex;
 
 define method parse-alternative
-    (s :: <parse-string>, info :: <parse-info>)
+    (str :: <parse-string>, info :: <parse-info>)
  => (re :: false-or(<parsed-regex>))
-  let term = parse-quantified-atom(s, info);
-  if (member?(lookahead(s), #(#f, '|', ')')))
-    term;
+  let term = parse-quantified-atom(str, info);
+  if (member?(lookahead(str), #(#f, '|', ')')))
+    term
   else
-    make(<alternative>, left: term, right: parse-alternative(s, info));
-  end if;
+    make(<alternative>, left: term, right: parse-alternative(str, info))
+  end
 end method parse-alternative;
 
-define method parse-quantified-atom (s :: <parse-string>, info :: <parse-info>)
+define method parse-quantified-atom
+    (str :: <parse-string>, info :: <parse-info>)
  => (result :: false-or(<parsed-regex>))
-  // I think this breaks when parse-atom returns #f and then we quantify that.
-  // I added some regexes to regression-tests.txt starting with /a()b/ that I
-  // hope will exercise that case.  --cgay
-  let atom = parse-atom(s, info);
-  let char = lookahead(s);
+  let atom = parse-atom(str, info);
+  let char = lookahead(str);
   select (char by \=)
     '*' =>
       info.has-quantifiers := #t;
-      consume(s);
+      consume(str);
       make(<quantified-atom>, min: 0, atom: atom);
 
     '+' =>
       info.has-quantifiers := #t;
-      consume(s);
+      consume(str);
       make(<quantified-atom>, min: 1, atom: atom);
 
     '?' =>
       info.has-quantifiers := #t;
-      consume(s);
+      consume(str);
       make(<quantified-atom>, min: 0, max: 1, atom: atom);
 
     '{' =>
       info.has-quantifiers := #t;
-      consume(s);
-      parse-minmax-quantifier(atom, s);
+      consume(str);
+      parse-minmax-quantifier(atom, str);
 
     otherwise =>
       atom;
@@ -274,75 +318,83 @@
 // {m,n}, {m,}, {,n}, {m}, {}, and {,} are all valid.
 // m defaults to 0 and n defaults to #f (unlimited).
 define method parse-minmax-quantifier
-    (atom :: <parsed-regex>, s :: <parse-string>) => (qatom :: <quantified-atom>)
+    (atom :: <parsed-regex>, str :: <parse-string>)
+ => (qatom :: <quantified-atom>)
   local method parse-integer () => (int :: false-or(<integer>))
           let digits = make(<deque>);
-          while (lookahead(s) & digit?(lookahead(s)))
-            push-last(digits, lookahead(s));
-            consume(s);
+          while (lookahead(str) & digit?(lookahead(str)))
+            push-last(digits, lookahead(str));
+            consume(str);
           end;
           ~empty?(digits) & string-to-integer(as(<byte-string>, digits));
         end method parse-integer;
   let qmin = parse-integer() | 0;
   let qmax = #f;
-  if (lookahead(s) = ',')
-    consume(s);
+  if (lookahead(str) = ',')
+    consume(str);
     qmax := parse-integer();
   else
     qmax := qmin;
   end;
-  if (lookahead(s) ~= '}')
-    parse-error(s.parse-string,
+  if (lookahead(str) ~= '}')
+    parse-error(str.parse-string,
                 "Close brace expected in {m,n} quantifier (index = %s).",
-                s.parse-index);
+                str.parse-index);
   end;
-  consume(s);
+  consume(str);
   make(<quantified-atom>, atom: atom, min: qmin, max: qmax);
 end method parse-minmax-quantifier;
 
-define method parse-atom (s :: <parse-string>, info :: <parse-info>)
+define method parse-atom (str :: <parse-string>, info :: <parse-info>)
  => (regex :: false-or(<parsed-regex>))
-  let char = lookahead(s);
+  let char = lookahead(str);
   select (char)
     '(' =>
-      consume(s);   // Consume beginning paren
-      parse-group(s, info);
-
-    ')' =>
-      #f;              // Need something to terminate upon seeing a close paren
+      consume(str);   // Consume beginning paren
+      // If parse-group returns #f it's because it parsed a (?#...) comment,
+      // so just ignore the comment and try to parse another atom (if there's
+      // anything left in str).  This means comments can't immediately precede
+      // |, ), or EOI.  Should fix this.
+      parse-group(str, info)
+        | (lookahead(str) & parse-atom(str, info));
 
     #f  =>
-      #f;   // Signal error?  (end of stream)
+      // Handle valid patterns like "" and "a|"
+      $empty-string;
 
-    '*', '|', '+' =>
+    '*', '|', '+', ')' =>
       #f;
 
     '\\' =>
-      consume(s);        // Consume the backslash
+      consume(str);        // Consume the backslash
       // Perhaps add support for a different escape character to aid readability.
       // The escape character could be specified in the <parse-info>.  --cgay
-      parse-escaped-character(s, info);
+      parse-escaped-character(str, info);
 
     '[' =>
-      consume(s);        // Eat the opening brace
-      parse-character-set(s, info);
+      consume(str);        // Eat the opening brace
+      parse-character-set(str, info);
 
     '.' =>
-      consume(s);
-      dot;
+      consume(str);
+      if (info.dot-matches-all?)
+        $dot-all
+      else
+        $dot
+      end;
 
     '^' =>
-      consume(s);
+      consume(str);
       make(<parsed-assertion>, assertion: #"beginning-of-string");
 
     '$' =>
-      consume(s);
+      consume(str);
       make(<parsed-assertion>, assertion: #"end-of-string");
   
       // Insert more special characters here
 
     otherwise =>
-      consume(s);
+      consume(str);
       make(<parsed-character>, character: char);
   end select;
 end method parse-atom;
@@ -390,18 +442,21 @@
       while (lookahead(str) & lookahead(str) ~== ')')
         consume(str);
       end;
-      if (~ lookahead(str))
-        parse-error(str.parse-string, "Unterminated subpattern commend (?#....");
-      else
+      if (lookahead(str) == ')')
+        consume(str);
         #f
+      else
+        parse-error(str.parse-string, "Unterminated subpattern comment (?#....");
       end;
 
     otherwise =>
       // See the Python re docs for what all these do.
+      // See "INTERNAL OPTION SETTING" in pcre.txt doc as well.
       if (member?(char, "iLmsux=!<("))
         not-yet-implemented("'(?%c' subpattern construct", char);
       else
-        parse-error(str.parse-string, "Invalid subpattern construct (?%c...) at index %s.",
+        parse-error(str.parse-string,
+                    "Invalid subpattern construct (?%c...) at index %s.",
                     char, str.parse-index);
       end;
   end select
@@ -420,13 +475,14 @@
     consume(str);
     copy-sequence(str.parse-string, start: start-index, end: str.parse-index - 1)
   else
-    parse-error(str.parse-string, "Unterminated named group name at index %s.",
+    parse-error(str.parse-string,
+                "Unterminated named group name at index %s.",
                 str.parse-index);
   end
 end function parse-group-name;
 
-// Parse a group/subpattern, possibly after having already parsed any
-// options given via "(?...".
+// Parse a group/subpattern.  The open paren and any subpattern options given
+// via "(?..." have already been consumed.
 //
 define inline function parse-simple-group
     (str :: <parse-string>,
@@ -437,27 +493,30 @@
   if (save-group?)
     info.current-group-number := info.current-group-number + 1;
   end;
-  let regex = parse-regex(str, info);
-  if (lookahead(str) ~== ')')
-    parse-error(str.parse-string, "Unbalanced parens in regex (index = %s).",
-                str.parse-index);
+  let regex = if (lookahead(str) == ')')
+                consume(str);
+                $empty-string
+              else
+                let re = parse-regex(str, info);
+                if (lookahead(str) == ')')
+                  consume(str)
+                end;
+                re
+              end;
+  if (~save-group?)
+    regex
   else
-    consume(str);
-    if (~ save-group?)
-      regex
-    else
-      if (group-name)
-        if (has-named-group?(info, group-name))
-          parse-error(str.parse-string,
-                      "Duplicate group name (%s) at index %s.",
-                      group-name, str.parse-index);
-        else
-          info.group-number-to-name[info.current-group-number] := group-name;
-        end;
+    if (group-name)
+      if (has-named-group?(info, group-name))
+        parse-error(str.parse-string,
+                    "Duplicate group name (%s) at index %s.",
+                    group-name, str.parse-index);
+      else
+        info.group-number-to-name[info.current-group-number] := group-name;
       end;
-      make(<mark>, child: regex, group: info.current-group-number)
-    end
-  end
+    end;
+    make(<mark>, child: regex, group: info.current-group-number)
+  end if
 end function parse-simple-group;
 
 // This just does a quick scan to find the closing ] and then lets
@@ -508,43 +567,19 @@
   make(<parsed-set>, set: make(info.set-type, description: set-string))
 end function parse-character-set;
 
-define constant any-char 
-  = make(<case-sensitive-character-set>, description: "^\n");
-
-// The useful definitions of all these are in as(<character-set>).
-//
-define constant digit-chars
-  = make(<case-sensitive-character-set>, description: "\\d");
-define constant not-digit-chars
-  = make(<case-sensitive-character-set>, description: "^\\d");
-define constant word-chars
-  = make(<case-sensitive-character-set>, description: "\\w");
-define constant not-word-chars
-  = make(<case-sensitive-character-set>, description: "^\\w");
-define constant whitespace-chars
-  = make(<case-sensitive-character-set>, description: "\\s");
-define constant not-whitespace-chars
-  = make(<case-sensitive-character-set>, description: "^\\s");
-
-define constant dot = make(<parsed-set>, set: any-char);
-/* KJP: Not used.
-define constant dot-star = make(<quantified-atom>, min: 0, max: #f,
-				atom: dot);
-*/
-
 // This only handles escaped characters *outside* of a character
 // set. Inside of a character set is a whole different story.
 //
 define method parse-escaped-character 
-    (s :: <parse-string>, info :: <parse-info>)
+    (str :: <parse-string>, info :: <parse-info>)
  => parsed-regex :: <parsed-regex>;
-  let next-char = lookahead(s);
+  let next-char = lookahead(str);
   if (~next-char)
-    parse-error(s.parse-string,
+    parse-error(str.parse-string,
                 "Unterminated escape sequence at index %d.",
-                s.parse-index - 1)
+                str.parse-index - 1)
   end;
-  consume(s);
+  consume(str);
   select (next-char)
     '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
       info.backreference-used := #t;
@@ -560,14 +595,14 @@
 
     'b' =>   make(<parsed-assertion>, assertion: #"word-boundary");
     'B' =>   make(<parsed-assertion>, assertion: #"not-word-boundary");
-       // Beginning and end of string are not escaped
+    // Beginning and end of string are not escaped
 
-    'd' =>   make(<parsed-set>, set: digit-chars);
-    'D' =>   make(<parsed-set>, set: not-digit-chars);
-    'w' =>   make(<parsed-set>, set: word-chars);
-    'W' =>   make(<parsed-set>, set: not-word-chars);
-    's' =>   make(<parsed-set>, set: whitespace-chars);
-    'S' =>   make(<parsed-set>, set: not-whitespace-chars);
+    'd' =>   make(<parsed-set>, set: $digit-chars);
+    'D' =>   make(<parsed-set>, set: $not-digit-chars);
+    'w' =>   make(<parsed-set>, set: $word-chars);
+    'W' =>   make(<parsed-set>, set: $not-word-chars);
+    's' =>   make(<parsed-set>, set: $whitespace-chars);
+    'S' =>   make(<parsed-set>, set: $not-whitespace-chars);
 
     // Insert more escaped characters here
 

Modified: trunk/libraries/regular-expressions/regex.dylan
==============================================================================
--- trunk/libraries/regular-expressions/regex.dylan	(original)
+++ trunk/libraries/regular-expressions/regex.dylan	Tue Dec 11 21:29:29 2007
@@ -29,7 +29,7 @@
 // This function signals <invalid-regex> if the regular expression is invalid.
 //
 define sealed generic compile-regex
-    (string :: <string>,
+    (pattern :: <string>,
      #key case-sensitive  :: <boolean> = #t,
           verbose         :: <boolean> = #f,
           multi-line      :: <boolean> = #f,
@@ -37,13 +37,13 @@
  => (regex :: <regex>);
 
 define method compile-regex
-    (string :: <string>,
+    (pattern :: <string>,
      #key case-sensitive  :: <boolean> = #t,
           verbose         :: <boolean> = #f,
           multi-line      :: <boolean> = #f,
           dot-matches-all :: <boolean> = #f)
  => (regex :: <regex>)
-  parse(string,
+  parse(pattern,
         make-parse-info(case-sensitive: case-sensitive,
                         verbose: verbose,
                         multi-line: multi-line,
@@ -68,33 +68,33 @@
 // todo -- Should $ anchor at the provided end position or at the end of the string?
 //
 define sealed generic regex-search
-    (big :: <string>, pattern :: <object>,
+    (pattern :: <object>, string :: <string>,
      #key anchored  :: <boolean> = #f,
           start     :: <integer> = 0,
           end: _end :: <integer> = big.size)
  => (match :: false-or(<regex-match>));
 
 define method regex-search
-    (big :: <string>, pattern :: <string>,
+    (pattern :: <string>, string :: <string>,
      #key anchored  :: <boolean> = #f,
           start     :: <integer> = 0,
-          end: _end :: <integer> = big.size)
+          end: _end :: <integer> = string.size)
  => (match :: false-or(<regex-match>))
-  regex-search(big, compile-regex(pattern),
-                anchored: anchored, start: start, end: _end)
+  regex-search(compile-regex(pattern),
+               string,
+               anchored: anchored, start: start, end: _end)
 end method regex-search;
 
 define method regex-search
-    (big :: <string>, pattern :: <regex>,
+    (pattern :: <regex>, string :: <string>,
      #key anchored :: <boolean> = #f,
           start    :: <integer> = 0,
-          end: _end :: <integer> = big.size)
+          end: _end :: <integer> = string.size)
  => (match :: false-or(<regex-match>))
-  // Copied from regex-position with some mods to match our interface.
   // Unlike regex-position there is no caching.  If you don't want to
   // recompile your regex each time, compile it explicitly with compile-regex
   // and save it.
-  let substring = make(<substring>, string: big, start: start, end: _end);
+  let substring = make(<substring>, string: string, start: start, end: _end);
   let case-sensitive? = #t;
   let num-groups = pattern.regex-group-count;
   let (matched?, marks)

Modified: trunk/libraries/regular-expressions/tests/app-library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/app-library.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/app-library.dylan	Tue Dec 11 21:29:29 2007
@@ -3,11 +3,13 @@
 Synopsis:  An application library for regular-expressions-test-suite
 
 define library regular-expressions-test-suite-app
+  use common-dylan;
   use regular-expressions-test-suite;
   use testworks;
 end;
 
 define module regular-expressions-test-suite-app
+  use common-dylan;
   use regular-expressions-test-suite;
   use testworks;
 end;

Modified: trunk/libraries/regular-expressions/tests/library.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/library.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/library.dylan	Tue Dec 11 21:29:29 2007
@@ -4,6 +4,7 @@
   use common-dylan;
   use io,
     import: {
+      format-out,    // for debugging only
       streams
     };
   use regular-expressions;
@@ -21,6 +22,9 @@
 
 define module regular-expressions-test-suite
   use common-dylan,
+    rename: {
+      format-to-string => sprintf    // to long for 80 chars per line
+    },
     exclude: {
       split
     };
@@ -37,6 +41,7 @@
       environment-variable
     };
   use testworks;
+  use format-out;    // for debugging only
   use streams;
   use strings,
     import: {

Modified: trunk/libraries/regular-expressions/tests/pcre.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/pcre.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/pcre.dylan	Tue Dec 11 21:29:29 2007
@@ -20,14 +20,14 @@
         // Some multi-line regular expression patterns have empty lines and we
         // don't want to think that's the end of the section.
         if (lines.size > 0
-              & regex-search(lines[lines.size - 1], $group-regex))
+              & regex-search($group-regex, lines[lines.size - 1]))
           check-pcre-section(make(<section>,
                                   lines: lines,
                                   start-line-number: line-number - lines.size));
           lines := make(<stretchy-vector>);
         elseif (lines.size < 3)
-          // A section must have a regex, a test string and one group result to make
-          // any sense.  Note we don't add the line to the section here.
+          // A section must have a regex, a test string and one group result to
+          // make any sense.  Note we don't add the line to the section here.
           test-output("Line %d: empty line in first 3 lines of a section.\n",
                       line-number);
         else
@@ -41,8 +41,10 @@
 end function run-pcre-checks;
 
 define class <section> (<object>)
-  constant slot section-lines :: <sequence>, required-init-keyword: #"lines";
-  constant slot start-line-number :: <integer>, required-init-keyword: #"start-line-number";
+  constant slot section-lines :: <sequence>,
+    required-init-keyword: #"lines";
+  constant slot start-line-number :: <integer>,
+    required-init-keyword: #"start-line-number";
   slot %index :: <integer> = 0;
 end class <section>;
 
@@ -91,10 +93,11 @@
       block (done-with-this-test-string)
         while (#t)
           let line = peek-line(section);
-          let match = line & regex-search(line, $group-regex);
+          let match = line & regex-search($group-regex, line);
           if (match)
             consume-line(section);
-            let group-text = match-group(match, $group-index-of-what-pcre-matched);
+            let group-text = match-group(match,
+                                         $group-index-of-what-pcre-matched);
             //test-output("   pcre group: %s\n", group-text | "No match");
             if (group-text)
               add!(group-strings, group-text);
@@ -109,16 +112,17 @@
         end;
       end;
       if (regex)
-        check-no-errors(format-to-string("search for %s in %s",
-                                         test-string, regex.regex-pattern),
-                        regex-search(test-string, regex));
+        check-no-errors(sprintf("search for %s in %s",
+                                test-string, regex.regex-pattern),
+                        regex-search(regex, test-string));
         let match = block ()
-                      regex-search(test-string, regex)
+                      regex-search(regex, test-string)
                     exception (ex :: <error>)
                       #f
                     end;
         if (match)
-          compare-to-pcre-results(regex.regex-pattern, test-string, match, group-strings);
+          compare-to-pcre-results(regex.regex-pattern, test-string, match,
+                                  group-strings);
         end;
       end if;
     end while;
@@ -162,8 +166,7 @@
   let (pattern, flags) = read-pattern-and-flags();
   //test-output("pattern: %s (flags = %s)\n", pattern, flags);
   for (flag in flags)
-    check-true(format-to-string("For regex %s, flag %s is recognized",
-                                pattern, flag),
+    check-true(sprintf("For regex %s, flag %s is recognized", pattern, flag),
                member?(flag, "ixms"));
   end for;
   block ()
@@ -173,10 +176,11 @@
                   multi-line: member?('m', flags),
                   dot-matches-all: member?('s', flags))
   // Unfortunately we can't catch <regex-error> here because the charset
-  // parser is in string-extensions and signals <invalid-character-set-description>
-  // which isn't related to <regex-error> (and isn't even exported).
+  // parser is in string-extensions and signals
+  // <invalid-character-set-description> which isn't related to <regex-error>
+  // (and isn't even exported).
   exception (ex :: <error>)
-    check-true(format-to-string("can compile regex %s", pattern), #f);
+    check-true(sprintf("can compile regex %s", pattern), #f);
     //test-output("  ERROR: %s\n", ex);
     #f
   end block
@@ -194,8 +198,8 @@
      pcre-groups :: <sequence>)
  => ()
   if (match)
-    check-equal(format-to-string("Match %s against %s -- same # of groups",
-                                 test-string, pattern),
+    check-equal(sprintf("Match %s against %s -- same # of groups",
+                        test-string, pattern),
                 size(match-groups(match)),
                 pcre-groups.size);
     for (group-number from 0,
@@ -206,14 +210,14 @@
       let our-group = /* if (group-number < size(match-groups(match))) */
                         match-group(match, group-number)
                       /* end */;
-      check-equal(format-to-string("Match %s against %s -- group %d is the same",
-                                   test-string, pattern, group-number),
+      check-equal(sprintf("Match %s against %s -- group %d is the same",
+                          test-string, pattern, group-number),
                   our-group,
                   pcre-group);
     end;
   else
-    check-equal(format-to-string("Pattern %s doesn't match test string %s",
-                                 pattern, test-string),
+    check-equal(sprintf("Pattern %s doesn't match test string %s",
+                        pattern, test-string),
                 0,
                 pcre-groups.size);
   end if;

Modified: trunk/libraries/regular-expressions/tests/regression-tests.txt
==============================================================================
--- trunk/libraries/regular-expressions/tests/regression-tests.txt	(original)
+++ trunk/libraries/regular-expressions/tests/regression-tests.txt	Tue Dec 11 21:29:29 2007
@@ -20,6 +20,18 @@
     b]b
 No match
 
+/./
+    a
+ 0: a
+    \n
+No match
+
+/./s
+    a
+ 0: a
+    \n
+ 0: \n
+
 /a()b/
 
 /a()+b/

Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite-app.dylan	Tue Dec 11 21:29:29 2007
@@ -1,3 +1,9 @@
 Module: regular-expressions-test-suite-app
 
-run-test-application(regular-expressions-test-suite);
+define function main
+    () => ()
+  run-test-application(regular-expressions-test-suite);
+end;
+
+main();
+

Modified: trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan
==============================================================================
--- trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	(original)
+++ trunk/libraries/regular-expressions/tests/regular-expressions-test-suite.dylan	Tue Dec 11 21:29:29 2007
@@ -1,64 +1,81 @@
 Module: regular-expressions-test-suite
 Author: Carl Gay
 
-define function re/position (string, pattern, #rest args)
-  let (#rest marks) = apply(regex-position, string, pattern, args);
+define function re/position (pattern, string, #rest args)
+  let (#rest marks) = apply(regex-position, pattern, string, args);
   marks
 end function re/position;
 
 define test atom-test ()
-  // In current code the empty string is an illegal regex, but Python
-  // (and probably perl?) allow it, so I think we should consider that
-  // a bug.  --cgay
   check-no-errors("atom-0", re/position("", ""));
   check-equal("atom-1", re/position("a", "a"),      #[0, 1]);
-  check-equal("atom-2", re/position("a", "[a]"),    #[0, 1]);
-  check-equal("atom-3", re/position("ab", "(a)b"),  #[0, 2, 0, 1]);
-  check-equal("atom-4", re/position("a", "\\w"),    #[0, 1]);
-  check-equal("atom-5", re/position("a", "."),      #[0, 1]);
-  check-equal("atom-6", re/position("a", "a{0}"),   #[0, 0]);
-  check-equal("atom-7", re/position("aa", "a{2}"),  #[0, 2]);
-  check-equal("atom-8", re/position("aa", "a{1,}"), #[0, 2]);
-  check-equal("atom-9", re/position("aaa", "a{1,8}"), #[0, 3]);
-  check-equal("atom-A", re/position("", "a{,}"),   #[0, 0]);
-  check-equal("atom-A1", re/position("aaaaaa", "a{,}"),   #[0, 6]);
-  check-condition("atom-B", <invalid-regex>, re/position("", "a{m,n}"));
-  check-condition("atom-C", <invalid-regex>, re/position("", "a{m,}"));
-  check-condition("atom-D", <invalid-regex>, re/position("", "a{,n}"));
-  check-condition("atom-E", <invalid-regex>, re/position("", "a{m}"));
-  check-condition("atom-F", <invalid-regex>, re/position("", "a{,"));
-  check-condition("atom-G", <invalid-regex>, re/position("", "[a"));
-  check-condition("atom-H", <invalid-regex>, re/position("", "\\"));
-  //check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
+  check-equal("atom-2", re/position("[a]", "a"),    #[0, 1]);
+  check-equal("atom-3", re/position("(a)b", "ab"),  #[0, 2, 0, 1]);
+  check-equal("atom-4", re/position("\\w", "a"),    #[0, 1]);
+  check-equal("atom-5", re/position(".", "a"),      #[0, 1]);
+  check-equal("atom-6", re/position("a{0}", "a"),   #[0, 0]);
+  check-equal("atom-7", re/position("a{2}", "aa"),  #[0, 2]);
+  check-equal("atom-8", re/position("a{1,}", "aa"), #[0, 2]);
+  check-equal("atom-9", re/position("a{1,8}", "aaa"), #[0, 3]);
+  check-equal("atom-A", re/position("a{,}", ""),   #[0, 0]);
+  check-equal("atom-A1", re/position("a{,}", "aaaaaa"),   #[0, 6]);
+  check-condition("atom-B", <invalid-regex>, re/position("a{m,n}", ""));
+  check-condition("atom-C", <invalid-regex>, re/position("a{m,}", ""));
+  check-condition("atom-D", <invalid-regex>, re/position("a{,n}", ""));
+  check-condition("atom-E", <invalid-regex>, re/position("a{m}", ""));
+  check-condition("atom-F", <invalid-regex>, re/position("a{,", ""));
+  check-condition("atom-G", <invalid-regex>, re/position("[a", ""));
+  check-condition("atom-H", <invalid-regex>, re/position("\\", ""));
+  check-equal("atom-tan", "\<44>\<79>\<6c>\<61>\<6e>", "Dylan");
 end;
 
-// These should all compile.
-//
-define test good-regex-test ()
-  let patterns = #(
-    "",
-    "a()b",
-    "a(?#blah)b"
-    );
-  for (pattern in patterns)
-    check-no-errors(format-to-string("Regex '%s' compiles", pattern),
-                    compile-regex(pattern));
+define function check-matches
+    (pattern, input-string, #rest groups-and-flags) => ()
+  let string? = rcurry(instance?, <string>);
+  let groups = choose(string?, groups-and-flags);
+  let flags = choose(complement(string?), groups-and-flags);
+  let regex = apply(compile-regex, pattern, flags);
+  let match = regex-search(regex, input-string);
+  if (empty?(groups))
+    check-false(sprintf("Regex '%s' matches '%s'", pattern, input-string),
+                match);
+  else
+    for (group in groups, i from 0)
+      check-equal(sprintf("Regex '%s' group %d is '%s'", pattern, i, group),
+                  group,
+                  if (match)
+                    match-group(match, i)
+                  else
+                    "{no such group}"
+                  end);
+    end;
   end;
-end test good-regex-test;
+end function check-matches;
+
+// These are to cover the basics, as I add new features to the code.
+// The PCRE tests should cover a lot of the more esoteric cases, I hope.
+//
+define test ad-hoc-regex-test ()
+  check-matches("", "abc", "");
+  check-matches("a()b", "ab", "ab", "");
+  check-matches("a(?#blah)b", "ab", "ab"); // comments shouldn't create a group
+  check-matches(".", "x", "x");
+  check-matches(".", "\n", "\n", dot-matches-all: #t);
+end test ad-hoc-regex-test;
 
 // All these regexes should signal <invalid-regex> on compilation.
 //
-define test bad-regex-test ()
+define test invalid-regex-test ()
   let patterns = #(
     "(?P<name>x)(?P<name>y)",         // can't use same name twice
     "(?@abc)"                         // invalid extended character '@'
     );
   for (pattern in patterns)
-    check-condition(format-to-string("Compiling '%s' gets an error", pattern),
+    check-condition(sprintf("Compiling '%s' gets an error", pattern),
                     <invalid-regex>,
                     compile-regex(pattern));
   end;
-end test bad-regex-test;
+end test invalid-regex-test;
 
 define test pcre-testoutput1 ()
   run-pcre-checks(make-pcre-locator("pcre-testoutput1.txt"));
@@ -74,11 +91,10 @@
 
 define suite regular-expressions-test-suite ()
   test atom-test;
-  test good-regex-test;
-  test bad-regex-test;
+  test ad-hoc-regex-test;
+  test invalid-regex-test;
   test regressions-test;
-  // For now leaving out the PCRE tests because they're too verbose.
-  // Once more basic stuff is working will start including them again.
-  // suite pcre-test-suite;
+  // It's sometimes useful to use -ignore-suite to skip this one because it's so noisy.
+  suite pcre-test-suite;
 end;
 



More information about the chatter mailing list