[Gd-chatter] r11377 - trunk/libraries/regular-expressions

cgay at gwydiondylan.org cgay at gwydiondylan.org
Sun May 27 06:00:32 CEST 2007


Author: cgay
Date: Sun May 27 06:00:28 2007
New Revision: 11377

Added:
   trunk/libraries/regular-expressions/gd-library.dylan   (contents, props changed)
   trunk/libraries/regular-expressions/gd-regular-expressions.lid   (contents, props changed)
   trunk/libraries/regular-expressions/interface.dylan
      - copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/interface.dylan
   trunk/libraries/regular-expressions/library.dylan
      - copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/library.dylan
   trunk/libraries/regular-expressions/match.dylan
      - copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/match.dylan
   trunk/libraries/regular-expressions/parse.dylan
      - copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/parse.dylan
   trunk/libraries/regular-expressions/regular-expressions.lid
      - copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid
Log:
job: 7357
Committing the few changes made so far to merge the regular-expressions
libraries for Open Dylan (OD) and Gwydion Dylan (GD).  Not committing the
registry entry yet, since that could cause someone to get the wrong version.
I started with the OD regex sources, so one can diff against those to see
what changes were made.

Added: trunk/libraries/regular-expressions/gd-library.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/gd-library.dylan	Sun May 27 06:00:28 2007
@@ -0,0 +1,63 @@
+module:     dylan-user
+author:     Nick Kramer (nkramer at cs.cmu.edu)
+synopsis:   Contains the library and module definitions for the Regular
+            Expressions library.
+copyright: see below
+
+//======================================================================
+//
+// Copyright (c) 1994  Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
+// All rights reserved.
+// 
+// Use and copying of this software and preparation of derivative
+// works based on this software are permitted, including commercial
+// use, provided that the following conditions are observed:
+// 
+// 1. This copyright notice must be retained in full on any copies
+//    and on appropriate parts of any derivative works.
+// 2. Documentation (paper or online) accompanying any system that
+//    incorporates this software, or any part of it, must acknowledge
+//    the contribution of the Gwydion Project at Carnegie Mellon
+//    University, and the Gwydion Dylan Maintainers.
+// 
+// This software is made available "as is".  Neither the authors nor
+// Carnegie Mellon University make any warranty about the software,
+// its performance, or its conformity to any specification.
+// 
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation. 
+//
+//======================================================================
+
+
+define library regular-expressions
+  use dylan;
+  use collection-extensions;
+  use string-extensions;
+  use table-extensions;
+  export
+    regular-expressions;
+end library regular-expressions;
+
+define module regular-expressions
+  use dylan;
+  use extensions;
+  use string-conversions;
+  use character-type;
+  use string-hacking;
+  use subseq;
+  use %do-replacement;
+  use %parse-string;
+  use substring-search;
+  use table-extensions, import: { string-hash };
+  export
+    regexp-position, make-regexp-positioner,
+    regexp-replace, make-regexp-replacer,
+    regexp-match, regexp-matches,
+    translate, make-translator,
+    split, make-splitter,
+    join,
+    <illegal-regexp>;
+end module regular-expressions;

Added: trunk/libraries/regular-expressions/gd-regular-expressions.lid
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/gd-regular-expressions.lid	Sun May 27 06:00:28 2007
@@ -0,0 +1,6 @@
+library: regular-expressions
+unique-id-base: 1400
+files: gd-library
+       match
+       parse
+       interface

Copied: trunk/libraries/regular-expressions/interface.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/interface.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/interface.dylan	(original)
+++ trunk/libraries/regular-expressions/interface.dylan	Sun May 27 06:00:28 2007
@@ -3,13 +3,12 @@
 synopsis: This provides a useable interface for users. Functions 
 	  defined outside this file are really too strange and quirky 
           to be of use to people.
-copyright:  Copyright (C) 1994, Carnegie Mellon University.
-            All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/interface.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
 
 //======================================================================
 //
 // Copyright (c) 1994  Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
 // All rights reserved.
 // 
 // Use and copying of this software and preparation of derivative
@@ -21,14 +20,15 @@
 // 2. Documentation (paper or online) accompanying any system that
 //    incorporates this software, or any part of it, must acknowledge
 //    the contribution of the Gwydion Project at Carnegie Mellon
-//    University.
+//    University, and the Gwydion Dylan Maintainers.
 // 
 // This software is made available "as is".  Neither the authors nor
 // Carnegie Mellon University make any warranty about the software,
 // its performance, or its conformity to any specification.
 // 
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation. 
 //
 //======================================================================
 
@@ -90,40 +90,28 @@
 // weak pointers to these strings.  In practice, however, most of the
 // regexp strings are literals, so this isn't usually a drawback.
 //
-// For speed, we compare strings with == rather than = (thus
-// object-table).  Again, because in practice we're dealing mostly
-// with literals, == and = should be almost identical.
-//
+// This used to compare strings with == rather than =, but that leaked
+// lots of memory.
+// 
 define class <regexp-cache> (<table>) end;
 
 // table-protocol{<regexp-cache>} -- method on imported G.F.
 //
 define method table-protocol (table :: <regexp-cache>) 
  => (equal? :: <function>, hash :: <function>);
-  /*
-  values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
-	  => res :: <boolean>;
-	   key1.regexp-string == key2.regexp-string
-	     & key1.character-set-type == key2.character-set-type;
-	 end method,
-	 method (key :: <cache-key>) => (id :: <integer>, state); // hash()
-	   let (string-id, string-state) = object-hash(key.regexp-string);
-	   let (set-type-id, set-type-state) 
-	     = object-hash(key.character-set-type);
-	   merge-hash-codes(string-id, string-state, 
-			    set-type-id, set-type-state, ordered: #t);
-	 end method);
-  */
   values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
 	  => res :: <boolean>;
-	   key1.regexp-string == key2.regexp-string
+	   key1.regexp-string = key2.regexp-string
 	     & key1.character-set-type == key2.character-set-type;
 	 end method,
-	 method (key :: <cache-key>, initial-state) => (id :: <integer>, state); // hash()
-	   let (string-id, string-state) = object-hash(key.regexp-string, initial-state);
+	 method (key :: <cache-key>, initial-state)
+	  => (id :: <integer>, state); // hash()
+	   let (string-id, string-state)
+	     = string-hash(key.regexp-string, initial-state);
 	   let (set-type-id, set-type-state) 
 	     = object-hash(key.character-set-type, string-state);
-	   values(merge-hash-ids(string-id, set-type-id, ordered: #t), set-type-state);
+	   let id = merge-hash-ids(string-id, set-type-id, ordered: #t);
+	   values(id, set-type-state);
 	 end method);
 end method table-protocol;
 
@@ -143,8 +131,8 @@
  => (parsed-regexp :: <parsed-regexp>, last-group :: <integer>);
   let key = make(<cache-key>, regexp-string: regexp, 
 		 character-set-type: character-set-type); 
-  let (cached?, cached-value) = key-exists?(*regexp-cache*, key);
-  if (cached?)
+  let cached-value = element(*regexp-cache*, key, default: #f);
+  if (cached-value)
     values(cached-value.parse-tree, cached-value.last-group);
   else
     let (parsed-regexp, last-group) = parse(regexp, character-set-type);
@@ -154,18 +142,6 @@
   end if;
 end function parse-or-use-cached;
 
-// KJP: added
-//
-define inline function key-exists? (table :: <table>, key :: <object>)
- => (exists? :: <boolean>, value :: <object>)
-  let value = element(table, key, default: unfound());
-  if (found?(value))
-    values(#t, value)
-  else
-    values(#f, #f)
-  end;
-end function;
-
 
 // Regexp positioner stuff
 
@@ -244,8 +220,6 @@
   apply(values, result)
 end;
 
-// #if (have-free-time)
-/*
 // regexp-matches -- exported
 //
 // A more convenient form of regexp-position.  Usually you want
@@ -258,47 +232,28 @@
     (big :: <string>, regexp :: <string>,
      #key start: start-index :: <integer> = 0,
           end: end-index :: false-or(<integer>),
-          case-sensitive :: <boolean> = #f,
-          groups :: false-or(<sequence>))
- => (#rest group-strings :: false-or(<string>));
-  if (~groups)
-    error("Mandatory keyword groups: not used in call to regexp-matches");
-  end if;
-  let (#rest marks)
+          case-sensitive :: <boolean> = #f)
+
+  let (regexp-start, lemon, #rest marks)
     = regexp-position(big, regexp, start: start-index, end: end-index, 
 		      case-sensitive: case-sensitive);
-  let return-val = make(<vector>, size: groups.size, fill: #f);
-  for (index from 0 below return-val.size)
-    let group-start = groups[index] * 2;
-    let group-end = group-start + 1;
-    if (element(marks, group-start, default: #f))
-      return-val[index] := copy-sequence(big, start: 
-
-  let sz = floor/(marks.size, 2);
-  let return = make(<vector>, size: sz, fill: #f);
-  for (index from 0 below sz)
-    let pos = index * 2;
-    if (element(marks, pos, default: #f))
-      return[index] := copy-sequence(big, start: marks[pos],
-				     end: marks[pos + 1]);
-    end if;
-  end for;
-  if (matches)
-    let return = make(<vector>, size: matches.size * 2);
-    for (raw-pos in matches, index from 0)
-      let src-pos = raw-pos * 2;
-      let dest-pos = index * 2;
-      return[dest-pos] := element(marks, src-pos, default: #f);
-      return[dest-pos + 1] := element(marks, src-pos + 1, default: #f);
+
+  let return-size = floor/(marks.size, 2);
+  let return = make(<vector>, size: return-size, fill: #f);
+  if (regexp-start)
+    // all groups associate by index in the result
+
+    for (index from 0 below return-size)
+      let pos = index * 2;
+      if (element(marks, pos, default: #f))
+        // "14 0", "4 5", "7 8", "9 10" for "this is a test"
+        //return[index] := concatenate(integer-to-string(marks[pos]), concatenate(" ", integer-to-string(marks[pos + 1])));
+        return[index] := copy-sequence(big, start: marks[pos], end: marks[pos + 1]);
+      end if;
     end for;
-    apply(values, return);
-  else
-    
-    apply(values, marks);
   end if;
-
-// #endif
-*/
+  apply(values, return);
+end function;
 
 
 // Functions based on regexp-position

Copied: trunk/libraries/regular-expressions/library.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/library.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/library.dylan	(original)
+++ trunk/libraries/regular-expressions/library.dylan	Sun May 27 06:00:28 2007
@@ -2,13 +2,12 @@
 author:     Nick Kramer (nkramer at cs.cmu.edu)
 synopsis:   Contains the library and module definitions for the Regular
             Expressions library.
-copyright:  Copyright (C) 1994, Carnegie Mellon University.
-            All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/library.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
 
 //======================================================================
 //
 // Copyright (c) 1994  Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
 // All rights reserved.
 // 
 // Use and copying of this software and preparation of derivative
@@ -20,28 +19,29 @@
 // 2. Documentation (paper or online) accompanying any system that
 //    incorporates this software, or any part of it, must acknowledge
 //    the contribution of the Gwydion Project at Carnegie Mellon
-//    University.
+//    University, and the Gwydion Dylan Maintainers.
 // 
 // This software is made available "as is".  Neither the authors nor
 // Carnegie Mellon University make any warranty about the software,
 // its performance, or its conformity to any specification.
 // 
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation. 
 //
 //======================================================================
 
 
 define library regular-expressions
   use functional-dylan;
-  // use collection-extensions; // KJP: removed
   use string-extensions;
   export
     regular-expressions;
 end library regular-expressions;
 
 define module regular-expressions
-  use functional-dylan;
+  use functional-dylan, exclude: { split };
+  use dylan-extensions, import: { string-hash };
   // use extensions;
   use string-conversions;
   use character-type;
@@ -52,7 +52,7 @@
   use substring-search;
   export
     regexp-position, make-regexp-positioner,
-    regexp-match,
+    regexp-match, regexp-matches,
     regexp-replace, make-regexp-replacer,
     translate, make-translator,
     split, make-splitter,

Copied: trunk/libraries/regular-expressions/match.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/match.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/match.dylan	(original)
+++ trunk/libraries/regular-expressions/match.dylan	Sun May 27 06:00:28 2007
@@ -2,13 +2,12 @@
 author:   Nick Kramer (nkramer at cs.cmu.edu)
 synopsis: This takes a parsed regular expression and tries to find a match
           for it.
-copyright:  Copyright (C) 1994, Carnegie Mellon University.
-            All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/match.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
 
 //======================================================================
 //
 // Copyright (c) 1994  Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
 // All rights reserved.
 // 
 // Use and copying of this software and preparation of derivative
@@ -20,14 +19,15 @@
 // 2. Documentation (paper or online) accompanying any system that
 //    incorporates this software, or any part of it, must acknowledge
 //    the contribution of the Gwydion Project at Carnegie Mellon
-//    University.
+//    University, and the Gwydion Dylan Maintainers.
 // 
 // This software is made available "as is".  Neither the authors nor
 // Carnegie Mellon University make any warranty about the software,
 // its performance, or its conformity to any specification.
 // 
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation. 
 //
 //======================================================================
 
@@ -69,7 +69,6 @@
 // backtrack until it gets to match-root?'s "fail" non-local exit.
 
 define class <substring> (<object>)
-  // KJP: Constant slots.
   constant slot entire-string :: <string>, required-init-keyword: #"string";
   constant slot start-index :: <integer>, required-init-keyword: #"start";
   constant slot end-index :: <integer>, required-init-keyword: #"end";

Copied: trunk/libraries/regular-expressions/parse.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/parse.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/parse.dylan	(original)
+++ trunk/libraries/regular-expressions/parse.dylan	Sun May 27 06:00:28 2007
@@ -1,12 +1,11 @@
 module: regular-expressions
 author: Nick Kramer (nkramer at cs.cmu.edu)
-copyright:  Copyright (C) 1994, Carnegie Mellon University.
-            All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/parse.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
 
 //======================================================================
 //
 // Copyright (c) 1994  Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000  Gwydion Dylan Maintainers
 // All rights reserved.
 // 
 // Use and copying of this software and preparation of derivative
@@ -18,14 +17,15 @@
 // 2. Documentation (paper or online) accompanying any system that
 //    incorporates this software, or any part of it, must acknowledge
 //    the contribution of the Gwydion Project at Carnegie Mellon
-//    University.
+//    University, and the Gwydion Dylan Maintainers.
 // 
 // This software is made available "as is".  Neither the authors nor
 // Carnegie Mellon University make any warranty about the software,
 // its performance, or its conformity to any specification.
 // 
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation. 
 //
 //======================================================================
 
@@ -121,6 +121,7 @@
 define sealed domain make (singleton(<illegal-regexp>));
 define sealed domain initialize (<illegal-regexp>);
 
+// cgay todo
 /* KJP: Doesn't work this way in Functional Developer.
 define sealed method report-condition (cond :: <illegal-regexp>, stream) => ();
   condition-format(stream, "Illegal regular expression: \n"
@@ -129,7 +130,7 @@
 		   cond.regular-expression);
 end method report-condition;
 */
-ignorable(regular-expression);
+//ignorable(regular-expression);
 
 define method parse (regexp :: <string>, character-set-type :: <class>)
  => (parsed-regexp :: <parsed-regexp>, last-group :: <integer>,
@@ -173,7 +174,7 @@
 end method parse-alternative;
 
 define method parse-quantified-atom (s :: <parse-string>, info :: <parse-info>)
- => parsed-regexp :: <parsed-regexp>;
+ => parsed-regexp :: false-or(<parsed-regexp>);
   let atom = parse-atom(s, info);
   let char = lookahead(s);
   select (char by \=)
@@ -209,14 +210,15 @@
 	end if;
       end for;
       consume(s);         // Eat closing brace
+      let first-num = string-to-integer(as(<byte-string>, first-string));
       make(<quantified-atom>, atom: atom, 
-	   min: sequence-to-integer(first-string), // KJP: string-to -> sequence-to
+	   min: first-num,
 	   max:  if (~has-comma)    
-		   sequence-to-integer(first-string)
+		   first-num
 		 elseif (empty?(second-string))   
 		   #f
 		 else
-		   sequence-to-integer(second-string) 
+		   string-to-integer(as(<byte-string>, second-string))
 		 end if);
 
     otherwise =>
@@ -224,14 +226,8 @@
   end select;
 end method parse-quantified-atom;
 
-// KJP: added, quickie
-//
-define method sequence-to-integer (seq :: <deque>) => (int :: <integer>)
-  string-to-integer(as(<byte-string>, seq));
-end method sequence-to-integer;
-
 define method parse-atom (s :: <parse-string>, info :: <parse-info>)
- => parsed-regexp :: <parsed-regexp>;
+ => parsed-regexp :: false-or(<parsed-regexp>);
   let char = lookahead(s);
   select (char)
     '(' =>

Copied: trunk/libraries/regular-expressions/regular-expressions.lid (from r11362, trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid	(original)
+++ trunk/libraries/regular-expressions/regular-expressions.lid	Sun May 27 06:00:28 2007
@@ -1,5 +1,4 @@
-library: Regular-expressions
-unique-id-base: 1400
+library: regular-expressions
 files: library.dylan
  match.dylan
  parse.dylan



More information about the chatter mailing list