[Gd-chatter] r11377 - trunk/libraries/regular-expressions
cgay at gwydiondylan.org
cgay at gwydiondylan.org
Sun May 27 06:00:32 CEST 2007
Author: cgay
Date: Sun May 27 06:00:28 2007
New Revision: 11377
Added:
trunk/libraries/regular-expressions/gd-library.dylan (contents, props changed)
trunk/libraries/regular-expressions/gd-regular-expressions.lid (contents, props changed)
trunk/libraries/regular-expressions/interface.dylan
- copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/interface.dylan
trunk/libraries/regular-expressions/library.dylan
- copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/library.dylan
trunk/libraries/regular-expressions/match.dylan
- copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/match.dylan
trunk/libraries/regular-expressions/parse.dylan
- copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/parse.dylan
trunk/libraries/regular-expressions/regular-expressions.lid
- copied, changed from r11362, trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid
Log:
job: 7357
Committing the few changes made so far to merge the regular-expressions
libraries for OD and GD. Not committing the registry entry yet since that
could cause someone to get the wrong version. I started with the OD regex
sources, so one could diff with that to see what changes were made.
Added: trunk/libraries/regular-expressions/gd-library.dylan
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/gd-library.dylan Sun May 27 06:00:28 2007
@@ -0,0 +1,63 @@
+module: dylan-user
+author: Nick Kramer (nkramer at cs.cmu.edu)
+synopsis: Contains the library and module definitions for the Regular
+ Expressions library.
+copyright: see below
+
+//======================================================================
+//
+// Copyright (c) 1994 Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
+// All rights reserved.
+//
+// Use and copying of this software and preparation of derivative
+// works based on this software are permitted, including commercial
+// use, provided that the following conditions are observed:
+//
+// 1. This copyright notice must be retained in full on any copies
+// and on appropriate parts of any derivative works.
+// 2. Documentation (paper or online) accompanying any system that
+// incorporates this software, or any part of it, must acknowledge
+// the contribution of the Gwydion Project at Carnegie Mellon
+// University, and the Gwydion Dylan Maintainers.
+//
+// This software is made available "as is". Neither the authors nor
+// Carnegie Mellon University make any warranty about the software,
+// its performance, or its conformity to any specification.
+//
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation.
+//
+//======================================================================
+
+
+define library regular-expressions
+ use dylan;
+ use collection-extensions;
+ use string-extensions;
+ use table-extensions;
+ export
+ regular-expressions;
+end library regular-expressions;
+
+define module regular-expressions
+ use dylan;
+ use extensions;
+ use string-conversions;
+ use character-type;
+ use string-hacking;
+ use subseq;
+ use %do-replacement;
+ use %parse-string;
+ use substring-search;
+ use table-extensions, import: { string-hash };
+ export
+ regexp-position, make-regexp-positioner,
+ regexp-replace, make-regexp-replacer,
+ regexp-match, regexp-matches,
+ translate, make-translator,
+ split, make-splitter,
+ join,
+ <illegal-regexp>;
+end module regular-expressions;
Added: trunk/libraries/regular-expressions/gd-regular-expressions.lid
==============================================================================
--- (empty file)
+++ trunk/libraries/regular-expressions/gd-regular-expressions.lid Sun May 27 06:00:28 2007
@@ -0,0 +1,6 @@
+library: regular-expressions
+unique-id-base: 1400
+files: gd-library
+ match
+ parse
+ interface
Copied: trunk/libraries/regular-expressions/interface.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/interface.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/interface.dylan (original)
+++ trunk/libraries/regular-expressions/interface.dylan Sun May 27 06:00:28 2007
@@ -3,13 +3,12 @@
synopsis: This provides a useable interface for users. Functions
defined outside this file are really too strange and quirky
to be of use to people.
-copyright: Copyright (C) 1994, Carnegie Mellon University.
- All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/interface.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
//======================================================================
//
// Copyright (c) 1994 Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
// All rights reserved.
//
// Use and copying of this software and preparation of derivative
@@ -21,14 +20,15 @@
// 2. Documentation (paper or online) accompanying any system that
// incorporates this software, or any part of it, must acknowledge
// the contribution of the Gwydion Project at Carnegie Mellon
-// University.
+// University, and the Gwydion Dylan Maintainers.
//
// This software is made available "as is". Neither the authors nor
// Carnegie Mellon University make any warranty about the software,
// its performance, or its conformity to any specification.
//
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation.
//
//======================================================================
@@ -90,40 +90,28 @@
// weak pointers to these strings. In practice, however, most of the
// regexp strings are literals, so this isn't usually a drawback.
//
-// For speed, we compare strings with == rather than = (thus
-// object-table). Again, because in practice we're dealing mostly
-// with literals, == and = should be almost identical.
-//
+// This used to compare strings with == rather than =, but this leaks
+// lots of memory
+//
define class <regexp-cache> (<table>) end;
// table-protocol{<regexp-cache>} -- method on imported G.F.
//
define method table-protocol (table :: <regexp-cache>)
=> (equal? :: <function>, hash :: <function>);
- /*
- values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
- => res :: <boolean>;
- key1.regexp-string == key2.regexp-string
- & key1.character-set-type == key2.character-set-type;
- end method,
- method (key :: <cache-key>) => (id :: <integer>, state); // hash()
- let (string-id, string-state) = object-hash(key.regexp-string);
- let (set-type-id, set-type-state)
- = object-hash(key.character-set-type);
- merge-hash-codes(string-id, string-state,
- set-type-id, set-type-state, ordered: #t);
- end method);
- */
values(method (key1 :: <cache-key>, key2 :: <cache-key>) // equal?
=> res :: <boolean>;
- key1.regexp-string == key2.regexp-string
+ key1.regexp-string = key2.regexp-string
& key1.character-set-type == key2.character-set-type;
end method,
- method (key :: <cache-key>, initial-state) => (id :: <integer>, state); // hash()
- let (string-id, string-state) = object-hash(key.regexp-string, initial-state);
+ method (key :: <cache-key>, initial-state)
+ => (id :: <integer>, state); // hash()
+ let (string-id, string-state)
+ = string-hash(key.regexp-string, initial-state);
let (set-type-id, set-type-state)
= object-hash(key.character-set-type, string-state);
- values(merge-hash-ids(string-id, set-type-id, ordered: #t), set-type-state);
+ let id = merge-hash-ids(string-id, set-type-id, ordered: #t);
+ values(id, set-type-state);
end method);
end method table-protocol;
@@ -143,8 +131,8 @@
=> (parsed-regexp :: <parsed-regexp>, last-group :: <integer>);
let key = make(<cache-key>, regexp-string: regexp,
character-set-type: character-set-type);
- let (cached?, cached-value) = key-exists?(*regexp-cache*, key);
- if (cached?)
+ let cached-value = element(*regexp-cache*, key, default: #f);
+ if (cached-value)
values(cached-value.parse-tree, cached-value.last-group);
else
let (parsed-regexp, last-group) = parse(regexp, character-set-type);
@@ -154,18 +142,6 @@
end if;
end function parse-or-use-cached;
-// KJP: added
-//
-define inline function key-exists? (table :: <table>, key :: <object>)
- => (exists? :: <boolean>, value :: <object>)
- let value = element(table, key, default: unfound());
- if (found?(value))
- values(#t, value)
- else
- values(#f, #f)
- end;
-end function;
-
// Regexp positioner stuff
@@ -244,8 +220,6 @@
apply(values, result)
end;
-// #if (have-free-time)
-/*
// regexp-matches -- exported
//
// A more convenient form of regexp-position. Usually you want
@@ -258,47 +232,28 @@
(big :: <string>, regexp :: <string>,
#key start: start-index :: <integer> = 0,
end: end-index :: false-or(<integer>),
- case-sensitive :: <boolean> = #f,
- groups :: false-or(<sequence>))
- => (#rest group-strings :: false-or(<string>));
- if (~groups)
- error("Mandatory keyword groups: not used in call to regexp-matches");
- end if;
- let (#rest marks)
+ case-sensitive :: <boolean> = #f)
+
+ let (regexp-start, lemon, #rest marks)
= regexp-position(big, regexp, start: start-index, end: end-index,
case-sensitive: case-sensitive);
- let return-val = make(<vector>, size: groups.size, fill: #f);
- for (index from 0 below return-val.size)
- let group-start = groups[index] * 2;
- let group-end = group-start + 1;
- if (element(marks, group-start, default: #f))
- return-val[index] := copy-sequence(big, start:
-
- let sz = floor/(marks.size, 2);
- let return = make(<vector>, size: sz, fill: #f);
- for (index from 0 below sz)
- let pos = index * 2;
- if (element(marks, pos, default: #f))
- return[index] := copy-sequence(big, start: marks[pos],
- end: marks[pos + 1]);
- end if;
- end for;
- if (matches)
- let return = make(<vector>, size: matches.size * 2);
- for (raw-pos in matches, index from 0)
- let src-pos = raw-pos * 2;
- let dest-pos = index * 2;
- return[dest-pos] := element(marks, src-pos, default: #f);
- return[dest-pos + 1] := element(marks, src-pos + 1, default: #f);
+
+ let return-size = floor/(marks.size, 2);
+ let return = make(<vector>, size: return-size, fill: #f);
+ if (regexp-start)
+ // all groups associate by index in the result
+
+ for (index from 0 below return-size)
+ let pos = index * 2;
+ if (element(marks, pos, default: #f))
+ // "14 0", "4 5", "7 8", "9 10" for "this is a test"
+ //return[index] := concatenate(integer-to-string(marks[pos]), concatenate(" ", integer-to-string(marks[pos + 1])));
+ return[index] := copy-sequence(big, start: marks[pos], end: marks[pos + 1]);
+ end if;
end for;
- apply(values, return);
- else
-
- apply(values, marks);
end if;
-
-// #endif
-*/
+ apply(values, return);
+end function;
// Functions based on regexp-position
Copied: trunk/libraries/regular-expressions/library.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/library.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/library.dylan (original)
+++ trunk/libraries/regular-expressions/library.dylan Sun May 27 06:00:28 2007
@@ -2,13 +2,12 @@
author: Nick Kramer (nkramer at cs.cmu.edu)
synopsis: Contains the library and module definitions for the Regular
Expressions library.
-copyright: Copyright (C) 1994, Carnegie Mellon University.
- All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/library.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
//======================================================================
//
// Copyright (c) 1994 Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
// All rights reserved.
//
// Use and copying of this software and preparation of derivative
@@ -20,28 +19,29 @@
// 2. Documentation (paper or online) accompanying any system that
// incorporates this software, or any part of it, must acknowledge
// the contribution of the Gwydion Project at Carnegie Mellon
-// University.
+// University, and the Gwydion Dylan Maintainers.
//
// This software is made available "as is". Neither the authors nor
// Carnegie Mellon University make any warranty about the software,
// its performance, or its conformity to any specification.
//
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation.
//
//======================================================================
define library regular-expressions
use functional-dylan;
- // use collection-extensions; // KJP: removed
use string-extensions;
export
regular-expressions;
end library regular-expressions;
define module regular-expressions
- use functional-dylan;
+ use functional-dylan, exclude: { split };
+ use dylan-extensions, import: { string-hash };
// use extensions;
use string-conversions;
use character-type;
@@ -52,7 +52,7 @@
use substring-search;
export
regexp-position, make-regexp-positioner,
- regexp-match,
+ regexp-match, regexp-matches,
regexp-replace, make-regexp-replacer,
translate, make-translator,
split, make-splitter,
Copied: trunk/libraries/regular-expressions/match.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/match.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/match.dylan (original)
+++ trunk/libraries/regular-expressions/match.dylan Sun May 27 06:00:28 2007
@@ -2,13 +2,12 @@
author: Nick Kramer (nkramer at cs.cmu.edu)
synopsis: This takes a parsed regular expression and tries to find a match
for it.
-copyright: Copyright (C) 1994, Carnegie Mellon University.
- All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/match.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
//======================================================================
//
// Copyright (c) 1994 Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
// All rights reserved.
//
// Use and copying of this software and preparation of derivative
@@ -20,14 +19,15 @@
// 2. Documentation (paper or online) accompanying any system that
// incorporates this software, or any part of it, must acknowledge
// the contribution of the Gwydion Project at Carnegie Mellon
-// University.
+// University, and the Gwydion Dylan Maintainers.
//
// This software is made available "as is". Neither the authors nor
// Carnegie Mellon University make any warranty about the software,
// its performance, or its conformity to any specification.
//
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation.
//
//======================================================================
@@ -69,7 +69,6 @@
// backtrack until it gets to match-root?'s "fail" non-local exit.
define class <substring> (<object>)
- // KJP: Constant slots.
constant slot entire-string :: <string>, required-init-keyword: #"string";
constant slot start-index :: <integer>, required-init-keyword: #"start";
constant slot end-index :: <integer>, required-init-keyword: #"end";
Copied: trunk/libraries/regular-expressions/parse.dylan (from r11362, trunk/fundev/sources/lib/regular-expressions/parse.dylan)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/parse.dylan (original)
+++ trunk/libraries/regular-expressions/parse.dylan Sun May 27 06:00:28 2007
@@ -1,12 +1,11 @@
module: regular-expressions
author: Nick Kramer (nkramer at cs.cmu.edu)
-copyright: Copyright (C) 1994, Carnegie Mellon University.
- All rights reserved.
-rcs-header: $Header: /scm/cvs/fundev/Sources/lib/regular-expressions/parse.dylan,v 1.1 2004/03/12 00:08:52 cgay Exp $
+copyright: see below
//======================================================================
//
// Copyright (c) 1994 Carnegie Mellon University
+// Copyright (c) 1998, 1999, 2000 Gwydion Dylan Maintainers
// All rights reserved.
//
// Use and copying of this software and preparation of derivative
@@ -18,14 +17,15 @@
// 2. Documentation (paper or online) accompanying any system that
// incorporates this software, or any part of it, must acknowledge
// the contribution of the Gwydion Project at Carnegie Mellon
-// University.
+// University, and the Gwydion Dylan Maintainers.
//
// This software is made available "as is". Neither the authors nor
// Carnegie Mellon University make any warranty about the software,
// its performance, or its conformity to any specification.
//
-// Bug reports, questions, comments, and suggestions should be sent by
-// E-mail to the Internet address "gwydion-bugs at cs.cmu.edu".
+// Bug reports should be sent to <gd-bugs at gwydiondylan.org>; questions,
+// comments and suggestions are welcome at <gd-hackers at gwydiondylan.org>.
+// Also, see http://www.gwydiondylan.org/ for updates and documentation.
//
//======================================================================
@@ -121,6 +121,7 @@
define sealed domain make (singleton(<illegal-regexp>));
define sealed domain initialize (<illegal-regexp>);
+// cgay todo
/* KJP: Doesn't work this way in Functional Developer.
define sealed method report-condition (cond :: <illegal-regexp>, stream) => ();
condition-format(stream, "Illegal regular expression: \n"
@@ -129,7 +130,7 @@
cond.regular-expression);
end method report-condition;
*/
-ignorable(regular-expression);
+//ignorable(regular-expression);
define method parse (regexp :: <string>, character-set-type :: <class>)
=> (parsed-regexp :: <parsed-regexp>, last-group :: <integer>,
@@ -173,7 +174,7 @@
end method parse-alternative;
define method parse-quantified-atom (s :: <parse-string>, info :: <parse-info>)
- => parsed-regexp :: <parsed-regexp>;
+ => parsed-regexp :: false-or(<parsed-regexp>);
let atom = parse-atom(s, info);
let char = lookahead(s);
select (char by \=)
@@ -209,14 +210,15 @@
end if;
end for;
consume(s); // Eat closing brace
+ let first-num = string-to-integer(as(<byte-string>, first-string));
make(<quantified-atom>, atom: atom,
- min: sequence-to-integer(first-string), // KJP: string-to -> sequence-to
+ min: first-num,
max: if (~has-comma)
- sequence-to-integer(first-string)
+ first-num
elseif (empty?(second-string))
#f
else
- sequence-to-integer(second-string)
+ string-to-integer(as(<byte-string>, second-string))
end if);
otherwise =>
@@ -224,14 +226,8 @@
end select;
end method parse-quantified-atom;
-// KJP: added, quickie
-//
-define method sequence-to-integer (seq :: <deque>) => (int :: <integer>)
- string-to-integer(as(<byte-string>, seq));
-end method sequence-to-integer;
-
define method parse-atom (s :: <parse-string>, info :: <parse-info>)
- => parsed-regexp :: <parsed-regexp>;
+ => parsed-regexp :: false-or(<parsed-regexp>);
let char = lookahead(s);
select (char)
'(' =>
Copied: trunk/libraries/regular-expressions/regular-expressions.lid (from r11362, trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid)
==============================================================================
--- trunk/fundev/sources/lib/regular-expressions/regular-expressions.lid (original)
+++ trunk/libraries/regular-expressions/regular-expressions.lid Sun May 27 06:00:28 2007
@@ -1,5 +1,4 @@
-library: Regular-expressions
-unique-id-base: 1400
+library: regular-expressions
files: library.dylan
match.dylan
parse.dylan
More information about the chatter mailing list