[Gd-chatter] r11744 - trunk/libraries/utilities/peg-parser

agent at gwydiondylan.org agent at gwydiondylan.org
Mon Mar 24 00:41:47 CET 2008


Author: agent
Date: Mon Mar 24 00:41:46 2008
New Revision: 11744

Modified:
   trunk/libraries/utilities/peg-parser/parser-definers.dylan
   trunk/libraries/utilities/peg-parser/parser-support.dylan
Log:
Job: minor
Add "cleanup" and "afterwards" clauses to parsers like in blocks.
These let you do additional validation that considers context.


Modified: trunk/libraries/utilities/peg-parser/parser-definers.dylan
==============================================================================
--- trunk/libraries/utilities/peg-parser/parser-definers.dylan	(original)
+++ trunk/libraries/utilities/peg-parser/parser-definers.dylan	Mon Mar 24 00:41:46 2008
@@ -46,34 +46,72 @@
 /// token.
 ///
 /// The macro takes three forms. A form like
+///
 /// [code]
 /// define parser t (<c>)
 ///   rule many(t2) => tokens;
-///   slot content = tokens[1];
+///   inherited slot content = tokens[1];
+///   slot more-content :: <string> = tokens[2];
 /// end parser;
 /// [end code]
+///
 /// defines a rule parser named `parse-t` and a token class named `<t-token>`
-/// which inherits from `<c>` (optional) and `<token>` [code qv]. `<t-token>` will
-/// have a slot named `content` that gets initialized to the expression `tokens[1]`,
-/// where `tokens` is the product of the rule `many(t2)`.
+/// which inherits from `<c>` (optional) and `<token>`. `<t-token>` will have
+/// a slot named `content` (inherited from <c>) and a slot named `more-content`.
+/// When <t-token> is initialized, `tokens` gets set to the product of the rule
+/// `many(t2)`, `content` gets set to the expression `tokens[1]`, and
+/// `more-content` gets set to the expression `tokens[2]`, which must be a
+/// <string>.
 /// 
 /// A form like 
+///
 /// [code]
 /// define parser t
 ///   rule many(t2) => tokens;
 ///   yield tokens[1];
 /// end parser;
 /// [end code]
+///
 /// defines a rule parser that returns `tokens[1]` directly, without defining
 /// a `<t-token>` class.
 ///
 /// A form like
+///
 /// [code]
 /// define parser t
 ///   rule many(t2)
 /// end parser;
 /// [end code]
+///
 /// defines a rule parser that return `#"t"`.
+///
+/// All three forms allow two additional clauses, "afterwards" and "cleanup",
+/// that perform actions after the rule parser matches or fails to match.
+///
+/// [code]
+/// define parser t
+///   rule many(t2) => tokens;
+///   slot t2 = tokens.count
+///   afterwards (context, tokens)
+///     // Executes if match is successful. 'tokens' is local to this clause.
+///     context.total-t2-count := context.total-t2-count + tokens.size
+///   cleanup (context)
+///     // Executes if match is successful or not after 'afterwards' clause.
+///     // 'tokens' is not accessible.
+///     context.tried-t? := #t
+/// end parser;
+/// [end code]
+///
+/// [code]
+/// define parser t
+///   rule many(t2);
+///   afterwards (context, tokens)
+///     // The product of this parser is #"t", but "tokens" will be the
+///     // product of many(t2).
+///     ...
+/// end parser;
+/// [end code]
+///
 define macro parser-definer
 
    //
@@ -81,18 +119,18 @@
    {
       define parser ?token-name:name (?supers)
          rule ?rule => ?product-name:name;
-         ?class-slots
+         ?class-slots-and-clauses
       end
    } => {
       // Define the class.
-      class-specifier(?token-name; ?supers; ?class-slots);
+      class-specifier(?token-name; ?supers; ?class-slots-and-clauses);
       
       // Initialize the class's slots based on results of rule.
       define method initialize (?token-name :: "<" ## ?token-name ## "-token>",
             #next next-method, #key ?product-name = unsupplied())
          next-method();
          if (supplied?(?product-name))
-            slot-initializers(?token-name; ?class-slots)
+            slot-initializers(?token-name; ?class-slots-and-clauses)
          end if;
       end method;
       
@@ -109,13 +147,18 @@
          let pos = stream.stream-position;
          let production =
                block()
-                  ?token-name ## "-parser-rule" (stream, context)
+                  // User-defined match action on rule product
+                  "match-" ## ?token-name
+                        (context, ?token-name ## "-parser-rule" (stream, context))
                afterwards
                   format-trace(?"token-name" ## " matched chars %x-%x",
                                pos, stream.stream-position);
                cleanup
                   outdent-trace();
+                  // User-defined cleanup actions
+                  "cleanup-" ## ?token-name (context);
                exception (err :: <parse-failure>)
+                  unless (err.failure-position) err.failure-position := pos end;
                   err.parse-expected :=
                         concatenate!(err.parse-expected, " in " ## ?"token-name");
                   indent-trace();
@@ -128,6 +171,9 @@
               start: pos, end: stream.stream-position, ?product-name: production)
       end function;
       
+      // User defined action functions
+      user-functions(?token-name; ?class-slots-and-clauses);
+      
       // Names for the parser.
       *rule-names*["parse-" ## ?token-name] := "?token-name";
       *rule-name-parts*["parse-" ## ?token-name] := #( ?"token-name" );
@@ -139,6 +185,7 @@
       define parser ?token-name:name
          rule ?rule => ?product-name:name;
          yield ?:expression;
+         ?body-clauses
       end
    } => {
       // Define the parser rule as the result of all the 'seq' etc. functions.
@@ -153,13 +200,18 @@
          let pos = stream.stream-position;
          let ?product-name =
                block()
-                  ?token-name ## "-parser-rule" (stream, context)
+                  // User-defined match action on rule product
+                  "match-" ## ?token-name
+                        (context, ?token-name ## "-parser-rule" (stream, context))
                afterwards
                   format-trace(?"token-name" ## " matched chars %x-%x",
                                pos, stream.stream-position);
                cleanup
                   outdent-trace();
+                  // User-defined cleanup actions
+                  "cleanup-" ## ?token-name (context);
                exception (err :: <parse-failure>)
+                  unless (err.failure-position) err.failure-position := pos end;
                   err.parse-expected :=
                         concatenate!(err.parse-expected, " in " ## ?"token-name");
                   indent-trace();
@@ -171,6 +223,9 @@
          ?expression
       end function;
       
+      // User defined action functions
+      user-functions(?token-name; ?body-clauses);
+      
       // Names for parser.
       *rule-names*["parse-" ## ?token-name] := "?token-name";
       *rule-name-parts*["parse-" ## ?token-name] := #( ?"token-name" );
@@ -181,6 +236,7 @@
    {
       define parser ?token-name:name
          rule ?rule;
+         ?body-clauses
       end
    } => {
       // Define the parser rule as the result of all the 'seq' etc. functions.
@@ -194,13 +250,18 @@
          format-trace(?"token-name" ## "...");
          let pos = stream.stream-position;
          block()
-            ?token-name ## "-parser-rule" (stream, context)
+            // User-defined match action on rule product
+            "match-" ## ?token-name
+                  (context, ?token-name ## "-parser-rule" (stream, context))
          afterwards
             format-trace(?"token-name" ## " matched chars %x-%x",
                          pos, stream.stream-position);
          cleanup
             outdent-trace();
+            // User-defined cleanup actions
+            "cleanup-" ## ?token-name (context);
          exception (err :: <parse-failure>)
+            unless (err.failure-position) err.failure-position := pos end;
             err.parse-expected :=
                   concatenate!(err.parse-expected, " in " ## ?"token-name");
             format-trace("  " ## ?"token-name" ## " no match, exp. %s at %x",
@@ -210,6 +271,9 @@
          ?#"token-name"
       end function;
       
+      // User defined action functions
+      user-functions(?token-name; ?body-clauses);
+      
       // Names for parser.
       *rule-names*["parse-" ## ?token-name] := ?"token-name";
       *rule-name-parts*["parse-" ## ?token-name] := #( ?"token-name" );
@@ -241,9 +305,19 @@
    { } => { <token> }
 
 // No transformation; simply describe/enforce syntax.
-class-slots:
-   { slot ?:variable = ?:expression; ... } => { slot ?variable = ?expression; ... }
-   { slot ?:variable; ... } => { slot ?variable; ... }
+class-slots-and-clauses:
+   { slot ?:variable = ?:expression; ... }
+      => { slot ?variable = ?expression; ... }
+   { inherited slot ?:name = ?:expression; ... }
+      => { inherited slot ?name = ?expression; ... }
+   { ?body-clauses } => { ?body-clauses }
+   
+// Optional.
+body-clauses:
+   { afterwards (?context:name, ?product:name) ?:body; ... }
+      => { afterwards ?context, ?product, ?body; ... }
+   { cleanup (?context:name) ?:body }
+      => { cleanup ?context, ?body }
    { } => { }
 end macro;
 
@@ -259,8 +333,14 @@
       end class;
    }
 class-slots:
-   { slot ?:variable = ?:expression; ... } => { slot ?variable; ... }
-   { slot ?:variable; ... } => { slot ?variable; ... }
+   { slot ?:variable = ?:expression; ... }
+      => { slot ?variable; ... }
+   { inherited slot ?:name = ?:expression; ... }
+      => { inherited slot ?name; ... }
+      
+   // These are extra baggage to be ignored
+   { afterwards ?dummy:*; ... } => { ... }
+   { cleanup ?dummy:*; ... } => { ... }
    { } => { }
 end macro;
 
@@ -274,9 +354,74 @@
                         ?more:*) }
       => { ?token.?slot-name := ?expression; slot-initializers(?token; ?more) }
    { slot-initializers (?token:name;
-                        slot ?slot-name:name :: ?slot-type:expression;
+                        inherited slot ?slot-name:name = ?:expression;
                         ?more:*) }
+      => { ?token.?slot-name := ?expression; slot-initializers(?token; ?more) }
+      
+   // These are extra baggage to be ignored
+   { slot-initializers (?token:name; afterwards ?dummy:*; ?more:*) }
+      => { slot-initializers(?token; ?more) }
+   { slot-initializers (?token:name; cleanup ?dummy:*; ?more:*) }
       => { slot-initializers(?token; ?more) }
    { slot-initializers (?token:name) }
       => { }
 end macro;
+
+
+// This auxiliary macro generates the match- and cleanup- functions.
+define macro user-functions
+
+   // Clean out any slot baggage.
+   { user-functions(?token:name; slot ?dummy:*; ?more:*) }
+      => { user-functions(?token; ?more) }
+   { user-functions(?token:name; inherited slot ?dummy:*; ?more:*) }
+      => { user-functions(?token; ?more) }
+
+   {
+      user-functions(?token:name;
+            afterwards ?after-ctxt:name, ?after-prod:name, ?after-expr:expression;
+            cleanup ?clean-ctxt:name, ?clean-expr:expression)
+   } => {
+      define inline function "match-" ## ?token (?after-ctxt, ?after-prod)
+      => (p)
+         ?after-expr; ?after-prod;
+      end function;
+      define inline function "cleanup-" ## ?token (?clean-ctxt) => ()
+         ?clean-expr
+      end function;
+   }
+   
+   {
+      user-functions(?token:name;
+            cleanup ?clean-ctxt:name, ?clean-expr:expression)
+   } => {
+      define inline function "match-" ## ?token (c, p) => (p)
+         p
+      end function;
+      define inline function "cleanup-" ## ?token (?clean-ctxt) => ()
+         ?clean-expr
+      end function;
+   }
+   
+   {
+      user-functions(?token:name;
+            afterwards ?after-ctxt:name, ?after-prod:name, ?after-expr:expression)
+   } => {
+      define inline function "match-" ## ?token (?after-ctxt, ?after-prod)
+      => (p)
+         ?after-expr; ?after-prod
+      end function;
+      define inline function "cleanup-" ## ?token (c) => ()
+      end function;
+   }
+   
+   {
+      user-functions(?token:name)
+   } => {
+      define inline function "match-" ## ?token (c, p) => (p)
+         p
+      end function;
+      define inline function "cleanup-" ## ?token (c) => ()
+      end function;
+   }
+end macro;

Modified: trunk/libraries/utilities/peg-parser/parser-support.dylan
==============================================================================
--- trunk/libraries/utilities/peg-parser/parser-support.dylan	(original)
+++ trunk/libraries/utilities/peg-parser/parser-support.dylan	Mon Mar 24 00:41:46 2008
@@ -20,7 +20,7 @@
    // "expected:" is required because parent tokens add themselves to
    // parse-expected.
    slot parse-expected :: <string>, required-init-keyword: #"expected";
-   constant slot failure-position = #f, init-keyword: #"position";
+   slot failure-position = #f, init-keyword: #"position";
 end class;
 
 



More information about the chatter mailing list