[Gd-chatter] r11488 - trunk/libraries/network/wiki
hannes at gwydiondylan.org
hannes at gwydiondylan.org
Sat Nov 10 22:11:17 CET 2007
Author: hannes
Date: Sat Nov 10 22:11:16 2007
New Revision: 11488
Modified:
trunk/libraries/network/wiki/monday-parser.dylan
trunk/libraries/network/wiki/parser-test.dylan
Log:
Job: minor
nearly all test cases work now (apart from nested lists)
code looks bad, especially I'm unhappy with the lexer, productions
and then another step for <pre> and lists... comments are welcome!
also, whitespaces are now parsed explicitly, and are not ignored.
Modified: trunk/libraries/network/wiki/monday-parser.dylan
==============================================================================
--- trunk/libraries/network/wiki/monday-parser.dylan (original)
+++ trunk/libraries/network/wiki/monday-parser.dylan Sat Nov 10 22:11:16 2007
@@ -15,29 +15,33 @@
tstart :: <integer>,
tend :: <integer>)
=> (res :: <integer>)
- tend - tstart
+ let res = tend - tstart;
+ if (string[tend - 1] = ' ')
+ res := res - 1;
+ end;
+ res;
end;
define constant $base-url = "/wiki/view.dsp?title=";
define constant $wiki-tokens
= simple-lexical-definition
token EOF;
- inert "([ ])+";
+ inert "([ \t\r])+";
token LBRACKET = "\\[";
token RBRACKET = "\\]";
- token EQUALS = "(=)+",
+ token EQUALS = "(=)+[ ]?",
semantic-value-function: count-chars;
- token TILDES = "(~)+",
+ token TILDES = "(~)+[ ]?",
semantic-value-function: count-chars;
token TICKS = "(')+",
semantic-value-function: count-chars;
token AMPERSAND = "&";
- token HASHMARK = "#";
- token STAR = "*";
+ token HASHMARK = "#[ ]?";
+ token STAR = "*[ ]?";
token FOUR-DASHES = "----", priority: 3;
@@ -46,16 +50,19 @@
token SMALLER = "<";
token GREATER = ">";
- token CLIST = "(\n|\r|\r\n)(\\*|#)", priority: 3;
- token PREFORMATTED = "(\r|\n|\r\n) ", priority: 3;
+ token WHITESPACE = " ", priority: 3;
- token NEWLINE = "(\n|\r|\r\n)";
+ //token LIST-ITEM = "(\\*|#)", priority: 3;
+ //token PREFORMATTED = "\n ", priority: 3;
+ token NEWLINENEWLINE = "\n\n", priority: 3;
+
+ token NEWLINE = "\n";
//todo: ignore spaces?!
- token TEXT = "[a-zA-Z_-0-9\\.]+",
+ token TEXT = "[a-zA-Z_-0-9\\.][a-zA-Z_-0-9\\. ]*",
semantic-value-function: extract-action;
- token URL = "(http|ftp|https)://",
+ token URL = "(http|ftp|https)://[a-zA-Z_-0-9\\.:]+",
semantic-value-function: extract-action;
end;
@@ -63,18 +70,18 @@
= simple-grammar-productions
production description :: false-or(<string>) => [TEXT] (data)
- if (TEXT.size = 0) #f else TEXT end;
+ TEXT;
production wiki-page-name :: <string> => [TEXT] (data)
TEXT;
- production myurl :: <string> => [URL TEXT] (data)
- concatenate(URL, TEXT);
+ production myurl :: <string> => [URL] (data)
+ URL;
production external-link :: xml$<element> => [LBRACKET myurl RBRACKET] (data)
with-xml() a(myurl, href => myurl) end;
- production external-link :: xml$<element> => [LBRACKET myurl description RBRACKET] (data)
+ production external-link :: xml$<element> => [LBRACKET myurl WHITESPACE description RBRACKET] (data)
with-xml() a(description, href => myurl) end;
production internal-link :: xml$<element> => [LBRACKET LBRACKET wiki-page-name RBRACKET RBRACKET] (data)
@@ -83,7 +90,7 @@
production internal-link :: xml$<element> => [LBRACKET LBRACKET wiki-page-name PIPE description RBRACKET RBRACKET] (data)
with-xml() a(description, href => concatenate($base-url, wiki-page-name)) end;
- production header :: xml$<element> => [EQUALS more-wiki-text EQUALS], action:
+ production header :: xml$<element> => [EQUALS wiki-text EQUALS], action:
method (p :: <simple-parser>, data, s, e)
let heading = max(p[0], p[2]);
unless (p[0] = p[2])
@@ -95,28 +102,11 @@
children: p[1]);
end;
- production unnumbered-list :: xml$<element> => [STAR list-elements] (data)
- format-out("Hit unnumbered-list %=\n", list-elements);
- make(xml$<element>, name: "ul", children: list-elements);
-
- production numbered-list :: xml$<element> => [HASHMARK list-elements] (data)
- make(xml$<element>, name: "ol", children: list-elements);
-
- production list-elements :: <collection> => [list-element more-list-elements] (data)
- format-out("Hit list-elements\n");
- add!(more-list-elements, list-element);
-
- production more-list-elements :: <collection> => [CLIST list-element more-list-elements] (data)
- format-out("Hit more-list-elements\n");
- add!(more-list-elements | #(), list-element);
-
- production more-list-elements :: <collection> => [] (data)
- format-out("Hit more-list-elements, empty\n");
- #();
+ production unnumbered-list :: <list-node> => [STAR line] (data)
+ make(<list-node>, kind: #"unnumbered", data: line);
- production list-element :: xml$<element> => [wiki-text] (data)
- format-out("Hit list-element %=\n", wiki-text);
- make(xml$<element>, name: "li", children: wiki-text);
+ production numbered-list :: <list-node> => [HASHMARK line] (data)
+ make(<list-node>, kind: #"numbered", data: line);
production simple-format :: xml$<xml> => [TICKS TEXT TICKS], action:
method (p :: <simple-parser>, data, s, e)
@@ -159,20 +149,8 @@
production horizontal-line :: xml$<element> => [FOUR-DASHES] (data)
with-xml() hr end;
- production preformat :: xml$<element> => [PREFORMATTED TEXT more-preformat] (data)
- let pre-string = concatenate("\n ", TEXT, more-preformat);
- make(xml$<element>,
- name: "pre",
- children: list(make(xml$<char-string>, text: pre-string)));
-
- production more-preformat :: <string> => [TEXT more-preformat] (data)
- concatenate(" ", TEXT, more-preformat);
-
- production more-preformat :: <string> => [PREFORMATTED more-preformat] (data)
- concatenate("\n", more-preformat);
-
- production more-preformat :: <string> => [NEWLINE] (data)
- "\n";
+ production preformat :: <pre-node> => [WHITESPACE TEXT] (data)
+ make(<pre-node>, data: concatenate(" ", TEXT));
production line :: <collection> => [wiki-text] (data)
wiki-text;
@@ -190,12 +168,17 @@
list(horizontal-line);
production lines => [] (data)
+ //format-out("empty!\n");
- production lines => [preformat lines] (data)
+ production lines => [preformat NEWLINE lines] (data)
add!(data.my-real-data, preformat);
- production lines => [line NEWLINE NEWLINE lines] (data)
- add!(data.my-real-data, with-xml() p end);
+ production lines => [preformat NEWLINENEWLINE lines] (data)
+ add!(data.my-real-data, make(xml$<element>, name: "p"));
+ add!(data.my-real-data, preformat);
+
+ production lines => [line NEWLINENEWLINE lines] (data)
+ add!(data.my-real-data, make(xml$<element>, name: "p"));
do(curry(add!, data.my-real-data), line);
production lines => [line NEWLINE lines] (data)
@@ -232,11 +215,8 @@
define function parse-wiki-markup (input :: <string>)
let rangemap = make(<source-location-rangemap>);
rangemap-add-line(rangemap, 0, 1);
- if(input[0] = ' ')
- input := concatenate("\n", input);
- end;
- unless(input[input.size - 1] = '\n')
- input := add!(input, '\n')
+ if (input[input.size - 1] ~= '\n')
+ input := add!(input, '\n');
end;
let scanner = make(<simple-lexical-scanner>,
definition: $wiki-tokens,
@@ -256,13 +236,85 @@
end: input.size,
partial?: #f);
let end-position = scanner.scanner-source-position;
- format-out("before consuming EOF at %d\n", end-position);
+ //format-out("before consuming EOF at %d\n", end-position);
simple-parser-consume-token(parser, 0, #"EOF", parser, end-position, end-position);
- format-out("data (%d) is %=\n", data.my-real-data.size, data.my-real-data);
- reduce1(concatenate, (map(curry(as, <string>), reverse(data.my-real-data))));
+ //format-out("data (%d) is %=\n", data.my-real-data.size, data.my-real-data);
+ let result = reverse(data.my-real-data);
+ let res = make(<stretchy-vector>);
+ let i :: <integer> = 0;
+ while (i < result.size)
+ let (value, next) = process-node(result[i], i, result);
+ add!(res, value);
+ i := i + next;
+ end;
+ reduce1(concatenate, (map(curry(as, <string>), res)));
+end;
+
+define class <parsed-node> (<object>)
+ slot data, init-keyword: data:;
+end;
+
+define method as (class == <string>, object :: <parsed-node>) => (res :: <string>)
+ as(<string>, object.data);
+end;
+define class <list-node> (<parsed-node>)
+ slot list-kind :: one-of(#"numbered", #"unnumbered"), required-init-keyword: kind:;
+end;
+define method as (class == <string>, object :: <list-node>) => (res :: <string>)
+ as(<string>, object.data[0]);
+end;
+define class <pre-node> (<parsed-node>)
+end;
+
+define method process-node (node :: <parsed-node>, index :: <integer>, rest-data :: <collection>) => (res :: xml$<element>, next :: <integer>)
+ format-out("This should never happen\n");
+end;
+
+define method process-node (node :: <list-node>, index :: <integer>, rest-data :: <collection>) => (res :: xml$<element>, next :: <integer>)
+ let list-nodes = make(<stretchy-vector>);
+ let kind = node.list-kind;
+ block(ret)
+ for (i from index below rest-data.size)
+ if (instance?(rest-data[i], <list-node>))
+ if (kind == rest-data[i].list-kind)
+ add!(list-nodes, rest-data[i])
+ end;
+ end;
+ ret;
+ end;
+ end;
+
+ let child = map(method(x)
+ make(xml$<element>, name: "li", children: data(x))
+ end, list-nodes);
+ values(make(xml$<element>,
+ name: if (kind == #"numbered") "ol" else "ul" end,
+ children: child),
+ list-nodes.size);
end;
+define method process-node (node :: <pre-node>, index :: <integer>, rest-data :: <collection>) => (res :: xml$<element>, next :: <integer>)
+ let pre-nodes = make(<stretchy-vector>);
+ block(ret)
+ for (i from index below rest-data.size)
+ if (instance?(rest-data[i], <pre-node>))
+ add!(pre-nodes, rest-data[i]);
+ else
+ ret;
+ end;
+ end;
+ end;
+ let child = reduce1(method(a, b) concatenate(a, "\n", b) end,
+ map(data, pre-nodes));
+ let node = make(xml$<element>, name: "pre", children: list(with-xml() text(concatenate("\n", child, "\n")) end));
+ values(node, size(pre-nodes));
+end;
+
+define method process-node (node :: xml$<xml>, index :: <integer>, rest-data :: <collection>) => (res :: xml$<xml>, next :: <integer>)
+ values(node, 1);
+end;
begin
- parse-wiki-markup(" one\n two\n three\n foo");
+ parse-wiki-markup(" one\n two\n three\nfoo\n");
parse-wiki-markup(" this is pre-text\n and another line");
+ format-out("RES %s\n", parse-wiki-markup("foo bar fnord\n fooo bar ffff\n\n* one\n* '''ttt'''"))
end;
Modified: trunk/libraries/network/wiki/parser-test.dylan
==============================================================================
--- trunk/libraries/network/wiki/parser-test.dylan (original)
+++ trunk/libraries/network/wiki/parser-test.dylan Sat Nov 10 22:11:16 2007
@@ -4,7 +4,7 @@
define test newline ()
- check-equal("Newline inserts paragraph", "foo<P/>bar", *markup-method*("foo\n\nbar\n"));
+ check-equal("Newline inserts paragraph", "foo<p/>bar", *markup-method*("foo\n\nbar\n"));
end;
define test internal-link ()
@@ -34,15 +34,15 @@
end;
define test heading4 ()
- check-equal("Heading 4", "<h4>foooo</h4>", *markup-method*("==== foooo ===="));
+ check-equal("Heading 4", "<h4>foooo </h4>", *markup-method*("==== foooo ===="));
end;
define test heading5 ()
- check-equal("Heading 5", "<h5>fooooo</h5>", *markup-method*("===== fooooo ====="));
+ check-equal("Heading 5", "<h5>fooooo </h5>", *markup-method*("===== fooooo ====="));
end;
define test heading54 ()
- check-equal("Heading 54", "<h5>fooooo</h5>", *markup-method*("===== fooooo ===="));
+ check-equal("Heading 54", "<h5>fooooo </h5>", *markup-method*("===== fooooo ===="));
end;
define test unnumbered-list ()
More information about the chatter
mailing list