aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Drahflow <drahflow@gmx.de> 2014-03-29 15:49:47 +0100
committer Drahflow <drahflow@gmx.de> 2014-03-29 15:49:47 +0100
commit 218eb7183aad1aa68bcbc49e791141f509194767 (patch)
tree 718ad20bd9b653bb96e6c8fc1ffcea00652e60c4
parent e7f73a02d0358a6d93f627da1bcd2e661d3a26f2 (diff)
Simplistic XML parser
-rw-r--r-- compiler/elymasGlobal.ey 2
-rw-r--r-- elymas/lib/xml.ey 93
l--------- examples/working-loaded/lib 1
-rw-r--r-- examples/working-loaded/xml.test 23
-rw-r--r-- examples/working-loaded/xml.test.xml 91
5 files changed, 209 insertions, 1 deletions
diff --git a/compiler/elymasGlobal.ey b/compiler/elymasGlobal.ey
index 312cb9a..6f16eb2 100644
--- a/compiler/elymasGlobal.ey
+++ b/compiler/elymasGlobal.ey
@@ -1229,7 +1229,7 @@
%40 /dl :testbImmReg
/patchConstant :jnzLbl8
- %30 /dl :testbImmReg # TODO separate cases for static and typed
+ %30 /dl :testbImmReg # FIXME separate cases for static and typed
/patchStatic :jnzLbl32
/rdx /rcx :movqRegReg
diff --git a/elymas/lib/xml.ey b/elymas/lib/xml.ey
new file mode 100644
index 0000000..b947ff1
--- /dev/null
+++ b/elymas/lib/xml.ey
@@ -0,0 +1,93 @@
+<
+ <
+ # Input-hook prelude for the parser. Readings of Elymas built-ins in these
+ # comments are inferred from how the names are used in this file - confirm
+ # against the language documentation.
+ #
+ # The % prefix is installed (defq) from txt .consume .hu; below it is used to
+ # write character codes such as %3C or %10FFFF.
+ txt .consume .|hu "%" defq
+ # in is used below as: c LOW HIGH in. It combines an le test and a ge test
+ # with and, i.e. presumably an inclusive range check - TODO confirm the exact
+ # meaning of the "2120" / "021" stack-shuffle strings.
+ { "2120" "-" | |le "021" "-" | |ge |and } /in defq
+ # The six input hooks all start out bound to a block that dies with the
+ # message below; the parse entry point rebinds every one of them before it
+ # runs the grammar.
+ { "Unconfigured npeek/take/get/set/noErr/snip" die } -000000 =*npeek =*take =*get =*set =*noErr =*snip
+ # peek is npeek with offset 0.
+ { 0 npeek } =*peek
+
+ # parser generator
+ # Matchers are blocks that leave a truth value on the stack (consumed by
+ # ? * and and below). The descriptions here are derived from the code in
+ # this file; the semantics of Elymas built-ins are assumed - TODO confirm.
+ #
+ # lit: if the argument's type tag (sys .typed .type) equals 1 - presumably a
+ # string - wrap it in a matcher that compares each element against peek,
+ # takes it, and ands the comparisons into r; afterwards noErr is invoked r
+ # times (once on a full match, assuming r is 0 or 1). Any other argument is
+ # passed through unchanged.
+ { _ sys .typed .type 1 eq { ==str { 1 ==r str { peek eq r and =r take } each r _ |noErr rep } } { } ? * } /lit deffd
+ # ,? (optional): remember the position (get), run p inside [ ... ]. On
+ # success the collected values are spliced back onto the stack
+ # (] _ len dearray); on failure they are dropped and the position is
+ # restored (s set). The result is always 1.
+ { lit =*p { get ==s [ p { ] _ len dearray } { ] -- s set } ? * 1 } } ",?" deffd
+ # ,* (repetition): the loop condition saves the position, opens [ and runs
+ # p; the loop body splices the collected values back. After the first
+ # failing attempt its partial collection is dropped and the position saved
+ # before that attempt is restored. The result is always 1.
+ { lit =*p { get ==s { get =s [ p } { ] _ len dearray } loop ] -- s set 1 } } ",*" deffd
+ # ,+ (one or more): p followed by p ,* joined with ,;
+ { _ ,* ,; } ",+" deffd
+ # ,| (ordered choice): try p; if it fails, drop what it collected, restore
+ # the position and run q instead.
+ { lit =*q lit =*p { get ==s [ p { ] _ len dearray 1 } { ] -- s set q } ? * } } ",|" deffd
+ # ,; (sequence): q runs only when p succeeded, otherwise the result is 0.
+ { lit ==q lit =*p { p q { 0 } ? * } } ",;" deffd
+ # ,[ ... ,] (grouping): the enclosed items are collected, each passed through
+ # lit, and folded with ,; into a single sequence matcher.
+ { [ [ } { ] |lit each ] ",;" | fold } -01 ",[" deffd ",]" deffd
+ # defp: wrapper around defvst, used by }== to bind rule names.
+ { defvst }' =*:defp
+ # FIXME: remove the useless { * }_ once static / typed are correctly discerned by optimizations
+ # }== (rule definition syntax, written as: { ... }==Name): closes the block,
+ # arranges for its contents to be wrapped in ,[ ... ,] and binds the result
+ # to Name via defp - NOTE(review): the exact evaluation order of the nested
+ # { * ... }_ blocks is not obvious from this file; confirm before changing.
+ { ==name "}" | * { ,[ } -01 ; { ,] } ; { * { * }_ _ name defp * }_ name defp } "}==" defq
+ # ( ... ) (semantic action syntax): ( is an alias for { and ) pushes 1 before
+ # invoking }, so an action block always reports a successful match.
+ "{" | "(" defq { 1 "}" | * } ")" defq
+
+ # upto (written as: ITEM TERMINATOR upto): the top argument becomes p (the
+ # terminator) and the one below it becomes q (the repeated item). Each
+ # iteration saves the position and tries p; if p matches, the position is
+ # restored and the loop stops (0), otherwise the position is restored and q
+ # is tried. Values collected by successful iterations are spliced back onto
+ # the stack. After the loop the position saved before the last attempt is
+ # restored, so the terminator itself is not consumed. The result is always 1.
+ { lit =*p lit =*q { get ==s
+ { get =s [ p { ] -- s set [ 0 } { ] -- s set [ q } ? * } { ] _ len dearray } loop ] -- s set 1
+ } } /upto deffd
+ # -% (range syntax, written as: %LOW -%HIGH): reads the upper bound with
+ # txt .consume .hu, takes the lower bound from the stack, and yields a
+ # matcher that peeks, takes, and tests the value with in.
+ { txt .consume .hu ==h ==l { peek take l h in } } "-%" defq
+
+ # compare http://www.w3.org/TR/2006/REC-xml11-20060816/
+ # Character-class matchers. Each one peeks at the current value, consumes it
+ # with take unconditionally, and leaves the class test as its result; after a
+ # failed test the surrounding combinator is responsible for restoring the
+ # position. The bracket-named matchers are referenced further down in the
+ # form: "[0-9]" |
+ { peek take _ %41 %5A in -01 %61 %7A in or } "[A-Za-z]" ==
+ { peek take ==c c %30 %39 in } "[0-9]" ==
+ { peek take ==c c %3C neq c %26 neq and } "[^<&]" ==
+ { peek take ==c c %41 %46 in c %61 %66 in or c %30 %39 in or } "[0-9a-fA-F]" ==
+ { peek take ==c c %3C neq c %26 neq and c %22 neq and } "[^<&\"]" ==
+ { peek take ==c c %3C neq c %26 neq and c %27 neq and } "[^<&']" ==
+ { peek take ==c c %41 %5A in c %61 %7A in or c %30 %39 in or
+ c %2E eq or c %5F eq or c %2D eq or } "[A-Za-z0-9._-]" ==
+ # PubidChar: the %27 %3B range includes %27 (apostrophe), which is why the
+ # single-quoted form of PubidLiteral uses upto instead of ,*
+ { peek take ==c c %20 eq c %D eq or c %A eq or c %3F %5A in or c %61 %7A in or c %21 eq or
+ c %23 %25 in or c %27 %3B in or c %3D eq or c %5F eq or } ==PubidChar
+ # CharNotMinus: same ranges as Char with %2D left out and %D7FF as the shared
+ # upper bound of the first two ranges.
+ { peek take ==c c %1 %2C in c %2E %D7FF in or c %E000 %FFFD in or c %10000 %10FFFF in or } ==CharNotMinus
+ { peek take ==c c %1 %D7FF in c %E000 %FFFD in or c %10000 %10FFFF in or } ==Char
+ # NameStartChar / NameChar: alternatives folded with ,| ; entries written as
+ # %LOW -%HIGH are range matchers built by the -% syntax.
+ [ ":" "_" %41 -%5A %61 -%7A %C0 -%D6 %D8 -%F6 %F8 -%2FF %370 -%37D
+ %37F -%1FFF %200C -%200D %2070 -%218F %2C00 -%2FEF %3001 -%D7FF %F900 -%FDCF %FDF0 -%FFFD
+ %10000 -%EFFFF ] ",|" | fold ==NameStartChar
+ [ NameStartChar "-" "." %30 -%39 %B7 -%B7 %0300 -%036F %203F -%2040 ] ",|" | fold ==NameChar
+ # S: one or more of %20 %9 %D %A.
+ { { peek [ %20 %9 %D %A ] eq any take } ,+ }==S
+
+ # Grammar rules, one per }== definition. Rule names follow the productions of
+ # the XML 1.1 recommendation linked above. Rules refer to one another
+ # regardless of definition order (e.g. content uses element, defined last).
+ #
+ # Semantic actions: ( get ) pushes the current position; ( get snip ) pushes
+ # the current position again and calls the snip hook, which parse binds to
+ # |s str .infix - presumably yielding the text between the two positions;
+ # TODO confirm the argument order of str .infix.
+ { "<![CDATA[" Char "]]>" upto "]]>" }==CDSect
+ { "[^<&]" | "]]>" upto }==CharData
+ { CharData ,? ,[ [ element Reference CDSect PI Comment ] ",|" | fold CharData ,? ,] ,* }==content
+ { ,[ "&#" "[0-9]" | ,+ ";" ,] ,[ "&#x" "[0-9a-fA-F]" | ,+ ";" ,] ,| }==CharRef
+ { "&" Name ";" }==EntityRef
+ { EntityRef CharRef ,| }==Reference
+ # AttValue snips the text between the quotes (quotes excluded).
+ { ,[ "\"" ( get ) "[^<&\"]" | Reference ,| ,* ( get snip ) "\"" ,]
+ ,[ "'" ( get ) "[^<&']" | Reference ,| ,* ( get snip ) "'" ,] ,| }==AttValue
+ # Attribute leaves two snipped strings: the attribute name, then its value.
+ { ( get ) Name ( get snip ) Eq AttValue }==Attribute
+ # EmptyElemTag / STag: snip the tag name, open [ to collect the attribute
+ # strings, then close it and call elem with the name and that array.
+ { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? "/>" }==EmptyElemTag # TODO: left-factorize
+ { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? ">" }==STag
+ # NOTE(review): ETag has no action, so the end-tag name is not compared with
+ # the name matched by STag.
+ { "</" Name S ,? ">" }==ETag
+ { ,[ "\"" PubidChar ,* "\"" ,] ,[ "'" PubidChar "'" upto "'" ,] ,| }==PubidLiteral
+ { ,[ "\"" { take 1 } "\"" upto "\"" ,] ,[ "'" { take 1 } "'" upto "'" ,] ,| }==SystemLiteral
+ # NOTE(review): markupdecl and DeclSep have no visible definition in this
+ # file - confirm where they are expected to come from.
+ { markupdecl DeclSep ,| ,* }==intSubset
+ { ,[ "SYSTEM" S SystemLiteral ,] ,[ "PUBLIC" S PubidLiteral S SystemLiteral ,] ,| }==ExternalID
+ { NameStartChar NameChar ,* }==Name
+ { Name }==PITarget # TODO: guard against [Xx][Mm][Ll]
+ { "<!--" CharNotMinus ,[ "-" CharNotMinus ,] ,| ,* "-->" }==Comment
+ { "<?" PITarget ,[ S Char "?>" upto ,] ,? "?>" }==PI
+ { Comment PI ,| S ,| }==Misc
+ { S ,? "=" S ,? }==Eq
+ # Only the literal version string "1.1" is accepted.
+ { "1.1" }==VersionNum
+ { "[A-Za-z]" | "[A-Za-z0-9._-]" | ,* }==EncName
+ { S "encoding" Eq ,[ "\"" EncName "\"" ,] ,[ "'" EncName "'" ,] ,| }==EncodingDecl
+ { S "version" Eq ,[ "'" VersionNum "'" ,] ,[ "\"" VersionNum "\"" ,] ,| }==VersionInfo
+ { "<?xml" VersionInfo EncodingDecl ,? SDDecl ,? S ,? "?>" }==XMLDecl
+ { "<!DOCTYPE" S Name ,[ S ExternalID ,] ,? S ,? ,[ "[" intSubset "]" S ,? ,] ,? ">" }==doctypedecl
+ # XMLDecl is mandatory here (no ,? after it).
+ { XMLDecl Misc ,* doctypedecl ,? Misc ,* }==prolog
+ { S "standalone" Eq ,[ "'" "yes" "no" ,| "'" ,] ,[ "\"" "yes" "no" ,| "\"" ,] ,| }==SDDecl
+ # element: either an empty-element tag, or STag content ETag where the values
+ # produced by content are gathered into an array and stored through the
+ # element's setChildren - presumably -101 keeps the element on the stack as
+ # the rule's result; TODO confirm the shuffle.
+ { EmptyElemTag ,[ STag ( [ ) content ( ] -101 .setChildren ) ETag ,] ,| }==element
+ { prolog element Misc ,* }==document
+
+ # parse entry point (bound to /parse when the enclosing scope is closed).
+ # Takes the input text, binds it as s with length LEN, and keeps two cursors:
+ # i (current position) and last (position recorded by the most recent noErr).
+ # It installs the input hooks as closures over that state:
+ #   npeek ( n -- v )  value at position i + n, or -1 when i + n is not below
+ #                     LEN. The offset is applied to the read as well as to
+ #                     the bounds test, so any look-ahead other than 0 returns
+ #                     the value it asked for (0 npeek is unchanged).
+ #   take              advance i by one
+ #   get / set         read / restore i (used for backtracking)
+ #   noErr             copy i into last
+ #   snip              |s str .infix
+ # It then runs the document rule. On success with i different from LEN it
+ # raises ???parse.xml.trailing-garbage; on failure it raises ???parse.xml.
+ # Both error scopes carry remaining, computed as: last |s str .postfix
+ { _ =*s len ==LEN 0 ==i 0 ==last
+ { i add _ LEN lt { s } { -- 1 neg } ? * } =npeek { i 1 add =i } =take { i } =get { =i } =set { i =last } =noErr
+ { |s str .infix } =snip
+ document * {
+ i LEN neq { < last |s str .postfix ==remaining > ???parse.xml.trailing-garbage } rep
+ } { < last |s str .postfix ==remaining > ???parse.xml } ? *
+ }
+
+ # elem: builds an element scope from a tag name and the array of attribute
+ # strings collected by STag / EmptyElemTag (called there as: ] elem).
+ # Members of the resulting scope:
+ #   name         the tag name
+ #   attr         a map filled from the array, which is unpacked and consumed
+ #                two entries per iteration (l 2 div iterations) via =[] -
+ #                presumably name/value pairs as produced by Attribute; TODO
+ #                confirm key/value order after the -01 swap
+ #   children     starts as an empty array
+ #   setChildren  replaces children with the value passed in
+ { < -01 ==name
+ map ==attr _ len _ ==l dearray l 2 div { -01 attr =[] } rep
+ [ ] ==children { =children } =*setChildren
+ > } /elem deffd
+ > -- /parse deffd
+> /xml defvd
+
+# vim: syn=elymas
diff --git a/examples/working-loaded/lib b/examples/working-loaded/lib
new file mode 120000
index 0000000..7d4b88b
--- /dev/null
+++ b/examples/working-loaded/lib
@@ -0,0 +1 @@
+../../elymas/lib
\ No newline at end of file
diff --git a/examples/working-loaded/xml.test b/examples/working-loaded/xml.test
new file mode 100644
index 0000000..8375638
--- /dev/null
+++ b/examples/working-loaded/xml.test
@@ -0,0 +1,23 @@
+"lib/xml.ey" include
+
+# dumpTree ( elem indent -- ): writes indent copies of " " followed by the
+# element's name and a newline to sys .out, then recurses into every entry of
+# elem .children with the indent increased by 2.
+# NOTE(review): every child is accessed via .name / .children, so this assumes
+# children holds only element scopes - confirm what the parser stores there.
+{ ==indent ==elem
+ indent { " " sys .out .writeall } rep
+ elem .name sys .out .writeall "\n" sys .out .writeall
+ elem .children {
+ indent 2 add dumpTree
+ } each
+} /dumpTree deffst
+
+# Test driver: open xml.test.xml, read it with a single 65536 :read call into
+# content (presumably an upper bound in bytes - a larger fixture would be
+# truncated; TODO confirm), and close the file.
+sys .file ":" via
+ "xml.test.xml" :open
+ 65536 :read ==content
+ :close
+
+# Parse content and print the element tree starting at indent 0. The second
+# block is the handler installed by ?!parse; it dumps a message and the
+# remaining member that xml .parse attaches to its error scopes - presumably
+# ?!parse also catches parse.xml and parse.xml.trailing-garbage; TODO confirm
+# the matching rule.
+{
+ content xml .parse 0 dumpTree
+} {
+ "Error during parse: " dump
+ .remaining dump
+} ?!parse
+
+# vim: syn=elymas
diff --git a/examples/working-loaded/xml.test.xml b/examples/working-loaded/xml.test.xml
new file mode 100644
index 0000000..73deefd
--- /dev/null
+++ b/examples/working-loaded/xml.test.xml
@@ -0,0 +1,91 @@
+<?xml version="1.1" encoding="utf-8" ?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+ <title>Jens-Wolfhard Schicke - Drahflow - Welcome to my Homepage</title>
+ <link rel="stylesheet" href="/css/default.css" type="text/css" />
+</head><body>
+<div id="menudiv"><ul id="menu"><li><a href="/index.html">Home</a></li>
+<li><a href="/contact.html">Contact</a></li>
+<li><a href="/pirates.html">Pirates</a></li>
+<li><a href="/australia.html">Australia</a></li>
+<li><a href="/programming.html">Programming</a></li>
+<li><a href="/research.html">Research</a></li>
+<li><a href="/dvorak.html">Dvorak</a></li>
+<li><a href="/security.html">Security</a></li>
+<li><a href="/misc.html">Misc</a></li>
+<li><a href="/blog.html">Blog</a></li></ul></div>
+<div id="maintitle"><h1>Welcome to my Homepage</h1></div>
+<p>
+Here I'll try to help anybody who (by what ever accident was necessary)
+found this page, in using Linux and C(++). Feel free to browse whatever
+files you encounter and <a href="mailto:drahflow@gmx.de">mail</a>
+me whenever you have questions. This page does not use much images
+mainly because I'm completely unable to design anything with graphics
+so I stay with text. Additionally this provides you with faster load
+times, although these don't matter much anymore in times of flatrates.
+</p>
+<p>
+As a response to recent fancy ideas of our Government, I now support the
+<a href="http://www.piratenpartei.de">German Pirate Party</a>
+trying to stop the increasing surveillance in our country.
+</p>
+<p>
+<a href='http://www.vim.org/'>
+<img src='img/vim_created.gif' class='fl' alt='VIm Logo' />
+</a>
+This is by far the
+<a href='http://www.vim.org/'>best text-editor</a>
+I ever encountered. Once you read
+all the help files you have thousands of commands just a few keystrokes
+away.
+</p>
+
+<p>
+<a href='http://validator.w3.org/check?uri=http%3A%2F%2Fjens.schicke.hvf-bs.net'>
+<img src='http://www.w3.org/Icons/valid-html401' alt="Valid HTML 4.01!" class='fl' />
+</a>
+The
+<a href='http://validator.w3.org/check?uri=http%3A%2F%2Fjens.schicke.hvf-bs.net'>W3C HTML Validator</a>
+states that this page is valid HTML 4.01, actually not does not mean a
+thing because not all browsers are W3C compliant but at least it looks
+nice, doesn't it?
+</p>
+
+<p>
+<a href='http://jigsaw.w3.org/css-validator/validator?uri=http://jens.schicke.hvf-bs.net'>
+<img src='http://jigsaw.w3.org/css-validator/images/vcss' alt="Valid CSS!" class='fl' />
+</a>
+Another test of W3C, this time it's a
+<a href='http://jigsaw.w3.org/css-validator/validator?uri=http://jens.schicke.hvf-bs.net'>CSS test</a>
+for this page, which also returned this to be a valid page.
+</p>
+
+<p>
+<a href='http://www.kernel.org'>
+<img src='img/linux.png' alt="powered by Linux" class='fl' />
+</a>
+And of course this server runs on
+<a href='http://www.kernel.org/'>Linux</a>
+the best operating system I got used to since now.
+<a href='http://www.bsd.org'>BSD</a> is quite nice, too, but I not yet
+relly dug myself into it.
+</p>
+
+<p>
+<a href='http://www.apache.org'>
+<img src='img/apache_pb.gif' alt="powered by Apache" class='fl' />
+</a>
+This site is powered by
+<a href='http://www.apache.org/'>Apache</a>,
+one of the most often used Linux Webservers around.
+</p>
+
+<p>
+<a href="http://www.uberwach.de/">
+<img src="http://www.uberwach.de/wanze?http%3A//jens.schicke.hvf-bs.net/" alt="Aktion UBERWACH!" width="80" height="15" />
+</a>
+Don't be intimidated. The image referenced will only log accesses by our most beloved government.
+</p>
+
+</body></html>