diff options
| author | Drahflow <drahflow@gmx.de> | 2014-03-29 15:49:47 +0100 |
|---|---|---|
| committer | Drahflow <drahflow@gmx.de> | 2014-03-29 15:49:47 +0100 |
| commit | 218eb7183aad1aa68bcbc49e791141f509194767 (patch) | |
| tree | 718ad20bd9b653bb96e6c8fc1ffcea00652e60c4 | |
| parent | e7f73a02d0358a6d93f627da1bcd2e661d3a26f2 (diff) | |
Simplistic XML parser
| -rw-r--r-- | compiler/elymasGlobal.ey | 2 | ||||
| -rw-r--r-- | elymas/lib/xml.ey | 93 | ||||
| l--------- | examples/working-loaded/lib | 1 | ||||
| -rw-r--r-- | examples/working-loaded/xml.test | 23 | ||||
| -rw-r--r-- | examples/working-loaded/xml.test.xml | 91 |
5 files changed, 209 insertions, 1 deletions
diff --git a/compiler/elymasGlobal.ey b/compiler/elymasGlobal.ey index 312cb9a..6f16eb2 100644 --- a/compiler/elymasGlobal.ey +++ b/compiler/elymasGlobal.ey @@ -1229,7 +1229,7 @@ %40 /dl :testbImmReg /patchConstant :jnzLbl8 - %30 /dl :testbImmReg # TODO separate cases for static and typed + %30 /dl :testbImmReg # FIXME separate cases for static and typed /patchStatic :jnzLbl32 /rdx /rcx :movqRegReg diff --git a/elymas/lib/xml.ey b/elymas/lib/xml.ey new file mode 100644 index 0000000..b947ff1 --- /dev/null +++ b/elymas/lib/xml.ey @@ -0,0 +1,93 @@ +< + < + txt .consume .|hu "%" defq + { "2120" "-" | |le "021" "-" | |ge |and } /in defq + { "Unconfigured npeek/take/get/set/noErr/snip" die } -000000 =*npeek =*take =*get =*set =*noErr =*snip + { 0 npeek } =*peek + + # parser generator + { _ sys .typed .type 1 eq { ==str { 1 ==r str { peek eq r and =r take } each r _ |noErr rep } } { } ? * } /lit deffd + { lit =*p { get ==s [ p { ] _ len dearray } { ] -- s set } ? * 1 } } ",?" deffd + { lit =*p { get ==s { get =s [ p } { ] _ len dearray } loop ] -- s set 1 } } ",*" deffd + { _ ,* ,; } ",+" deffd + { lit =*q lit =*p { get ==s [ p { ] _ len dearray 1 } { ] -- s set q } ? * } } ",|" deffd + { lit ==q lit =*p { p q { 0 } ? * } } ",;" deffd + { [ [ } { ] |lit each ] ",;" | fold } -01 ",[" deffd ",]" deffd + { defvst }' =*:defp + # FIXME: remove the useless { * }_ once static / typed are correctly discerned by optimizations + { ==name "}" | * { ,[ } -01 ; { ,] } ; { * { * }_ _ name defp * }_ name defp } "}==" defq + "{" | "(" defq { 1 "}" | * } ")" defq + + { lit =*p lit =*q { get ==s + { get =s [ p { ] -- s set [ 0 } { ] -- s set [ q } ? * } { ] _ len dearray } loop ] -- s set 1 + } } /upto deffd + { txt .consume .hu ==h ==l { peek take l h in } } "-%" defq + + # compare http://www.w3.org/TR/2006/REC-xml11-20060816/ + { peek take _ %41 %5A in -01 %61 %7A in or } "[A-Za-z]" == + { peek take ==c c %30 %39 in } "[0-9]" == + { peek take ==c c %3C neq c %26 neq and } "[^<&]" == + { peek take ==c c %41 %46 in c %61 %66 in or c %30 %39 in or } "[0-9a-fA-F]" == + { peek take ==c c %3C neq c %26 neq and c %22 neq and } "[^<&\"]" == + { peek take ==c c %3C neq c %26 neq and c %27 neq and } "[^<&']" == + { peek take ==c c %41 %5A in c %61 %7A in or c %30 %39 in or + c %2E eq or c %5F eq or c %2D eq or } "[A-Za-z0-9._-]" == + { peek take ==c c %20 eq c %D eq or c %A eq or c %3F %5A in or c %61 %7A in or c %21 eq or + c %23 %25 in or c %27 %3B in or c %3D eq or c %5F eq or } ==PubidChar + { peek take ==c c %1 %2C in c %2E %D7FF in or c %E000 %FFFD in or c %10000 %10FFFF in or } ==CharNotMinus + { peek take ==c c %1 %D7FF in c %E000 %FFFD in or c %10000 %10FFFF in or } ==Char + [ ":" "_" %41 -%5A %61 -%7A %C0 -%D6 %D8 -%F6 %F8 -%2FF %370 -%37D + %37F -%1FFF %200C -%200D %2070 -%218F %2C00 -%2FEF %3001 -%D7FF %F900 -%FDCF %FDF0 -%FFFD + %10000 -%EFFFF ] ",|" | fold ==NameStartChar + [ NameStartChar "-" "." %30 -%39 %B7 -%B7 %0300 -%036F %203F -%2040 ] ",|" | fold ==NameChar + { { peek [ %20 %9 %D %A ] eq any take } ,+ }==S + + { "<![CDATA[" Char "]]>" upto "]]>" }==CDSect + { "[^<&]" | "]]>" upto }==CharData + { CharData ,? ,[ [ element Reference CDSect PI Comment ] ",|" | fold CharData ,? ,] ,* }==content + { ,[ "&#" "[0-9]" | ,+ ";" ,] ,[ "&#x" "[0-9a-fA-F]" | ,+ ";" ,] ,| }==CharRef + { "&" Name ";" }==EntityRef + { EntityRef CharRef ,| }==Reference + { ,[ "\"" ( get ) "[^<&\"]" | Reference ,| ,* ( get snip ) "\"" ,] + ,[ "'" ( get ) "[^<&']" | Reference ,| ,* ( get snip ) "'" ,] ,| }==AttValue + { ( get ) Name ( get snip ) Eq AttValue }==Attribute + { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? "/>" }==EmptyElemTag # TODO: left-factorize + { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? ">" }==STag + { "</" Name S ,? ">" }==ETag + { ,[ "\"" PubidChar ,* "\"" ,] ,[ "'" PubidChar "'" upto "'" ,] ,| }==PubidLiteral + { ,[ "\"" { take 1 } "\"" upto "\"" ,] ,[ "'" { take 1 } "'" upto "'" ,] ,| }==SystemLiteral + { markupdecl DeclSep ,| ,* }==intSubset + { ,[ "SYSTEM" S SystemLiteral ,] ,[ "PUBLIC" S PubidLiteral S SystemLiteral ,] ,| }==ExternalID + { NameStartChar NameChar ,* }==Name + { Name }==PITarget # TODO: guard against [Xx][Mm][Ll] + { "<!--" CharNotMinus ,[ "-" CharNotMinus ,] ,| ,* "-->" }==Comment + { "<?" PITarget ,[ S Char "?>" upto ,] ,? "?>" }==PI + { Comment PI ,| S ,| }==Misc + { S ,? "=" S ,? }==Eq + { "1.1" }==VersionNum + { "[A-Za-z]" | "[A-Za-z0-9._-]" | ,* }==EncName + { S "encoding" Eq ,[ "\"" EncName "\"" ,] ,[ "'" EncName "'" ,] ,| }==EncodingDecl + { S "version" Eq ,[ "'" VersionNum "'" ,] ,[ "\"" VersionNum "\"" ,] ,| }==VersionInfo + { "<?xml" VersionInfo EncodingDecl ,? SDDecl ,? S ,? "?>" }==XMLDecl + { "<!DOCTYPE" S Name ,[ S ExternalID ,] ,? S ,? ,[ "[" intSubset "]" S ,? ,] ,? ">" }==doctypedecl + { XMLDecl Misc ,* doctypedecl ,? Misc ,* }==prolog + { S "standalone" Eq ,[ "'" "yes" "no" ,| "'" ,] ,[ "\"" "yes" "no" ,| "\"" ,] ,| }==SDDecl + { EmptyElemTag ,[ STag ( [ ) content ( ] -101 .setChildren ) ETag ,] ,| }==element + { prolog element Misc ,* }==document + + { _ =*s len ==LEN 0 ==i 0 ==last + { i add LEN lt { i s } { 1 neg } ? * } =npeek { i 1 add =i } =take { i } =get { =i } =set { i =last } =noErr + { |s str .infix } =snip + document * { + i LEN neq { < last |s str .postfix ==remaining > ???parse.xml.trailing-garbage } rep + } { < last |s str .postfix ==remaining > ???parse.xml } ? * + } + + { < -01 ==name + map ==attr _ len _ ==l dearray l 2 div { -01 attr =[] } rep + [ ] ==children { =children } =*setChildren + > } /elem deffd + > -- /parse deffd +> /xml defvd + +# vim: syn=elymas diff --git a/examples/working-loaded/lib b/examples/working-loaded/lib new file mode 120000 index 0000000..7d4b88b --- /dev/null +++ b/examples/working-loaded/lib @@ -0,0 +1 @@ +../../elymas/lib
\ No newline at end of file diff --git a/examples/working-loaded/xml.test b/examples/working-loaded/xml.test new file mode 100644 index 0000000..8375638 --- /dev/null +++ b/examples/working-loaded/xml.test @@ -0,0 +1,23 @@ +"lib/xml.ey" include + +{ ==indent ==elem + indent { " " sys .out .writeall } rep + elem .name sys .out .writeall "\n" sys .out .writeall + elem .children { + indent 2 add dumpTree + } each +} /dumpTree deffst + +sys .file ":" via + "xml.test.xml" :open + 65536 :read ==content + :close + +{ + content xml .parse 0 dumpTree +} { + "Error during parse: " dump + .remaining dump +} ?!parse + +# vim: syn=elymas diff --git a/examples/working-loaded/xml.test.xml b/examples/working-loaded/xml.test.xml new file mode 100644 index 0000000..73deefd --- /dev/null +++ b/examples/working-loaded/xml.test.xml @@ -0,0 +1,91 @@ +<?xml version="1.1" encoding="utf-8" ?> +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> +<head> + <title>Jens-Wolfhard Schicke - Drahflow - Welcome to my Homepage</title> + <link rel="stylesheet" href="/css/default.css" type="text/css" /> +</head><body> +<div id="menudiv"><ul id="menu"><li><a href="/index.html">Home</a></li> +<li><a href="/contact.html">Contact</a></li> +<li><a href="/pirates.html">Pirates</a></li> +<li><a href="/australia.html">Australia</a></li> +<li><a href="/programming.html">Programming</a></li> +<li><a href="/research.html">Research</a></li> +<li><a href="/dvorak.html">Dvorak</a></li> +<li><a href="/security.html">Security</a></li> +<li><a href="/misc.html">Misc</a></li> +<li><a href="/blog.html">Blog</a></li></ul></div> +<div id="maintitle"><h1>Welcome to my Homepage</h1></div> +<p> +Here I'll try to help anybody who (by what ever accident was necessary) +found this page, in using Linux and C(++). Feel free to browse whatever +files you encounter and <a href="mailto:drahflow@gmx.de">mail</a> +me whenever you have questions. This page does not use much images +mainly because I'm completely unable to design anything with graphics +so I stay with text. Additionally this provides you with faster load +times, although these don't matter much anymore in times of flatrates. +</p> +<p> +As a response to recent fancy ideas of our Government, I now support the +<a href="http://www.piratenpartei.de">German Pirate Party</a> +trying to stop the increasing surveillance in our country. +</p> +<p> +<a href='http://www.vim.org/'> +<img src='img/vim_created.gif' class='fl' alt='VIm Logo' /> +</a> +This is by far the +<a href='http://www.vim.org/'>best text-editor</a> +I ever encountered. Once you read +all the help files you have thousands of commands just a few keystrokes +away. +</p> + +<p> +<a href='http://validator.w3.org/check?uri=http%3A%2F%2Fjens.schicke.hvf-bs.net'> +<img src='http://www.w3.org/Icons/valid-html401' alt="Valid HTML 4.01!" class='fl' /> +</a> +The +<a href='http://validator.w3.org/check?uri=http%3A%2F%2Fjens.schicke.hvf-bs.net'>W3C HTML Validator</a> +states that this page is valid HTML 4.01, actually not does not mean a +thing because not all browsers are W3C compliant but at least it looks +nice, doesn't it? +</p> + +<p> +<a href='http://jigsaw.w3.org/css-validator/validator?uri=http://jens.schicke.hvf-bs.net'> +<img src='http://jigsaw.w3.org/css-validator/images/vcss' alt="Valid CSS!" class='fl' /> +</a> +Another test of W3C, this time it's a +<a href='http://jigsaw.w3.org/css-validator/validator?uri=http://jens.schicke.hvf-bs.net'>CSS test</a> +for this page, which also returned this to be a valid page. +</p> + +<p> +<a href='http://www.kernel.org'> +<img src='img/linux.png' alt="powered by Linux" class='fl' /> +</a> +And of course this server runs on +<a href='http://www.kernel.org/'>Linux</a> +the best operating system I got used to since now. +<a href='http://www.bsd.org'>BSD</a> is quite nice, too, but I not yet +relly dug myself into it. +</p> + +<p> +<a href='http://www.apache.org'> +<img src='img/apache_pb.gif' alt="powered by Apache" class='fl' /> +</a> +This site is powered by +<a href='http://www.apache.org/'>Apache</a>, +one of the most often used Linux Webservers around. +</p> + +<p> +<a href="http://www.uberwach.de/"> +<img src="http://www.uberwach.de/wanze?http%3A//jens.schicke.hvf-bs.net/" alt="Aktion UBERWACH!" width="80" height="15" /> +</a> +Don't be intimidated. The image referenced will only log accesses by our most beloved government. +</p> + +</body></html> |
