aboutsummaryrefslogtreecommitdiff
path: root/elymas/lib/xml.ey
blob: 3f34b5e9d65668da4cb6519beb80da6c1715a533 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Outer scope: its contents become the value bound to /xml by the closing `> /xml defvd`.
<
  # Inner scope for the parser internals. It is dropped again when closed (`> --`); the parse function that
  # is left on the stack is what gets bound to /parse in the outer scope.
  <
    # `%` is a parse-time macro (defq) bound to txt .consume .hu, so tokens like %41 or %D7FF below are
    # numeric character codes written in hexadecimal.
    txt .consume .|hu "%" defq
    # `c l h in` expands inline to `-2120 le -021 ge and`: (c le h) and (c ge l), an inclusive range test.
    { "2120" "-" | |le "021" "-" | |ge |and } /in defq
    # Predefined entities: each { value /name } pair is stored into defaultEnts as name -> value.
    map ==defaultEnts [ { "&" /amp } { "<" /lt } { ">" /gt } { "'" /apos } { "\"" /quot } ] { * defaultEnts =[] } each
    # Input accessors. All six start out as the same placeholder that dies; the parse function defined
    # further down assigns the real implementations before running the grammar.
    { "Unconfigured npeek/take/get/set/noErr/snip" die } -000000 =*npeek =*take =*get =*set =*noErr =*snip
    # peek is npeek with lookahead 0; ents is the entity table consulted by EntityRef (replaced per parse).
    { 0 npeek } =*peek map ==ents

    # parser generator
    # Convention used by all parsers below: a parser is a function that consumes input through peek/take and
    # leaves a truth value on top of the stack; combinators save the position with get and restore it with set
    # when an attempt fails. Results produced by a sub-parser are collected between [ and ] so they can be
    # either spread onto the stack (`_ len dearray`) or discarded (`--`).

    # lit: if the argument's type tag equals 1 (presumably: it is a string) wrap it in a parser that compares
    # every character against peek, consuming as it goes (also after a mismatch), and calls noErr once when
    # the whole string matched. Any other argument is passed through unchanged.
    { _ sys .typed .type 1 eq { ==str { 1 ==r str { peek eq r and =r take } each r _ |noErr rep } } { } ? * } /lit deffd
    # p ,? : optional p. On failure the position is restored and p's results dropped; always succeeds.
    { lit =*p { get ==s [ p { ] _ len dearray } { ] -- s set } ? * 1 } } ",?" deffd
    # p ,* : zero or more p. The failed last attempt is rolled back; always succeeds.
    { lit =*p { get ==s { get =s [ p } { ] _ len dearray } loop ] -- s set 1 } } ",*" deffd
    # p ,+ : one or more p, built as the sequence p followed by p ,*.
    { _ ,* ,; } ",+" deffd
    # p q ,| : ordered choice. q is only tried (from the restored position) if p fails.
    { lit =*q lit =*p { get ==s [ p { ] _ len dearray 1 } { ] -- s set q } ? * } } ",|" deffd
    # p q ,; : sequence. q runs only if p succeeded, otherwise the result is 0.
    { lit ==q lit =*p { p q { 0 } ? * } } ",;" deffd
    # ,[ ... ,] : group. ,[ opens two array markers; ,] closes the inner one, applies lit to every collected
    # item, closes the outer one and folds the resulting array with ,; into one sequence parser.
    { [ [ } { ] |lit each ] ",;" | fold } -01 ",[" deffd ",]" deffd
    # defp: helper used by }== to bind a rule name (wraps defvst).
    { defvst }' =*:defp
    # FIXME: remove the useless { * }_ once static / typed are correctly discerned by optimizations
    # `{ ... }==Name` : closes the function body, wraps it in ,[ ... ,] so the body reads as a sequence, and
    # binds it under Name via defp. NOTE(review): the nested { * ... }_ looks like it defers building the rule
    # until first use, which would explain why rules may reference rules defined later -- confirm.
    { ==name "}" | * { ,[ } -01 ; { ,] } ; { * { * }_ _ name defp * }_ name defp } "}==" defq
    # ( ... ) : semantic action inside a rule. `(` opens a function like `{`; `)` appends a 1 before closing
    # it, so an action always counts as a successful parser.
    "{" | "(" defq { 1 "}" | * } ")" defq

    # q p upto : repeat q for as long as p does NOT match at the current position. Every iteration first
    # probes p and rolls the position back; if p matched the loop stops, otherwise q is attempted. Afterwards
    # the position is reset to the start of the last iteration, so neither p nor a failed q is consumed.
    # Always succeeds (pushes 1).
    { lit =*p lit =*q { get ==s
      { get =s [ p { ] -- s set [ 0 } { ] -- s set [ q } ? * } { ] _ len dearray } loop ] -- s set 1
    } } /upto deffd
    # %LO -%HI : parse-time macro. Reads the hexadecimal upper bound following the `-%`, takes the lower bound
    # from the stack and yields a parser that consumes one character and tests it against LO..HI inclusive.
    { txt .consume .hu ==h ==l { peek take l h in } } "-%" defq

    # compare http://www.w3.org/TR/2006/REC-xml11-20060816/
    # Single-character predicates. Each one reads the current character with peek, consumes it unconditionally
    # with take and leaves the test result on the stack; the surrounding combinators restore the position when
    # the test fails. The character is kept on the stack and duplicated as needed instead of being named.
    { peek take _ %41 %5A in -01 %61 %7A in or } "[A-Za-z]" ==
    { peek take %30 %39 in } "[0-9]" ==
    { peek take _ %3C neq -01 %26 neq and } "[^<&]" ==
    { peek take _ %41 %46 in -01 _ %61 %66 in -01 %30 %39 in or or } "[0-9a-fA-F]" ==
    { peek take _ %3C neq -01 _ %26 neq -01 %22 neq and and } "[^<&\"]" ==
    { peek take _ %3C neq -01 _ %26 neq -01 %27 neq and and } "[^<&']" ==
    # Letters, digits, '.', '_' and '-' (used for the tail of an encoding name).
    { peek take ==c c %41 %5A in    c %61 %7A in or     c %30 %39 in or
                    c %2E eq or     c %5F eq or         c %2D eq or } "[A-Za-z0-9._-]" ==
    # PubidChar: space, CR, LF, %3F-%5A ('?', '@', A-Z), a-z, '!', %23-%25 ('#', '$', '%'),
    # %27-%3B (apostrophe through ';', which includes the digits), '=' and '_'.
    { peek take ==c c %20 eq        c %D eq or          c %A eq or          c %3F %5A in or c %61 %7A in or c %21 eq or
                    c %23 %25 in or c %27 %3B in or     c %3D eq or         c %5F eq or } ==PubidChar
    # Same ranges as Char below, with %2D ('-') cut out; used inside comments.
    { peek take ==c c %1 %2C in     c %2E %D7FF in or   c %E000 %FFFD in or c %10000 %10FFFF in or } ==CharNotMinus
    # Char: %1-%D7FF, %E000-%FFFD, %10000-%10FFFF. The end-of-input value delivered by peek (1 neg) is
    # outside all ranges, so Char fails at end of input.
    { peek take ==c c %1 %D7FF in   c %E000 %FFFD in or c %10000 %10FFFF in or } ==Char
    # NameStartChar: ordered choice (fold with ,|) over ":" "_" and the listed inclusive ranges.
    [ ":" "_"       %41 -%5A      %61 -%7A      %C0 -%D6      %D8 -%F6      %F8 -%2FF     %370 -%37D
      %37F -%1FFF   %200C -%200D  %2070 -%218F  %2C00 -%2FEF  %3001 -%D7FF  %F900 -%FDCF  %FDF0 -%FFFD
      %10000 -%EFFFF ] ",|" | fold ==NameStartChar
    # NameChar: NameStartChar plus "-", ".", digits, %B7, %0300-%036F and %203F-%2040.
    [ NameStartChar "-" "." %30 -%39  %B7 -%B7  %0300 -%036F  %203F -%2040 ] ",|" | fold ==NameChar
    # S: one or more of space, tab, CR, LF.
    { { peek [ %20 %9 %D %A ] eq any take } ,+ }==S

    # CDSect: "<![CDATA[" followed by any Char sequence up to (not including) "]]>", then "]]>".
    # The start position is recorded after the opening delimiter and the span up to the closing delimiter is
    # turned into a text node with the same get/snip/text pattern CharData uses, so the content of a CDATA
    # section ends up among the children of the enclosing element instead of being matched and discarded.
    { "<![CDATA[" ( get ) Char "]]>" upto ( get snip text ) "]]>" }==CDSect
    # CharData: remembers the start position, consumes characters other than '<' and '&' until "]]>" would
    # match, then turns the consumed span into a text node. Succeeds on an empty span as well.
    { ( get ) "[^<&]" | "]]>" upto ( get snip text ) }==CharData
    # content: optional CharData, then any number of (element | Reference | CDSect | PI | Comment), each
    # optionally followed by CharData.
    { CharData ,? ,[ [ element Reference CDSect PI Comment ] ",|" | fold CharData ,? ,] ,* }==content
    # CharRef: "&#" decimal digits ";" is tried first, "&#x" hex digits ";" second. The digit span is converted
    # with txt .consume .u / .hu, wrapped into a one-element array, made a string via str .fromArray and
    # emitted as a text node.
    { ,[ "&#" ( get ) "[0-9]" | ,+ ( get snip txt .consume .u [ -01 ] str .fromArray text ) ";" ,]
      ,[ "&#x" ( get ) "[0-9a-fA-F]" | ,+ ( get snip txt .consume .hu [ -01 ] str .fromArray text ) ";" ,] ,| }==CharRef
    # EntityRef: "&" Name ";". The name is looked up in ents; a known entity becomes a text node holding its
    # replacement, an unknown one triggers ??parse.xml.undeclared-entity.
    { "&" ( get ) Name ( get snip ) ";" ( _ ents .has { ents * text } { ??parse.xml.undeclared-entity } ? * ) }==EntityRef
    # Reference: EntityRef first, CharRef as fallback (an "&#..." input fails EntityRef at Name).
    { EntityRef CharRef ,| }==Reference
    # AttValue: double- or single-quoted value; the span between the quotes is snipped from the input.
    # NOTE(review): a Reference inside the value leaves a text node on the stack between the saved start
    # position and the final `get snip` -- verify that snip still receives the operands it expects.
    { ,[ "\"" ( get ) "[^<&\"]" | Reference ,| ,* ( get snip ) "\"" ,]
      ,[ "'" ( get ) "[^<&']" | Reference ,| ,* ( get snip ) "'" ,] ,| }==AttValue
    # Attribute: pushes the attribute name (snipped) followed by whatever AttValue leaves.
    { ( get ) Name ( get snip ) Eq AttValue }==Attribute
    # EmptyElemTag / STag: snip the tag name, collect the attributes' name/value items into an array and pass
    # both to elem. The two rules differ only in the closing "/>" versus ">".
    { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? "/>" }==EmptyElemTag # TODO: left-factorize
    { "<" ( get ) Name ( get snip [ ) ,[ S Attribute ,] ,* ( ] elem ) S ,? ">" }==STag
    # ETag: "</" Name S? ">". NOTE(review): the name is not compared with the one of the opening tag.
    { "</" Name S ,? ">" }==ETag
    # PubidLiteral: '"' PubidChar* '"' or "'" PubidChar* "'" (the latter stops at the first apostrophe).
    { ,[ "\"" PubidChar ,* "\"" ,] ,[ "'" PubidChar "'" upto "'" ,] ,| }==PubidLiteral
    # SystemLiteral: a double- or single-quoted string containing any characters except its own quote.
    # The per-character step consumes one character and succeeds only if it was a real character: peek
    # delivers 1 neg at end of input, and the step must fail there. upto stops only when its terminator matches
    # or its body fails, so a body that always succeeds would advance forever on an unterminated literal.
    { ,[ "\"" { peek take 1 neg neq } "\"" upto "\"" ,] ,[ "'" { peek take 1 neg neq } "'" upto "'" ,] ,| }==SystemLiteral
    # intSubset: (markupdecl | DeclSep)*.
    # NOTE(review): markupdecl and DeclSep are not defined anywhere in the visible source -- confirm whether
    # documents with an internal DTD subset can be parsed at all.
    { markupdecl DeclSep ,| ,* }==intSubset
    # ExternalID: SYSTEM S SystemLiteral, or PUBLIC S PubidLiteral S SystemLiteral.
    { ,[ "SYSTEM" S SystemLiteral ,] ,[ "PUBLIC" S PubidLiteral S SystemLiteral ,] ,| }==ExternalID
    # Name: NameStartChar followed by any number of NameChar.
    { NameStartChar NameChar ,* }==Name
    { Name }==PITarget # TODO: guard against [Xx][Mm][Ll]
    # Comment: "<!--", then single non-'-' characters or a '-' followed by a non-'-' character, then "-->".
    { "<!--" CharNotMinus ,[ "-" CharNotMinus ,] ,| ,* "-->" }==Comment
    # PI: "<?" target, optionally whitespace and arbitrary characters up to "?>", then "?>".
    { "<?" PITarget ,[ S Char "?>" upto ,] ,? "?>" }==PI
    # Misc: Comment, PI or whitespace.
    { Comment PI ,| S ,| }==Misc
    # Eq: '=' with optional whitespace on both sides.
    { S ,? "=" S ,? }==Eq
    # Only the literal version string "1.1" is accepted.
    { "1.1" }==VersionNum
    # EncName: a letter followed by letters, digits, '.', '_' or '-'.
    { "[A-Za-z]" | "[A-Za-z0-9._-]" | ,* }==EncName
    { S "encoding" Eq ,[ "\"" EncName "\"" ,] ,[ "'" EncName "'" ,] ,| }==EncodingDecl
    { S "version" Eq ,[ "'" VersionNum "'" ,] ,[ "\"" VersionNum "\"" ,] ,| }==VersionInfo
    # XMLDecl: "<?xml" VersionInfo, optional EncodingDecl, optional SDDecl, optional whitespace, "?>".
    { "<?xml" VersionInfo EncodingDecl ,? SDDecl ,? S ,? "?>" }==XMLDecl
    # doctypedecl: "<!DOCTYPE" S Name, optional external id, optional "[" intSubset "]", then ">".
    { "<!DOCTYPE" S Name ,[ S ExternalID ,] ,? S ,? ,[ "[" intSubset "]" S ,? ,] ,? ">" }==doctypedecl
    # prolog: XMLDecl is mandatory here; Misc items and an optional doctypedecl may follow.
    { XMLDecl Misc ,* doctypedecl ,? Misc ,* }==prolog
    # SDDecl: standalone="yes"|"no" in single or double quotes.
    { S "standalone" Eq ,[ "'" "yes" "no" ,| "'" ,] ,[ "\"" "yes" "no" ,| "\"" ,] ,| }==SDDecl
    # element: an empty-element tag, or STag content ETag. In the second form everything content leaves on the
    # stack is collected into an array and stored in the element via .setChildren; the element stays on the
    # stack as the result.
    { EmptyElemTag ,[ STag ( [ ) content ( ] -101 .setChildren ) ETag ,] ,| }==element
    # document: prolog, the root element, trailing Misc items.
    { prolog element Misc ,* }==document

    # The parse function (bound to /parse when the inner scope is closed). Takes the input string and runs
    # the `document` rule over it.
    #   s / LEN  input and its length; i is the current position; last is the position recorded by noErr
    #            (called by lit after a fully matched literal), used for error reporting.
    #   npeek    `k npeek` yields the character at position i+k, or 1 neg when i+k is beyond the input.
    #            The bounds check and the fetch use the same index, so a non-zero lookahead returns the
    #            character it was asked for rather than the one at i.
    #   take     advance by one; get / set read and restore the position; snip extracts a span of the input.
    #   ents     fresh copy of defaultEnts for this parse.
    # If document fails, ??parse.xml is triggered; if it succeeds without reaching the end of the input,
    # ??parse.xml.trailing-garbage is triggered. The ?!parse.xml handler attaches the input from `last`
    # onwards as `remaining` before passing the error on.
    { _ =*s len ==LEN 0 ==i 0 ==last
      { i add _ LEN lt { s } { -- 1 neg } ? * } =npeek { i 1 add =i } =take { i } =get { =i } =set { i =last } =noErr
      { |s str .infix } =snip defaultEnts .clone =ents
      { document * { i LEN neq { ??parse.xml.trailing-garbage } rep } { ??parse.xml } ? * }
        { -- < last |s str .postfix ==remaining > ??!' } ?!parse.xml
    }

    # text: builds a text node, an object with an empty name and the given string as member `text`.
    { < "" ==name ==text > } /text deffd
    # elem: takes a tag name and a flat array of alternating attribute names and values. Builds an object with
    # members name, attr (map filled as name -> value), l (the array length), children (initially empty) and
    # setChildren, which replaces children.
    { < -01 ==name
        map ==attr _ len _ ==l dearray l 2 div { -01 attr =[] } rep
        [ ] ==children { =children } =*setChildren
    > } /elem deffd
  # Close the inner scope and drop the scope object; the parse function left on the stack becomes /parse.
  > -- /parse deffd
# Close the outer scope and bind it as /xml.
> /xml defvd

# vim: syn=elymas