about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Drahflow <drahflow@gmx.de> 2013-04-16 23:20:41 +0200
committer Drahflow <drahflow@gmx.de> 2013-04-16 23:20:41 +0200
commit 50a8cffc4af2301d89dcc5ccab05cece55ddbcd2 (patch)
tree 56aaad15799607d8d794556301af68f953f5f182
parent 8a848799e6db6c4a77ceaf256c29ac38a5b465e1 (diff)
Regex support for all lexing regexes
-rw-r--r-- compiler/standardClient.ey 46
-rw-r--r-- examples/working/regex.ey 31
2 files changed, 62 insertions, 15 deletions
diff --git a/compiler/standardClient.ey b/compiler/standardClient.ey
index 53b1a8d..638a9f2 100644
--- a/compiler/standardClient.ey
+++ b/compiler/standardClient.ey
@@ -426,7 +426,7 @@
0 ==MATCH 1 ==TERM 2 ==JUMP 3 ==SPLIT 4 ==SAVE 5 ==FIRST 6 ==LAST
{ ==b ==a [
- [ SPLIT 1 a len 1 add ]
+ [ SPLIT 1 a len 1 add ] # FIXME this should be "2 add"?!
a _ len dearray
[ JUMP b len ]
b _ len dearray
@@ -434,9 +434,10 @@
|cat /sequence deff
- { ==a [
+ { ==a [ # TODO measure separate + implementation performance impact
+ [ JUMP a len 1 add ]
a _ len dearray
- [ SPLIT a len neg 1 ]
+ [ SPLIT 1 a len neg ]
] } /star deff
{ ==p [
@@ -461,8 +462,9 @@
{ 1 -01 str .postfix } /tail deff
+ { 0 -01 * -101 head eq } "^" deff
{ deff }' /install deff
- [ "(" ")" "[" "]" "-" "|" "^" "*" "." "$" ] { ==c
+ [ "(" ")" "[" "]" "-" "|" "^" "*" "+" "." "$" "\\" ] { ==c
{ _ head 0 c * eq } "^" c cat install
} each
@@ -487,13 +489,16 @@
^) -01
-0321 or or not
} {
- ^* {
+ { ^* } {
l star =l
tail
+ } { ^+ } {
+ l l star sequence =l
+ tail
} {
a l sequence =a
atom =l
- } ? *
+ } ifthenelse ifthenelse *
} loop
a l sequence
} /seq deff
@@ -515,7 +520,7 @@
} { ^[ } {
tail
^^ {
- tail chars ==nset
+ tail chars =*nset
{ nset not } ==set
^] not { "] expected" die } rep
tail
@@ -534,10 +539,21 @@
} { ^$ } {
[ [ LAST ] ] =a
tail
+ } { ^\ } {
+ tail
+ { ^d } {
+ { _ 0 "0" * ge -01 0 "9" * le and } terminal =a
+ tail
+ } { ^\ } {
+ { 0 "\\" * eq } terminal =a
+ tail
+ } {
+ "invalid character after \\ in regex" die
+ } ifthenelse ifthenelse *
} {
_ head { eq }_ terminal =a
tail
- } ifthenelse ifthenelse ifthenelse ifthenelse ifthenelse *
+ } ifthenelse ifthenelse ifthenelse ifthenelse ifthenelse ifthenelse *
# "(atom end) re: " -101 cat dump
a
@@ -545,8 +561,8 @@
{ # "(chars) re: " -101 cat dump
^] {
- tail chars2 ==set
- set { 0 "]" * eq } or =set "TODO" die
+ tail chars2 =*s
+ { _ s -01 0 "]" * eq or } ==set
}' {
chars2 ==set
}' ? *
@@ -555,8 +571,8 @@
{ # "(chars2) re: " -101 cat dump
^- {
- tail chars2 ==set
- set { 0 "-" * eq } or =set "TODO" die
+ tail chars2 =*s
+ { _ s -01 0 "-" * eq or } ==set
}' {
charsR ==set
}' ? *
@@ -566,7 +582,9 @@
{ # "(charsR) re: " -101 cat dump
charsN ==set
{ ^] not } {
- charsN set or =set "TODO" die
+ set =*s1
+ charsN =*s2
+ { _ s1 -01 s2 or } =set
} loop
set
} /charsR deff
@@ -653,7 +671,7 @@
} { # FIRST
position 0 eq { pc 1 add thread cloneThread clist .add } rep
} { # LAST
- position string len eq { pc 1 add thread cloneThread clist .add } rep
+ position maxPosition eq { pc 1 add thread cloneThread clist .add } rep
}
] * *
i 1 add =i
diff --git a/examples/working/regex.ey b/examples/working/regex.ey
index 5d0992d..0cd916a 100644
--- a/examples/working/regex.ey
+++ b/examples/working/regex.ey
@@ -4,5 +4,34 @@
{ "([d-h][d-h])(.*)" regex } { dump } loop
"abcdefghijklmnopqrstuvwxyz"
{ "^(...)(.*)" regex } { dump } loop
-"abcdefghijklmnopqrstuvwxyz"
+"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"(...)$" regex { dump } rep
+
+" code"
+{ _ "^ (.*)" regex } { -01 -- } loop dump
+"# comment"
+"^#" regex dump
+"1234 remaining"
+"^(\\d+) +(.*)" regex dump dump dump
+"\"stringcontent..."
+"^\"(.*)" regex dump dump
+"\\\\remaining"
+"^\\\\\\\\(.*)" regex dump dump
+"\\nremaining"
+"^\\\\n(.*)" regex dump dump
+"\\0foo"
+"^\\\\0(.*)" regex dump dump
+"\\0"
+"^\\\\0(.*)" regex dump dump
+"\\\"remaining"
+"^\\\\\"(.*)" regex dump dump
+"abcdef"
+{ "([^c])(.*)" regex } { dump } loop
+"...stringcontent\""
+{ "^([^\"\\\\])(.*)" regex } { dump } loop
+"/quoted123 remaining"
+"^([^a-zA-Z0-9 ]+)([a-zA-Z0-9][^ ]*) +(.*)" regex dump dump dump
+"unquoted remaining"
+"^([a-zA-Z0-9]+|[^a-zA-Z0-9 ]+) +(.*)" regex dump dump dump
+"+++ remaining"
+"^([a-zA-Z0-9]+|[^a-zA-Z0-9 ]+) +(.*)" regex dump dump dump