aboutsummaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
author Drahflow <drahflow@gmx.de> 2013-09-01 21:53:19 +0200
committer Drahflow <drahflow@gmx.de> 2013-09-01 21:53:19 +0200
commit dd98844bd4968664f9dd7be996df596873733ecd (patch)
tree f9f653742e9a9cae65a13d85a051681359dc95fc /compiler
parent fdd6dbf870747bd02f3a3ce6523dcc2c32880acb (diff)
RegEx behavior now greedy as everyone expects it
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/elymasGlobal.ey 40
-rw-r--r-- compiler/standardClient.ey 82
2 files changed, 105 insertions, 17 deletions
diff --git a/compiler/elymasGlobal.ey b/compiler/elymasGlobal.ey
index 9a91aa1..d9308de 100644
--- a/compiler/elymasGlobal.ey
+++ b/compiler/elymasGlobal.ey
@@ -1945,6 +1945,19 @@
/rax :popqReg
/rax 24 /r15 :movqRegMemDisp8
+
+ 7 /rax /cl :movbMemDisp8Reg # cl = byte at offset 7 of the object in rax (presumably its type tag -- confirm against the object layout)
+ %F0 /cl :andbImmReg # keep only the high nibble of that byte
+ %70 /cl :cmpbImmReg # high nibble %70 selects the array path
+ /eachArray :jzLbl8
+ %10 /cl :cmpbImmReg # high nibble %10 selects the string path
+ /eachString :jzLbl8
+
+ "neither string nor array in each" ::outputError # any other value: report the error ...
+ :ud2 # ... and stop on the x86 undefined-opcode instruction
+
+ @eachArray # array iteration starts here
+
8 /rax /rcx :leaqMemDisp8Reg
/rcx /r15 :movqRegMem
@@ -1970,6 +1983,33 @@
8 /r15 :addqImm8Mem
/loop :jmpLbl8
+ @eachString # string branch of each: runs the code once per byte of the string in rax
+
+ 16 /rax /rcx :movqMemDisp8Reg # rcx = qword at offset 16 of the string object, used below as the element count
+ /rcx 8 /r15 :movqRegMemDisp8 # 8 /r15 = count remaining
+ 24 /rax /rcx :leaqMemDisp8Reg # rcx = address of offset 24 inside the string object, used below as the first element
+ /rcx /r15 :movqRegMem # /r15 = pointer to current element
+
+ # /r15 -> current string element
+ # 8 /r15 -> count remaining
+ # 16 /r15 -> code to execute
+ # 24 /r15 -> string object (to keep the GC away)
+
+ @stringLoop # NOTE(review): the count is only tested after the first pass; if the count loaded above can be 0, the decrement wraps to a nonzero value and the loop does not stop -- confirm empty strings are excluded earlier
+ ::internalAllocateInteger /rax :movqImmReg
+ /rax :callqReg # rax after the call is used as the fresh integer object
+ /rax :pushqReg # push the integer object (it stays on the stack for the code called below)
+ /r15 /rdx :movqMemReg # rdx = pointer to current element
+ /rdx /rdx :movzxMem8Reg64 # rdx = zero-extended byte at that address
+ /rdx 8 /rax :movqRegMemDisp8 # store the byte value at offset 8 of the integer object
+ 16 /r15 :pushqMemDisp8 # push code
+ |ey* /rax :movqImmReg
+ /rax :callqReg # call |ey* (presumably applies the pushed code to the pushed integer -- confirm)
+
+ 1 /r15 :addqImm8Mem # advance the element pointer by one byte
+ 1 8 /r15 :subqImm8MemDisp8 # decrement count remaining
+ /stringLoop :jnzLbl8 # repeat while the count is nonzero
+
@end
32 /r15 :addqImm8Reg
diff --git a/compiler/standardClient.ey b/compiler/standardClient.ey
index 6e3c19e..940b974 100644
--- a/compiler/standardClient.ey
+++ b/compiler/standardClient.ey
@@ -1,12 +1,13 @@
## regex support
# ideas taken from http://swtch.com/~rsc/regexp/regexp3.html
+# FIXME: correctly handle */+/? priority
{
0 ==:MATCH 1 ==:TERM 2 ==:JUMP 3 ==:SPLIT 4 ==:SAVE 5 ==:FIRST 6 ==:LAST
{ ==b ==a [ # alternative: builds the program for "a or b" from the two sub-programs a and b
- [ SPLIT 1 a len 1 add ] # FIXME this should be "2 add"?!
+ [ SPLIT 1 a len 2 add ] # pc-relative targets: +1 is the first instruction of a, +(len a + 2) is the first instruction of b (past the JUMP)
a _ len dearray # inline the instructions of a
- [ JUMP b len ]
+ [ JUMP b len 1 add ] # reached after a: pc-relative jump over all of b
b _ len dearray # inline the instructions of b
] } /alternative deffst
@@ -15,7 +16,7 @@
{ ==?a [ # TODO measure separate + implementation performance impact
[ JUMP a len 1 add ] # enter at the SPLIT below, so a may be matched zero times
a _ len dearray # inline the instructions of a
- [ SPLIT 1 a len neg ]
+ [ SPLIT a len neg 1 ] # first target goes back to the start of a, second target leaves the loop; the interpreter tries the first target first, which makes the repetition greedy
] } /star deffst
{ ==?p [
@@ -42,7 +43,7 @@
{ 0 -01 * -101 head eq } "^" deffd
{ deffd }' /install deffst
- [ "(" ")" "[" "]" "-" "|" "^" "*" "+" "." "$" "\\" ] { ==?c
+ [ "(" ")" "[" "]" "-" "|" "^" "*" "+" "." "$" "\\" "?" ] { ==?c
{ _ head 0 c * eq } "^" c cat install
} each
@@ -73,6 +74,9 @@
} { ^+ } {
l l star sequence =l
tail
+ } { ^? } {
+ l empty alternative =l
+ tail
} { 1 } {
a l sequence =a
atom =l
@@ -235,20 +239,54 @@
}" { -- }" ? *
}' /add deffst
+ { ==i # ==thread; stores the thread in slot i if its pc is still free, leaving 1 if stored and 0 otherwise
+ _ threadGetPC pcFree { # is the pc of this thread still unclaimed?
+ _ i |get =[] # get[i] = thread
+ 0 -01 threadGetPC |pcFree =[] # pcFree[pc] = 0, so the same pc is not accepted again
+ 1 # result: stored
+ }" { -- 0 }" ? * # otherwise discard the thread; result: not stored
+ } /update deffst
+
{
0 =size
[ maxSize { 1 }" rep ] =pcFree
}' /clear deffst
> } /threadList deffd
+ { < ==maxSize # threadStack: LIFO collection of threads that accepts each pc at most once between two clear calls
+ 0 ==size # number of threads currently on the stack
+ [ maxSize { 0 }" rep ] =*get # slot array, maxSize entries initialised to 0
+ [ maxSize { 1 }" rep ] =*pcFree # pcFree[pc] is 1 while no thread with that pc has been pushed
+
+ { # ==thread
+ _ threadGetPC pcFree { # only accept the thread if its pc is still free
+ _ size |get =[] # get[size] = thread
+ 0 -01 threadGetPC |pcFree =[] # pcFree[pc] = 0; pop does not set it back
+ size 1 origadd =size # size = size + 1
+ }" { -- }" ? * # otherwise the thread is discarded
+ }' /push deffst
+
+ {
+ size 1 sub _ =size # size = size - 1; the new size is also the index of the top entry
+ get # leave get[size] as the result; no check for an empty stack, callers test .size first
+ }' /pop deffst
+
+ {
+ 0 =size # forget all entries (the slot array itself is left as is)
+ [ maxSize { 1 }" rep ] =pcFree # mark every pc as free again
+ }' /clear deffst
+ > } /threadStack deffd
+
+ # TODO: reconsider clist/ilist and also reconsider optimisation potential
{ ==prog ==string
0 ==position
string len ==maxPosition
- 0 ==done
0 ==matched
+ < > ==matchedThread
prog len _ threadList ==clist
- threadList ==nlist
+ _ threadList ==nlist
+ threadStack ==ilist
newThread _ ==thread clist .add
@@ -258,40 +296,50 @@
[ # one handler per opcode, in the order MATCH TERM JUMP SPLIT SAVE FIRST LAST; each reads pc, code and thread of the instruction being executed
{ # MATCH
1 =matched
+ thread =matchedThread # remember the matching thread; its captures are read after the loop
clist .clear # sets the size of clist to 0, so no further clist entries are executed at this position
}" { # TERM
position maxPosition lt { # only if an input character is left
position string * 1 code * { pc 1 add thread cloneThread nlist .add }" rep # apply code[1] to the current character; if it accepts, continue at pc+1 at the next position
}" rep
}" { # JUMP
- pc 1 code add thread cloneThread clist .add
+ pc 1 code add thread cloneThread ilist .push # continue at pc + code[1], still at the current position
}" { # SPLIT
- pc 1 code add thread cloneThread clist .add
- pc 2 code add thread cloneThread clist .add
+ pc 2 code add thread cloneThread ilist .push # pushed first, therefore popped second
+ pc 1 code add thread cloneThread ilist .push # pushed last, therefore popped first: target code[1] has priority
}" { # SAVE
pc 1 add thread fullCloneThread
position 1 code -2102 threadGetCaptures =[] # captures[code[1]] = position, written into the cloned thread
- clist .add
+ ilist .push
}" { # FIRST
- position 0 eq { pc 1 add thread cloneThread clist .add }" rep
+ position 0 eq { pc 1 add thread cloneThread ilist .push }" rep # continue only at the start of the input
}" { # LAST
- position maxPosition eq { pc 1 add thread cloneThread clist .add }" rep
+ position maxPosition eq { pc 1 add thread cloneThread ilist .push }" rep # continue only at the end of the input
}"
] =*codeSemantics
-
+
0 ==i # index into clist
- { position maxPosition le done not and }" {
+ { position maxPosition le }" { # one pass per input position, including the position just past the last character
0 =i
- { i clist .size lt done not and }" {
+ { i clist .size lt }" { # run every thread currently in clist
i clist .get _ =thread
threadGetPC _ =pc
prog * =code # code = instruction at pc
0 code codeSemantics * # dispatch on the opcode code[0]
i 1 add =i
+
+ { ilist .size }" { # drain the threads that instruction pushed before taking the next clist entry
+ ilist .pop _ =thread
+ threadGetPC _ =pc
+ prog * =code
+ 0 code codeSemantics * # handlers may push further threads onto ilist
+ }" loop
}" loop
+ # "Next input character ========" dump
clist nlist =clist =nlist # swap the current and next thread lists
nlist .clear
+ ilist .clear # resets the size and the per-pc marks of the stack for the next position
position 1 add =position
}" loop
@@ -299,8 +347,8 @@
currentCapture ==i
{ i } { i 1 sub =i
string
- i 2 mul thread threadGetCaptures * _ ==start -01 str .postfix
- i 2 mul 1 add thread threadGetCaptures * start sub -01 str .inplacePrefix
+ i 2 mul matchedThread threadGetCaptures * _ ==start -01 str .postfix
+ i 2 mul 1 add matchedThread threadGetCaptures * start sub -01 str .inplacePrefix
} loop
} rep
matched