diff options
| author | Drahflow <drahflow@gmx.de> | 2017-03-01 00:18:03 +0100 |
|---|---|---|
| committer | Drahflow <drahflow@gmx.de> | 2017-03-01 00:18:03 +0100 |
| commit | c8db228e59f890fcf79aed5176555c48c43da481 (patch) | |
| tree | 4132bbe21f620de3edfe817bd15cea0217d6bc98 /elymas | |
| parent | 5e081a4b2676fd456f7ff3c65b902eb61b1c7737 (diff) | |
WIP: new assembly written regex engine
Diffstat (limited to 'elymas')
| -rw-r--r-- | elymas/betterregex.ey | 753 |
1 files changed, 753 insertions, 0 deletions
diff --git a/elymas/betterregex.ey b/elymas/betterregex.ey new file mode 100644 index 0000000..d32b62d --- /dev/null +++ b/elymas/betterregex.ey @@ -0,0 +1,753 @@ +## regex support +# ideas taken from http://swtch.com/~rsc/regexp/regexp3.html +< + sys .asm "::" via + sys .asm .ops ":" via + sys .asm .ops .|label "@" deffd + { :labelRecord [ } "[[" deffd + { ] :labelResolve } "]]" deffd + + 0 ==:MATCH 1 ==:TERM 2 ==:JUMP 3 ==:SPLIT 4 ==:SAVE 5 ==:FIRST 6 ==:LAST + 7 ==:FLOATBEGIN 8 ==:TERMBEGIN + + { ==b ==a [ + [ SPLIT 1 a len 2 add ] + a _ len dearray + [ JUMP b len 1 add ] + b _ len dearray + ] } /alternative deffd + + |cat /sequence deffd + + { ==?a [ # TODO measure separate + implementation performance impact + [ JUMP a len 1 add ] + a _ len dearray + [ SPLIT a len neg 1 ] + ] } /star deffd + + { ==?p [ + [ TERM p ] + ] } /terminal deffd + + { -- 1 }" terminal ==:TERMANY + { { eq }_ terminal } =*:TERMCHAR + + { ==?i ==?a [ + [ SAVE i 2 mul ] + a _ len dearray + [ SAVE i 2 mul 1 add ] + ] } /capture deffd + + { [ ] } /empty deffd + + { ==?str + str len 0 eq { + 1 neg + } { + 0 str * + } ? * + } /head deffd + + { 1 -01 str .postfix } /tail deffd + + { 0 -01 * -101 head eq } "^" deffd + { deffd }' /install deffd + [ "(" ")" "[" "]" "-" "|" "^" "*" "+" "." "$" "\\" "?" ] { ==?c + { _ head 0 c * eq } "^" c cat install + } each + + { 0 -01 * }" /threadGetPC deffd + { 1 -01 * }" /threadGetCaptures deffd + + { #==thread ==newpc + [ -021 threadGetCaptures ] + }" /cloneThread deffd + + { #==thread ==newpc + 0 -1201 =[] + }" /updateThread deffd + + { #==thread ==newpc + [ -0201 threadGetCaptures _ len dearray ] ] + }" /fullCloneThread deffd + + |add /origadd deffd + str .|bitTest /bitTest deffd + str .|bitSet /bitSet deffd + str .|zero /zero deffd + + # TODO think about implementation efficiency + { ==maxSize + < + 0 ==size + [ maxSize { 0 }" rep ] =*get + maxSize 1 sub 8 udiv 1 add 8 mul str .alloc _ zero ==pcUsed + + { # ==thread + _ threadGetPC pcUsed bitTest { -- }" { + _ size |get =[] + threadGetPC pcUsed bitSet + size 1 origadd =size + }" ? * + }' /add deffst + + { + size 1 sub _ =size + get + }' /pop deffst + + { + 0 =size + pcUsed zero + }' /clear deffst + > } /threadList deffd + + { + 0 ==?currentCapture + + { # "(parse) re: " -101 cat dump + + seq ==?a + ^| { + tail parse ==?b + a b alternative =a + } rep + a + } /parse deffst + + { # "(seq) re: " -101 cat dump + + empty _ ==?a + ==?l + + { # "(seq loop) re: " -101 cat dump + _ head 1 neg eq -01 + ^| -01 + ^) -01 + -0321 or or not + } { + [ { ^* } { + l star =l + tail + } { ^+ } { + l l star sequence =l + tail + } { ^? } { + l empty alternative =l + tail + } { 1 } { + a l sequence =a + atom =l + } ] conds + } loop + a l sequence + } /seq deffst + + { # "(atom) re: " -101 cat dump + empty ==?a + + [ { ^( } { + currentCapture ==thisCapture + currentCapture 1 add =currentCapture + tail parse thisCapture capture =a + ^) not { ") expected" die } rep + tail + } { ^[ } { + tail + ^^ { + tail chars =*nset + { nset not }' ==?set + ^] not { "] expected" die } rep + tail + }' { + chars ==?set + ^] not { "] expected" die } rep + tail + }' ? * + set terminal =a + } { ^. } { + TERMANY =a + tail + } { ^^ } { + [ [ FIRST ] ] =a + tail + } { ^$ } { + [ [ LAST ] ] =a + tail + } { ^\ } { + tail + [ { ^d } { + { _ 0 "0" * ge -01 0 "9" * le and }" terminal =a + tail + } { ^n } { + { 0 "\n" * eq }" terminal =a + tail + } + [ "." "[" "]" "?" "*" "+" "$" "^" "\\" "(" ")" ] { ==c + { _ head 0 c * eq } { + { 0 c * eq } terminal =a + tail + } + } each + { 1 } { + "invalid character '" "' after \\ in regex" -120 cat cat die + } ] conds + } { 1 } { + _ head TERMCHAR =a + tail + } ] conds + + # "(atom end) re: " -101 cat dump + a + } /atom deffst + + { # "(chars) re: " -101 cat dump + ^] { + tail chars2 =*s + { _ s -01 0 "]" * eq or }' ==?set + }' { + chars2 ==?set + }' ? * + set + } /chars deffst + + { # "(chars2) re: " -101 cat dump + ^- { + tail chars2 =*s + { _ s -01 0 "-" * eq or }' ==?set + }' { + charsR ==?set + }' ? * + set + } /chars2 deffst + + { # "(charsR) re: " -101 cat dump + charsN ==?set + { ^] not } { + set =*s1 + charsN =*s2 + { _ s1 -01 s2 or }' =set + } loop + set + } /charsR deffst + + { # "(charsN) re: " -101 cat dump + _ head ==?start + ^\ { + tail + [ { ^\ } { + 0 "\\" * =start + } { ^n } { + 0 "\n" * =start + } { 1 } { + "invalid character '" "' after \\ in regex" -120 cat cat die + } ] conds + } rep + tail + ^- { + tail + _ head ==?end + ^\ { + tail + [ { ^\ } { + 0 "\\" * =end + } { ^n } { + 0 "\n" * =end + } { 1 } { + "invalid character '" "' after \\ in regex" -120 cat cat die + } ] conds + } rep + tail + { _ start ge -01 end le and }' ==?set + }' { + { start eq }' ==?set + }' ? * + set + } /charsN deffst + + { [ + 0 # pc + [ currentCapture { 0 0 } rep ] # captures + ] }" /newThread deff + + # TODO: reconsider clist/ilist and also reconsider optimisation potential + # { ==string + # 0 ==position + # string len ==maxPosition + # 0 ==matched + # [ ] ==matchedThread + + # clist .clear + # nlist .clear + # ilist .clear + + # newThread _ ==thread clist .add + + # 0 ==pc + # [ ] =*code + + # ilist .|add =*iPush + # ilist .|pop =*iPop + + # { + # { ilist .size }" { + # iPop _ =thread + # threadGetPC _ =pc + # prog * =code + # 0 code codeSemantics * + # }" loop + # }" /runIList deffst + + [ + { -- [[ # MATCH + # some stack hacking to return directly from regex function + 8 /rsp :addqImm8Reg + + # generate capture group contents (if any) + currentCapture 0 gt { + 16 /r15 :subqImmReg + + 24 /rsi :subqImm8Reg # rsi == string matched + /rsi 8 /r15 :movqRegMemDisp8 + /rbp /r15 :movqRegMem + + 0 currentCapture range reverse { ==i + /r15 /rbp :movqMemReg + i 16 mul /rbp /rax :movqMemDisp32Reg + 63 /rax :btsqImm8Reg + /rax :pushqReg + i 16 mul 8 add /rbp /rax :movqMemDisp32Reg + 63 /rax :btsqImm8Reg + /rax :pushqReg + 8 /r15 :pushqMemDisp8 + + str .|infix sys .asm .rawAddress /rax :movqImmReg + 24 /rax /rax :movqMemDisp8Reg + 16 /rax :addqImm8Reg + /rax :callqReg + } each + + 16 /r15 :addqImmReg + } rep + + 1 /rax :movqImmReg + 63 /rax :btsqImm8Reg + /rax :pushqReg + + /r15 :pushqMem + 8 /r15 :addqImm8Reg + :retn + ]] } + { 1 -01* ==?p [[ # TERM + 0 256 range p grep ==acceptedChars + # TODO: optimize a few common cases instead of always rendering bitmaps + + # rcx == length of string + # rdx == index of character under test + + /rdx /rcx :cmpqRegReg + /stringExhausted :jleLbl8 + + /rax /rax :xorqRegReg + /rdx /rsi /al :movbMemIndexReg + /rax /bitfield :btqRegRipLbl32 + /notMatched :jncLbl8 + + programCounter 1 add /rdi :movqImmReg + pushToNlist sys .asm .rawAddress 24 add /rax :movqImmReg + /rax :callqReg + + @stringExhausted + @notMatched + :retn + + @bitfield + 0 32 range { ==i + [ 0 8 range { i 8 mul add p * } each ] 2 math .unbase + } each + # position maxPosition lt { + # position string * 1 code * { pc 1 add thread updateThread nlist .add }" rep + # }" rep + ]] } + # }" { # TERM + # position maxPosition lt { + # position string * 1 code * { pc 1 add thread updateThread nlist .add }" rep + # }" rep + { ==instr [[ # JUMP + programCounter 1 instr * add 8 mul /r10 /rax :movqMemDisp32Reg + 24 /rax :addqImm8Reg + /rax :jmpqReg + + # pc 1 code add thread cloneThread iPush + ]] } + { ==instr [[ # SPLIT + prog len 64 add programCounter 1 instr * add add /rdi :movqImmReg + /rdi /r9 :btsqRegMem + /firstTargetRedundant :jcLbl8 + programCounter 1 instr * add 8 mul /r10 /rax :movqMemDisp32Reg + 24 /rax :addqImm8Reg + /rax :callqReg + + @firstTargetRedundant + prog len 64 add programCounter 2 instr * add add /rdi :movqImmReg + /rdi /r9 :btsqRegMem + /secondTargetRedundant :jcLbl8 + programCounter 2 instr * add 8 mul /r10 /rax :movqMemDisp32Reg + 24 /rax :addqImm8Reg + /rax :jmpqReg + + @secondTargetRedundant + :retn + # pc 2 code add thread cloneThread iPush + # pc 1 code add thread cloneThread iPush + ]] } + { ==instr [[ # SAVE + allocateCaptureGroup sys .asm .rawAddress 24 add /rax :movqImmReg + /rax :callqReg + # rax == newly allocated capture object + + /r11 /rbx :movqRegReg + 4 /rbx :shlqImm8Reg + /rax entryPointsOffset 8 add 1 /rbx /r8 :movqRegMemIndexScaleDisp32 + + 0 captureInfoSize 8 div range { 8 mul ==offset + offset /rbp /rbx :movqMemDisp32Reg + /rbx offset /rax :movqRegMemDisp32 + } each + /rax /rbp :movqRegReg # rbp capture object (as usual) + + # actually save new position into capture info + /rdx 1 instr * 8 mul /rbp :movqRegMemDisp32 + + programCounter 1 add 8 mul /r10 /rax :movqMemDisp32Reg + 24 /rax :addqImm8Reg + /rax :jmpqReg + # pc 1 add thread fullCloneThread + # position 1 code -2102 threadGetCaptures =[] + # iPush + ]] } + { /TODO:FIRST die } + # }" { # FIRST + # position 0 eq { pc 1 add thread cloneThread iPush }" rep + { /TODO:LAST die } + # }" { # LAST + # position maxPosition eq { pc 1 add thread cloneThread iPush }" rep + { -- [[ # FLOATBEGIN + programCounter 1 add 8 mul /r10 /rax :movqMemDisp32Reg + 24 /rax :addqImm8Reg + /rax :callqReg + + programCounter /rdi :movqImmReg + pushToNlist sys .asm .rawAddress 24 add /rax :movqImmReg + /rax :callqReg + :retn + ]] } + { /TODO:TERMBEGIN die } + # }" { # TERMBEGIN + # 1 code /p deffs + # nlist .size not clist .size 1 eq and { + # { position maxPosition lt { position string * p not }" { 0 }" ? * }" { + # position 1 add =position + # }" loop + # }" rep + + # position maxPosition lt { + # position string * p { + # pc 1 add thread cloneThread nlist .add + # }" rep + # thread nlist .add + # }" rep + # }" + ] =*codeSemantics + + # 0 ==i + # { position maxPosition le }" { + # 0 =i + + # { i clist .size lt }" { + # i clist .get _ =thread + # threadGetPC _ =pc + # prog * =code + # 0 code codeSemantics * + # i 1 add =i + + # runIList + # }" loop + + # # "Next input character ========" dump + # clist nlist =clist =nlist + # nlist .clear + # ilist .clear + # position 1 add =position + # }" loop + + # matched { + # currentCapture ==i + # { i } { i 1 sub =i + # i 2 mul matchedThread threadGetCaptures * + # i 2 mul 1 add matchedThread threadGetCaptures * + # string str .infix + # } loop + # } rep + # matched + # }' /execute defvst + + parse ==prog -- + # handling of pattern starts, TODO: reenable + # prog 0 -01 * 0 -01 * FIRST eq { + # [ + # 1 prog len range { prog * } each + # ] =prog + # } { + # prog 0 -01 * 0 -01 * TERM eq { + # [ + # [ TERMBEGIN 1 0 prog * * ] + # 1 prog len range { prog * } each + # ] =prog + # } { + [ + [ FLOATBEGIN ] + ] prog cat =prog + # } ? * + # } ? * + + prog [ + [ MATCH ] + ] cat =prog + + # prog dump + + # data format for thread lists + # 0: current fill + # 8: entry point already present in list bitfield + # 8 + proglen / 8: entry point split to in previous iteration + # (this is just a handy memory area to use for this, see SPLIT code) + # 8 + to_8_bytes(proglen / 4): + # 0: thread entry point (as index into prog/progCode) + # 8: thread capture pointer + 8 + prog len 2 mul 1 sub 64 div 1 add 8 mul add _ ==entryPointsOffset + prog len 16 mul add + _ str .alloc ==clist + str .alloc ==nlist + + # data for capture groups, this is a memory buffer managed as a free + # list which persists across regex invocations. + # 0: address of first free capture group, or 0 if no free block is known + # 8, 16: start, end index in string (or address of next free group) + # TODO: Measure memory impact and think whether total count can be reduced, + # e.g. to number of SAVE instructions times capture entries + 8 + currentCapture 16 mul _ ==captureInfoSize prog len _ ==captureInfoCount mul + add str .alloc _ str .zero ==captures + + [[ + # rdi == target program instruction index + # rbp == address of capture object to use + 64 /rdi /rax :leaqMemDisp8Reg + /rax /r9 :btsqRegMem + /alreadyQueued :jcLbl8 + + /r9 /rax :movqMemReg + /r9 :incqMem + 4 /rax :shlqImm8Reg + entryPointsOffset /rax :addqImm32Reg + /rdi /rax /r9 :movqRegMemIndex + /rbp 8 1 /rax /r9 :movqRegMemIndexScaleDisp32 + + @alreadyQueued + :retn + ]] str .fromArray ==pushToNlist + + [[ currentCapture 0 gt { + @restartAfterCollection + captures sys .asm .rawAddress 24 add /rdi :movqImmReg + /rax /rax :xorqRegReg + /rdi /rax :orqMemReg + /runCollection :jzLbl8 + + /rax /rbx :movqMemReg + /rbx /rdi :movqRegMem + :retn + + @runCollection + # Mark all blocks used by entries from clist and nlist + clist sys .asm .rawAddress 24 add /rdi :movqImmReg + /markCaptureInfo :callqLbl32 + nlist sys .asm .rawAddress 24 add /rdi :movqImmReg + /markCaptureInfo :callqLbl32 + + # Collect unmarked blocks into free list + captures sys .asm .rawAddress 24 add /rdi :movqImmReg + 8 captureInfoSize captureInfoCount mul add /rdi /rbx :leaqMemDisp32Reg # rbx == end of capture memory + 8 /rdi /rax :leaqMemDisp8Reg # rax == block tested for mark bit + + @collectLoop + /rbx /rax :cmpqRegReg + /collectLoopDone :jgeLbl8 + + 63 /rax :btqImm8Mem + /blockNotFree :jcLbl8 + + /rdi /rdi :movqMemReg + /rdi /rax :movqRegMem + captures sys .asm .rawAddress 24 add /rdi :movqImmReg + /rax /rdi :movqRegMem + + @blockNotFree + captureInfoSize /rax :addqImm32Reg + /collectLoop :jmpLbl8 + @collectLoopDone + + # Reset all mark bits + 8 /rdi /rax :leaqMemDisp8Reg # rax == block to reset mark bit in + @resetLoop + /rbx /rax :cmpqRegReg + /resetLoopDone :jgeLbl8 + 63 /rax :btrqImm8Mem + captureInfoSize /rax :addqImm32Reg + /resetLoop :jmpLbl8 + @resetLoopDone + /restartAfterCollection :jmpLbl32 + + @markCaptureInfo + /rdi /rbx :movqMemReg # rbx == number of entries + 4 /rbx :shlqImm8Reg + entryPointsOffset /rdi /rbx :leaqMemDisp32Reg # rbx == end of list + entryPointsOffset 8 add /rdi :addqImm32Reg # rdi == capture group of first entry + + @markLoop + /rdi /rbx :cmpqRegReg + /markLoopDone :jleLbl8 + /rdi /rax :movqMemReg # rax == address of capture information + 63 /rax :btsqImm8Mem # mark topmost bit + 8 /rdi :addqImm8Reg + /markLoop :jmpLbl8 + @markLoopDone + :retn + } rep ]] str .fromArray ==allocateCaptureGroup + + 0 ==programCounter # available during assembling + [ prog { + 0 -101* codeSemantics * str .fromArray programCounter 1 add =programCounter + } each ] ==progCode + + [[ + 8 /r15 :subqImm8Reg + /r15 :popqMem + + clist sys .asm .rawAddress 24 add /r8 :movqImmReg # r8 == list of current threads + nlist sys .asm .rawAddress 24 add /r9 :movqImmReg # r9 == list of next threads + progCode sys .asm .rawAddress 8 add /r10 :movqImmReg # r10 == start of program list + + /r8 /rdi :movqRegReg + /clearThreadList :callqLbl32 + /r9 /rdi :movqRegReg + /clearThreadList :callqLbl32 + + # add thread starting at start of program + currentCapture 0 gt { + allocateCaptureGroup sys .asm .rawAddress 24 add /rax :movqImmReg + /rax :callqReg + /rax entryPointsOffset 8 add /r8 :movqRegMemDisp32 + + 0 captureInfoSize 8 div range { 8 mul ==offset + 0 offset /rax :andqImm8MemDisp32 + } each + } rep + + /r8 :incqMem + 1 8 /r8 :addqImm8MemDisp8 + 0 entryPointsOffset /r8 :andqImm8MemDisp32 + + /rsi :popqReg # string to match + 16 /rsi /rcx :movqMemDisp8Reg # rcx == length of string + 24 /rsi /rsi :leaqMemDisp8Reg # rsi == string to match (actual content) + /rdx /rdx :xorqRegReg # rdx == index of character under test + + @matchLoop + # r8 / r9: current/next thread lists + # rsi, rdx: string buffer, character index + # rcx: length of string + /rdx /rcx :cmpqRegReg + /stringExhausted :jngeLbl8 + + /r11 /r11 :xorqRegReg # r11 == index in clist to be run next + + @clistLoop + /r11 /r8 :cmpqRegMem + /clistFinished :jleLbl8 + + /r11 /rax :movqRegReg + 4 /rax :shlqImm8Reg + entryPointsOffset 1 /rax /r8 /r12 :movqMemIndexScaleDisp32Reg + entryPointsOffset 8 add 1 /rax /r8 /rbp :movqMemIndexScaleDisp32Reg # rbp == capture object + 8 /r12 /r10 /rax :movqMemIndexScaleReg + 24 /rax :addqImm8Reg + /rax :callqReg + + /r11 :incqReg + /clistLoop :jmpLbl8 + + @clistFinished + /r8 /r9 :xchgqRegReg + /r9 /rdi :movqRegReg + /clearThreadList :callqLbl32 + + /rdx :incqReg + /matchLoop :jmpLbl32 + + @stringExhausted + + /rax /rax :xorqRegReg # TODO fake resultg + 63 /rax :btsqImm8Reg # TODO fake resultg + /rax :pushqReg # TODO fake resultg + + /r15 :pushqMem + 8 /r15 :addqImm8Reg + :retn + + @clearThreadList # TODO: maybe inline + # rdi == thread list to clear + # clobbered: rax, rcx + /rax /rax :xorqRegReg + /rcx :pushqReg + 16 prog len 2 mul 1 sub 8 div 1 add add 1 sub 8 div 1 add /rcx :movqImmReg + :reprcx + :stosq + /rcx :popqReg + :retn + ]] [ scope ] sys .asm .createFunction + } +> -- /enregex deffd + +{ + quoted { + _ sys .typed .type 1 eq { + enregex + } { |enregex "*" | } ? * + } { enregex * } ? * +} /regex defq + +# { -1010 dump dump eq not { "ASSERT" die } rep } /assert deffst +# "d" "." regex 1 assert +# +# "a" "b" regex 0 assert +# "a" "a" regex 1 assert +# "abc" "ac" regex 0 assert +# "abc" "abc" regex 1 assert +# "abc" "bc" regex 1 assert +# "b" "a|b|c" regex 1 assert +# "d" "a|b|c" regex 0 assert +# "d" "." regex 1 assert +# "a" "(a)" regex 1 assert "a" assert +# +# "foo bar" "(.*) b(ar|yolo)" regex 1 assert "foo" assert "ar" assert +# "foo zar" "(.*) b(ar|yolo)" regex 0 assert +# "fofoobaz" "foo" regex 1 assert + +# Target time: Perl takes 0.32s +# Old regex engine takes: 33.252s +2000000 { "aoetauhsontuhaoeuhnathoesuhasonetuhaohteustaohesutahoseathsoeuahoeaunoheutahoseunathoeutha oeusaoeuhsaoteuhsatoheusaotneuhsatueh " " " regex -- } rep + +# vim: syn=elymas |
