diff options
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | compiler/elymasAsm.ey | 17 | ||||
| -rw-r--r-- | compiler/elymasAsmLib.ey | 167 | ||||
| -rw-r--r-- | compiler/elymasGlobal.ey | 188 | ||||
| -rw-r--r-- | compiler/elymasGlobalSys.ey | 1 | ||||
| -rw-r--r-- | compiler/elymasGlobalSysOpt.ey | 144 | ||||
| -rw-r--r-- | compiler/standardClient.ey | 42 | ||||
| -rw-r--r-- | elymas/Makefile | 7 | ||||
| -rw-r--r-- | elymas/lib/sys/opt.ey | 413 | ||||
| -rw-r--r-- | elymas/optimized.ey | 8 | ||||
| -rw-r--r-- | notes | 1 |
11 files changed, 766 insertions, 226 deletions
@@ -1,7 +1,7 @@ all: elymas/loaded -elymas/loaded: elymas/interpreter $(shell find elymas/lib/ -name '*.ey' ) - cd elymas && $(MAKE) loaded +elymas/loaded: elymas/interpreter $(shell find elymas/ -name '*.ey' ) + cd elymas && $(MAKE) elymas/interpreter: elymas/interpreter.ey compiler/*.ey interpreter/Metal.so interpreter/ACME cd compiler && \ diff --git a/compiler/elymasAsm.ey b/compiler/elymasAsm.ey index e1f7d5a..ca3bdd7 100644 --- a/compiler/elymasAsm.ey +++ b/compiler/elymasAsm.ey @@ -62,7 +62,7 @@ -01 0 gt %08 mul add } /rex deff - { =mem =reg + { ==mem ==reg mem [ /spl /sp /esp /rsp /bpl /bp /ebp /rbp ] streq any { "modrm00 not possible on rsp / rbp and their partial registers" die } rep @@ -72,7 +72,7 @@ reg regno 8 mul %38 band add } /modrm00 deff - { =mem =reg + { ==mem ==reg mem [ /spl /sp /esp /rsp ] streq any not assert %40 @@ -80,7 +80,7 @@ reg regno 8 mul %38 band add } /modrm01 deff - { =mem =reg + { ==mem ==reg mem [ /spl /sp /esp /rsp ] streq any not assert %80 @@ -88,7 +88,7 @@ reg regno 8 mul %38 band add } /modrm10 deff - { =mem =reg + { ==mem ==reg %C0 mem regno %07 band add reg regno 8 mul %38 band add @@ -1091,6 +1091,15 @@ %0B } /ud2 deff + { ==regb ==rega + rega bit64assert + regb bit64assert + + 1 rega /none regb rex + %87 + rega regb modrm11 + } /xchgqRegReg deff + { ==mem ==reg reg bit64assert mem bit64assert diff --git a/compiler/elymasAsmLib.ey b/compiler/elymasAsmLib.ey index f2f597f..e54c5c0 100644 --- a/compiler/elymasAsmLib.ey +++ b/compiler/elymasAsmLib.ey @@ -82,7 +82,7 @@ { %00 %00 %00 %00 %00 %50 %00 %00 } /BLOCKBASE deff { %00 %00 %00 %00 %00 %40 %00 %00 } /MARKBASE deff # 4096 16 mul 8 mul ==ALLOCCHUNKSIZE # minimum ALLOCCHUNKSIZE - 4096 16 mul 8 mul 16 mul ==ALLOCCHUNKSIZE # FIXME: there is still some wonkyness with freezing + 4096 16 mul 8 mul 256 mul ==ALLOCCHUNKSIZE # FIXME: there is still some wonkyness with freezing < # current end of heap memory (grows upwards) @@ -178,8 +178,8 @@ [[ /rbx :pushqReg # /rdi :pushqReg # TODO remove these three lines once load-testing the GC seems unnecessary - # /markAndSweep :callqLbl32 - # /rdi :popqReg + # /markAndSweep :callqLbl32 # load testing + # /rdi :popqReg # load testing /rdi :pushqReg /searchForFreeBlock :callqLbl32 /rax /rax :andqRegReg @@ -231,32 +231,31 @@ /rsi /rsi :xorqRegReg # rsi > 0 => currently counting block extent of free block - @testBlockBitLoop + @searchFreeBlockStart /rbx /r8 :btqRegMem # test block bitmap - /nonFreeBlock :jcLbl8 # block not free - /rsi /rsi :andqRegReg - /notCurrentlyCounting :jzLbl8 + /nonFreeBlockStartFound :jcLbl8 # block not free + /rbx /r9 :btqRegMem # test mark bitmap + /freeBlockStartFound :jcLbl8 # block marked, i.e. truly free block start - @currentlyCounting - 16 /rsi :addqImm8Reg - /rsi /rdi :cmpqRegReg - /freeBlockFound :jbeLbl8 + @resumeFreeBlockStartSearch + /rsi /rsi :xorqRegReg + @nonFreeBlockStartFound /rbx :incqReg /rbx /rbp :cmpqRegReg - /testBlockBitLoop :jaLbl8 + /searchFreeBlockStart :jaLbl8 /noFreeBlockAvailable :jmpLbl32 - @notCurrentlyCounting - /rbx /r9 :btqRegMem # test mark bitmap - /currentlyCounting :jcLbl8 # block marked, i.e. truly free block start - - @nonFreeBlock - /rsi /rsi :xorqRegReg + @freeBlockStartFound + 16 /rsi :addqImm8Reg + /rsi /rdi :cmpqRegReg + /freeBlockFound :jbeLbl8 /rbx :incqReg /rbx /rbp :cmpqRegReg - /testBlockBitLoop :jaLbl8 - /noFreeBlockAvailable :jmpLbl32 + /noFreeBlockAvailable :jbeLbl32 + /rbx /r8 :btqRegMem # test block bitmap + /resumeFreeBlockStartSearch :jcLbl8 # block not free + /freeBlockStartFound :jmpLbl8 @freeBlockFound # rdi == size of block to allocate @@ -407,7 +406,7 @@ @loopThroughMainStack 8 /rsi :subqImm8Reg /rsi /rdi :movqMemReg - /markObject :callqLbl32 + /markStackObject :callqLbl32 /rsi /rsp :cmpqRegReg /loopThroughMainStack :jbLbl8 @@ -415,7 +414,7 @@ @loopThroughCallStack 8 /rsi :subqImm8Reg /rsi /rdi :movqMemReg - /markObject :callqLbl32 + /markStackObject :callqLbl32 /rsi /r15 :cmpqRegReg /loopThroughCallStack :jbLbl8 @@ -424,7 +423,7 @@ 0 /rsi :cmpqImm8Mem /quoteEncodingBufferUnused :jzLbl8 - :STACKSIZE 8 sub /rcx :movqImmReg + :STACKSIZE 8 sub 8 div /rcx :movqImmReg @loopThroughEncodingBuffer /rsi /rdi :movqMemReg /markObject :callqLbl32 @@ -460,6 +459,33 @@ /r8 :popqReg :retn + # recursively mark this object reachable + # guaranteed not to clobber rcx, rsi (because it is used in many loops) + @markStackObject + # rdi == address of a reachable object, of reachable code within a code block or some random bits + /rdi /r8 :cmpqRegReg + /markObjectDone :jaLbl32 # pointing below the heap + /rdi /r11 :cmpqRegReg + /markObjectDone :jbeLbl32 # pointing above the heap + # rdi == address of a reachable object or of reachable code within a code block + # scan for object downwards + /r8 /rdi :subqRegReg + # rdi == byte offset relative to heap begin + 4 /rdi :shrqImm8Reg + # rdi == cell index of first 16-byte cell of possible object + + @searchStackObject + /rdi /r9 :btqRegMem # test block bit + /rdi :decqReg + /searchStackObject :jncLbl8 + /rdi :incqReg + # rdi == cell index of first 16-byte cell of object + + # TODO optimize this by jumping right into markObject + 4 /rdi :shlqImm8Reg + /r8 /rdi :addqRegReg + /markObject :jmpLbl8 + @markObjectDone :retn @@ -1025,6 +1051,60 @@ ] /internalAllocateString defv > { defv }' allocateOffsetStruct + < + # allocate a new code block from the encoding buffers + # rdi -> end of allocated code (address after last used opcode in quoteEncodingBufferCode) + # rbp -> end of allocated object pointers (address after last used address in quoteEncodingBufferObjects) + # rax <- resulting code object + [[ + :quoteEncodingBufferCode /rax :movqImmReg + /rax /rdi :subqRegReg + + /rdi :decqReg + 3 /rdi :shrqImm8Reg + /rdi :incqReg + 3 /rdi :shlqImm8Reg + + :quoteEncodingBufferObjects /rax :movqImmReg + /rax /rbp :subqRegReg + + /rbp /rbp :andqRegReg + /atLeastOnePointer :jnzLbl8 + + /rbp /rax :movqRegMem # store fake zero pointer + 8 /rbp :addqImm8Reg + + @atLeastOnePointer + + /rbp :pushqReg # store pointer byte count + /rdi :pushqReg # store opcode byte count (rounded up to 8 byte) + + /rbp /rdi :addqRegReg + internalAllocateCode /rax :movqImmReg + /rax :callqReg + + # rax == code block on heap + + # copy opcodes + :quoteEncodingBufferCode /rsi :movqImmReg + 16 /rax /rdi :leaqMemDisp8Reg + /rcx :popqReg + /rcx 8 /rax :movqRegMemDisp8 + :reprcx :movsb + + # copy object pointers + :quoteEncodingBufferObjects /rsi :movqImmReg + /rcx :popqReg + :reprcx :movsb + + # mark buffer unused + :quoteEncodingBufferObjects /rdi :movqImmReg + /rcx /rdi :movqRegMem # rcx is conveniently zero + + :retn + ]] /internalAllocateCodeFromEncodingBuffer defv + > { defv }' allocateOffsetStruct + [ 8 /r15 :subqImm8Reg /r15 :popqMem @@ -1059,47 +1139,6 @@ :retn ] /unscopingFunctionFooter defv - # rdi -> end of allocated code (address after last used opcode in quoteEncodingBufferCode) - # rbp -> end of allocated object pointers (address after last used address in quoteEncodingBufferObjects) - # rax <- resulting code object - { - :quoteEncodingBufferCode /rax :movqImmReg - /rax /rdi :subqRegReg - - :quoteEncodingBufferObjects /rax :movqImmReg - /rax /rbp :subqRegReg - - /rdi :decqReg - 3 /rdi :shrqImm8Reg - /rdi :incqReg - 3 /rdi :shlqImm8Reg - - /rbp :pushqReg # store pointer byte count - /rdi :pushqReg # store opcode byte count (rounded up to 8 byte) - - /rbp /rdi :addqRegReg - internalAllocateCode /rax :movqImmReg - /rax :callqReg - - # rax == code block on heap - - # copy opcodes - :quoteEncodingBufferCode /rsi :movqImmReg - 16 /rax /rdi :leaqMemDisp8Reg - /rcx :popqReg - /rcx 8 /rax :movqRegMemDisp8 - :reprcx :movsb - - # copy object pointers - :quoteEncodingBufferObjects /rsi :movqImmReg - /rcx :popqReg - :reprcx :movsb - - # mark buffer unused - :quoteEncodingBufferObjects /rdi :movqImmReg - /rcx /rdi :movqRegMem # rcx is conveniently zero - } /allocateCodeFromEncodingBuffer deff - { strToUTF8Bytes _ =*v len _ ==exactLength 1 sub 8 div 4 add 8 mul ==memoryLength diff --git a/compiler/elymasGlobal.ey b/compiler/elymasGlobal.ey index 2918a19..eb5fe8b 100644 --- a/compiler/elymasGlobal.ey +++ b/compiler/elymasGlobal.ey @@ -150,6 +150,7 @@ /rdx :popqReg /rax /rax :testqRegReg /nameOffsetKnown :jnzLbl8 + /nameSearch :jmpLbl8 # if not exists, insert @nameUndefined @@ -1448,138 +1449,48 @@ /rdi :jmpqReg # continue with freshly patched code ]] /internalExecuteIdentifierUnquotedAndPatchLateResolve defv - # optimize function code by - # * patching double redirects, # FIXME correctly detect out-of-heap even if below heap + # call function optimize hook on code block # 0 -> address of code block [[ - /rbx :popqReg - /rsi :popqReg -# FIXME FIXME FIXME this should be reenabled soonish -# 8 /rsi :addqImm8Reg # move to start of code -# /rsi :pushqReg -# :quoteEncodingBuffer /rdi :movqImmReg -# -# %34 /rcx :movqImmReg # skip to generated code -# :reprcx :movsb -# -# @parseLoop -# 0 [ 0 :callqRel32 ] * 0 /rsi :cmpbImmMemDisp8 -# /parseFooter :jzLbl32 -# 1 [ 8 /rax :addqImm8Reg ] * 11 /rsi :cmpbImmMemDisp8 -# /parseFunctionCall :jzLbl32 -# 0 [ /rax :pushqReg ] * 10 /rsi :cmpbImmMemDisp8 -# /parseConstantPush :jzLbl32 -# 0 [ /rax :callqReg ] * 10 /rsi :cmpbImmMemDisp8 -# /parseConstantCall :jzLbl32 -# -# "unknown assembly instruction during internalOptimizeGeneratedScopedFunction" ::outputError -# :ud2 -# -# @parseFunctionCall -# 2 /rsi /rdx :movqMemDisp8Reg # load function address -# 1 [ 0 /rax :movqImmReg ] * 9 /rdx :cmpbImmMemDisp8 -# /parseFunctionCallSkip :jnzLbl32 -# 0 [ /rax :jmpqReg ] * 18 /rdx :cmpbImmMemDisp8 -# /parseFunctionCallSkip :jnzLbl32 -# 1 [ /rax :jmpqReg ] * 19 /rdx :cmpbImmMemDisp8 -# /parseFunctionCallSkip :jnzLbl32 -# -# 10 /rdx /rcx :movqMemDisp8Reg # load final function address -# /rax :movqImmOOBReg ::HEAPEND -# /rax /rcx :cmpqRegReg -# /parseFunctionUnGCable :jaeLbl8 -# -# # @parseFunctionGCable -# [ -# /rax :movqImmOOBReg -# ] ::loadToRdi -# 8 /rcx :subqImm8Reg # make address point to code block object -# /rcx /rdi :movqRegMem # load final function address -# 8 /rdi :addqImm8Reg -# [ -# 8 /rax :addqImm8Reg # skip code block object header -# /rax :callqReg -# ] ::loadToRdi -# /parseFunctionCallDone :jmpLbl8 -# -# @parseFunctionUnGCable -# [ -# /rax :movqImmOOBReg -# ] ::loadToRdi -# /rcx /rdi :movqRegMem # load final function address -# 8 /rdi :addqImm8Reg -# [ -# /rax :callqReg -# ] ::loadToRdi -# -# @parseFunctionCallDone -# 16 /rsi :addqImm8Reg -# /parseLoop :jmpLbl32 -# -# @parseFunctionCallSkip -# :movsq # verbatim copy -# :movsq -# /parseLoop :jmpLbl32 -# -# @parseConstantPush -# :movsq # verbatim copy -# :movsw -# :movsb -# /parseLoop :jmpLbl32 -# -# @parseConstantCall -# :movsq # verbatim copy -# :movsl -# /parseLoop :jmpLbl32 -# -# @parseFooter -# ::scopingFunctionFooter ::loadToRdi -# ::allocateCodeFromEncodingBuffer -# # rax == optimized code object on heap -# -# # patch indirection to new code into old -# /rsi :popqReg # rsi == old code start -# /rsi /rdi :movqRegReg -# [ -# /rax :movqImmOOBReg -# ] ::loadToRdi -# 8 /rax :addqImm8Reg -# /rax /rdi :movqRegMem -# 8 /rax :subqImm8Reg -# 8 /rdi :addqImm8Reg -# [ -# /rax :jmpqReg -# /rax :movqImmOOBReg -# ] ::loadToRdi -# /rax /rdi :movqRegMem -# -# # TODO kill remaining opcodes to remove GC-followable memory addresses -# # possible solution: updatable exact-length field for code blocks -# # but think of the return stack - /rbx :jmpqReg - ]] /internalOptimizeGeneratedScopedFunction defv - - # optimize function code by - # * TODO patching double redirects - # 0 -> address of code block - [[ - # :ud2 # FIXME enable here to debug function optimization - /rbx :popqReg - /rax :popqReg # FIXME do something useful here - /rbx :pushqReg - :retn - ]] /internalOptimizeGeneratedUnscopedFunction defv + 8 /r15 :subqImm8Reg + /r15 :popqMem - # optimize function code by - # * TODO patching double redirects - # 0 -> address of code block - [[ - # :ud2 # FIXME enable here to debug function optimization - /rbx :popqReg - /rax :popqReg # FIXME do something useful here - /rbx :pushqReg + /rax :popqReg + 58 /rax :btsqImm8Mem + /alreadyOptimized :jcLbl8 + /rax :pushqReg + /rax :pushqReg + + /rax :movqImmOOBReg "sys" ::string + /rax :pushqReg + /rax :movqImmOOBReg "internalCallOptimizeHook" "ey|" ::linkAbs64 + /rax :callqReg + + /rax :movqImmOOBReg "opt" ::string + /rax :pushqReg + /rax :movqImmOOBReg "internalCallOptimizeHook" "ey." ::linkAbs64 + /rax :callqReg + + /rax :movqImmOOBReg "hook" ::string + /rax :pushqReg + /rax :movqImmOOBReg "internalCallOptimizeHook" "ey." ::linkAbs64 + /rax :callqReg + + /rcx :popqReg # result object (not necessarily new code object though) + /rax :popqReg # old object + + /rax /rcx :cmpqRegReg + /optimizationHappened :jnzLbl8 + + 58 /rax :btrqImm8Mem # reset optimized bit + + @optimizationHappened + @alreadyOptimized + + /r15 :pushqMem + 8 /r15 :addqImm8Reg :retn - ]] /internalOptimizeGeneratedUncapturingFunction defv + ]] /internalCallOptimizeHook defv > { defv }' ::allocateOffsetStruct < @@ -1683,7 +1594,8 @@ /rax :callqReg ] ::unscopingFunctionFooter cat ::loadToRdi - ::allocateCodeFromEncodingBuffer + ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg + /rax :callqReg # rax == code block on heap # create non-capturing function object @@ -1777,7 +1689,8 @@ /rax :callqReg ] ::unscopingFunctionFooter cat ::loadToRdi - ::allocateCodeFromEncodingBuffer + ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg + /rax :callqReg # rax == code block on heap # create non-capturing function object @@ -2348,7 +2261,7 @@ ] ::loadToRdi :quoteEncodingBufferCode /rbx :movqImmReg /rdi /rax :movqRegReg - 5 /rax :addqImm8Reg + 13 /rax :addqImm8Reg /rbx /rax :subqRegReg /rax /rdi :movqRegMem 8 /rdi :addqImm8Reg @@ -2361,7 +2274,8 @@ ] ::loadToRdi footer ::loadToRdi - ::allocateCodeFromEncodingBuffer + ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg + /rax :callqReg # rax == code block on heap @@ -2421,7 +2335,8 @@ /rax :pushqReg ] ::unscopingFunctionFooter cat ::loadToRdi - ::allocateCodeFromEncodingBuffer + ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg + /rax :callqReg # rax == code object on heap @@ -2436,9 +2351,10 @@ /done :jmpLbl32 ]] - } _ 1 ::scopingFunctionHeader ::scopingFunctionFooter internalOptimizeGeneratedScopedFunction -32104* /ey} defv - _ 1 ::unscopingFunctionHeader ::unscopingFunctionFooter internalOptimizeGeneratedUnscopedFunction -32104* /ey}' defv - _ 0 ::unscopingFunctionHeader ::unscopingFunctionFooter internalOptimizeGeneratedUncapturingFunction -32104* /ey}" defv + } _ 1 ::scopingFunctionHeader ::scopingFunctionFooter internalCallOptimizeHook -32104* /ey} defv + _ 1 ::unscopingFunctionHeader ::unscopingFunctionFooter internalCallOptimizeHook -32104* /ey}' defv + _ 0 ::unscopingFunctionHeader ::unscopingFunctionFooter internalCallOptimizeHook -32104* /ey}" defv + _ 1 ::scopingFunctionHeader ::scopingFunctionFooter "ey--" | -32104* /ey}~ defv # TODO just to test the optimizer, remove this and optimize the optimizer while optimizing once everything works -- > _ ==globalMacros { defv }' ::allocateOffsetStruct diff --git a/compiler/elymasGlobalSys.ey b/compiler/elymasGlobalSys.ey index 2f2b763..3a1c988 100644 --- a/compiler/elymasGlobalSys.ey +++ b/compiler/elymasGlobalSys.ey @@ -37,6 +37,7 @@ ] :execute "elymasGlobalSysAsm.ey" include + "elymasGlobalSysOpt.ey" include "elymasGlobalSysTyped.ey" include leaveSubScope diff --git a/compiler/elymasGlobalSysOpt.ey b/compiler/elymasGlobalSysOpt.ey new file mode 100644 index 0000000..01fb1c0 --- /dev/null +++ b/compiler/elymasGlobalSysOpt.ey @@ -0,0 +1,144 @@ +< + "opt" enterSubScope + + < + # stub + # 0 -> code object + # 0 <- same code object (if different object, optimization would have taken place) + [[ + :retn + ]] /eyhook defv + + # replace code block by better code + # 0 -> code block to replate (and patch) + # 1 -> new opcodes as integer array + # 2 -> new references as array + [[ + 32 /r15 :subqImm8Reg + 24 /r15 :popqMemDisp8 # store return address + 16 /r15 :popqMemDisp8 # code block to patch + 8 /r15 :popqMemDisp8 # new opcode source + /r15 :popqMem # new references source + + # copy new opcodes + :quoteEncodingBufferCode /rdi :movqImmReg + + 8 /r15 /rsi :movqMemDisp8Reg + /rsi /ecx :movlMemReg + 8 /rsi :addqImm8Reg + 3 /rcx :shrqImm8Reg + /rcx :decqReg + /noOpcodesToCopy :jzLbl8 + + @opcodeCopyLoop + /rsi /rax :movqMemReg + 8 /rax /rax :movqMemDisp8Reg + :stosb + 8 /rsi :addqImm8Reg + /opcodeCopyLoop :loopLbl8 + + @noOpcodesToCopy + /rdi /rbp :movqRegReg + + # copy new references + :quoteEncodingBufferObjects /rdi :movqImmReg + + /r15 /rsi :movqMemReg + /rsi /ecx :movlMemReg + 8 /rsi :addqImm8Reg + 3 /rcx :shrqImm8Reg + /rcx :decqReg + /noReferencesToCopy :jzLbl8 + + @referencesCopyLoop + /rsi /rax :movqMemReg + 8 /rax /rax :movqMemDisp8Reg + :stosq + 8 /rsi :addqImm8Reg + /referencesCopyLoop :loopLbl8 + + @noReferencesToCopy + /rbp /rdi :xchgqRegReg + + ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg + /rax :callqReg + + # patch old code + 16 /r15 /rdi :movqMemDisp8Reg + 8 /rdi /rbp :movqMemDisp8Reg + 16 1 /rbp /rdi /rbp :leaqMemIndexScaleDisp8Reg + + # CHECK + /rdi /ecx :movlMemReg + /rdi /rcx :addqRegReg + /rcx /rbp :cmpqRegReg + /patchSpaceAvailable :jbLbl8 + + "attepmting to patch reference, but no space is available" ::outputError + :ud2 + + @patchSpaceAvailable + # END CHECK + + 16 /rdi :addqImm8Reg + + [ + /rax :movqImmOOBReg + ] ::loadToRdi + 16 /rax :addqImm8Reg + /rax /rdi :movqRegMem + 16 /rax :subqImm8Reg + /rax 0 /rbp :movqRegMemDisp8 + 8 /rdi :addqImm8Reg + 8 /rbp :addqImm8Reg + [ + /rax :jmpqReg + ] ::loadToRdi + + 24 /r15 :pushqMemDisp8 + 32 /r15 :addqImm8Reg + :retn + ]] /eyreplace defv + + # returns currentScope into userspace + # 0 <- ::currentScope as integer + [[ + /rbx :popqReg + + ::internalAllocateInteger /rax :movqImmReg + /rax :callqReg + /rax :pushqReg + + ::currentScope /rdx :movqImmReg + /rdx 8 /rax :movqRegMemDisp8 + + /rbx :pushqReg + :retn + ]] /eycurrentScope defv + + # returns internalAllocateScope into userspace + # 0 <- ::internalAllocateScope as integer + [[ + /rbx :popqReg + + ::internalAllocateInteger /rax :movqImmReg + /rax :callqReg + /rax :pushqReg + + ::internalAllocateScope /rdx :movqImmReg + /rdx 8 /rax :movqRegMemDisp8 + + /rbx :pushqReg + :retn + ]] /eyinternalAllocateScope defv + > _ ==globalFunctions { defv }' ::allocateOffsetStruct + + [ + globalFunctions keys eydeff { | }' createScopeEntries + createScopeExtensionEntries + ] :execute + + leaveSubScope +> -- + +# vim: syn=elymas diff --git a/compiler/standardClient.ey b/compiler/standardClient.ey index dffbbb3..87f2ab2 100644 --- a/compiler/standardClient.ey +++ b/compiler/standardClient.ey @@ -646,16 +646,17 @@ } each ] ==?toBeAbstractedTypes - "toBeAbstractedTypes: " dump - toBeAbstractedTypes dump + # "toBeAbstractedTypes: " dump + # toBeAbstractedTypes dump [ toBeAbstractedTypes { len } each ] any not { # no types need to be abstracted, function can be called - concreteArgs _ dump _ len dearray f - "attempting to call function (w.o. abstraction)" dump + concreteArgs # _ dump + _ len dearray f + # "attempting to call function (w.o. abstraction)" dump 0 concreteArgs len range { ==?i - i concreteArgs * sys .typed .type _ dump - i inputs * sys .typed .type _ dump + i concreteArgs * sys .typed .type # _ dump + i inputs * sys .typed .type # _ dump neq { "invalid input type at argument index " dump i dump "" die } rep } each * @@ -695,14 +696,14 @@ } loop } each - "concreteArgs: " dump - concreteArgs dump - "stageCalls: " dump - stageCalls dump - "argTypes: " dump - argTypes dump - "loops: " dump - loops dump + # "concreteArgs: " dump + # concreteArgs dump + # "stageCalls: " dump + # stageCalls dump + # "argTypes: " dump + # argTypes dump + # "loops: " dump + # loops dump { ==?loops ==?argTypes ==?stageCalls ==?concreteArgs stageCalls len not { @@ -725,7 +726,7 @@ [ concreteArgsCopy stageCalls argTypes loops unravel ] results -01 cat =results - results dump + # results dump # TODO: think about a single function returning multiple values # should be solved by producing two arrays side by side @@ -751,7 +752,7 @@ concreteArgs stageCalls argTypes loops unravel - "execution complete" dump + # "execution complete" dump } ? * } /execute sys .typed .deff > -- @@ -779,7 +780,7 @@ invalid # name table invalid # extension area { "<function: " o sys .asm .rawAddress base16encode64 cat ">" cat sys .err .writeall } - invalid # function code + { "<function code: " o sys .asm .rawAddress base16encode64 cat ">" cat sys .err .writeall } { "[\n" sys .err .writeall o { indent 1 add dumpIndented } each @@ -801,7 +802,7 @@ { 0 dumpIndented } > -- /dump deffd -{ ==filename # ==?f (left on the stack and executed from sys .asm .programStart) +{ ==filename # ==f (left on the stack and executed from sys .asm .programStart) sys .asm .patchProgramStart ==frozenAllocationCount # hex decoding @@ -998,6 +999,11 @@ } each out .close + + sys .asm .patchProgramStart frozenAllocationCount neq { + "freezing allocated new memory chunks, retrying..." dump + filename sys .freeze + } } /freeze sys .deff # no long-term stack use here as the executed program uses it as well diff --git a/elymas/Makefile b/elymas/Makefile index a5c13fb..08c7e69 100644 --- a/elymas/Makefile +++ b/elymas/Makefile @@ -1,2 +1,5 @@ -loaded: interpreter loaded.ey $(shell find lib -name '*.ey') - ./interpreter < loaded.ey +loaded: optimized loaded.ey $(shell find lib -name '*.ey') + ./optimized < loaded.ey || true + +optimized: interpreter optimized.ey $(shell find lib -name '*.ey') + ./interpreter < optimized.ey || true diff --git a/elymas/lib/sys/opt.ey b/elymas/lib/sys/opt.ey new file mode 100644 index 0000000..907b3fd --- /dev/null +++ b/elymas/lib/sys/opt.ey @@ -0,0 +1,413 @@ +< + "../compiler/elymasAsm.ey" include + + assembler ":" via + + [ + 8 /r15 :subqImm8Reg + /r15 :popqMem + ] ==:generalHeaderPattern + + [ + 8 /r15 :subqImm8Reg + 0 /rbx :movqImmReg + /rbx /rsi :movqMemReg + /rsi /r15 :movqRegMem + 8 /rdi :movqImmReg + 0 /rax :movqImmReg + /rax :callqReg + /rax /rbx :movqRegMem + ] ==:scopingHeaderPattern + + [ + 0 /rax :movqImmReg + /rax :pushqReg + ] ==:pushConstantPattern + + [ + 0 /rax :movqImmReg + /rax :callqReg + ] ==:callConstantPattern + + [ + 0 :callqRel32 + ] ==:footerPattern + + [ + /rbx :popqReg + 0 /rax :movqImmReg + /rax :pushqReg + /rbx :pushqReg + 0 /rax :movqImmReg + /rax :jmpqReg + ] ==:constantActiveGeneralPattern + + [ + 0 /rsi :movqImmReg + 16 /r15 :subqImm8Reg + 8 /r15 :popqMemDisp8 + + 0 /rax :movqImmReg + /rsi /rax :xchgqRegMem + /rsi /r15 :movqRegMem + 0 /rdi :movqImmReg + /rdi :callqReg + + /r15 /rsi :movqMemReg + 0 /rax :movqImmReg + /rsi /rax :movqRegMem + + 8 /r15 :pushqMemDisp8 + 16 /r15 :addqImm8Reg + :retn + ] ==:constantNormalFunctionScopedUntypedPattern + + [ + 0 /rax :movqImmReg + /rax :jmpqReg + ] ==:constantNormalFunctionUnscopedUntypedPattern + + [ + /rbx :popqReg + 0 /rax :movqImmReg + /rax :pushqReg + /rbx :jmpqReg + ] ==:constantPassivePattern + + [ + 0 /rax :movqImmReg + /rax /rax :movqMemReg + ] ==:staticLoadPattern + + [ + 16 /rax /rax :movqMemDisp8Reg + ] ==:staticLoadParentPattern + + [ + /rbx :popqReg + 0 /rax :pushqMemDisp32 + /rbx :jmpqReg + ] ==:staticLoadPassiveFromScopePattern + + [ + 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer + /rax /edx :movlMemReg # load scope length + /rdx :negqReg # prepare for substraction + /rbx :popqReg + 0 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack + /rbx :jmpqReg + ] ==:staticLoadPassiveFromExtensionPattern + + [ + /rbx :popqReg + 0 /rax :pushqMemDisp32 + /rbx :pushqReg + 0 /rax :movqImmReg + /rax :jmpqReg + ] ==:staticLoadActiveFromScopePattern + + [ + 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer + /rax /edx :movlMemReg # load scope length + /rdx :negqReg # prepare for substraction + /rbx :popqReg + 0 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack + /rbx :pushqReg + 0 /rax :movqImmReg + /rax :jmpqReg + ] ==:staticLoadActiveFromExtensionPattern + + [ + 8 /r15 :subqImm8Reg + /r15 :popqMem + ] ==:customFunctionHeaderPattern + + { ==o + sys .asm "+" via + sys .asm .|peek ==:peek + sys .opt "::" via + + o +rawAddress ==addr + # "Addr: " dump addr dump + [ addr _ 4 add range peek each ] 256 math .unbase ==totalLength + [ addr 8 add _ 4 add range peek each ] 256 math .unbase ==codeLength + # "Total length: " dump totalLength dump + # "Code length: " dump codeLength dump + addr 16 add ==i + + [ ] ==newOpcodes + { newOpcodes -01 cat =newOpcodes }' /emitOpcodes deffst + [ ] ==newReferences + { newReferences [ -102 ] cat =newReferences }' /emitReference deffst + + { =*ops =*set =*get ==pattern + 1 ==found + get ==j + pattern { _ j ops -01 { eq not { 0 =found }' rep }" { -- -- }" ? * j 1 add =j }' each + found _ { j set }' { }" ? * + } /generalMatch deff + + { { i }' { =i }' peek generalMatch }' /match deff + + generalHeaderPattern match not { "failure while matching generalHeaderPattern" die }" rep + scopingHeaderPattern match ==isScoping + + # [ :ud2 ] emitOpcodes # enable for further development + + isScoping { + # "scoping function" dump + [ + 8 /r15 :subqImm8Reg + /r15 :popqMem + 8 /r15 :subqImm8Reg + ::currentScope /rbx :movqImmReg + /rbx /rsi :movqMemReg + /rsi /r15 :movqRegMem + 8 /rdi :movqImmReg + ::internalAllocateScope /rax :movqImmReg + /rax :callqReg + /rax /rbx :movqRegMem + ] emitOpcodes + }" { + # "unscoping function" dump + [ + 8 /r15 :subqImm8Reg + /r15 :popqMem + ] emitOpcodes + }" ? * + + 1 ==continueParsing + + { continueParsing }' { i ==s + [ + { footerPattern match }' { + # "footerPattern matched" dump + 0 =continueParsing + } + + { pushConstantPattern match }' { + # "pushConstantPattern matched" dump + [ s 2 add _ 8 add range peek each ] 256 math .unbase ==pushedConstant + # "pushedConstant: " dump pushedConstant dump + + [ + pushedConstant /rax :movqImmReg + /rax :pushqReg + ] emitOpcodes + + pushedConstant emitReference + } + + { callConstantPattern match }' { + # "callConstantPattern matched" dump + [ s 2 add _ 8 add range peek each ] 256 math .unbase ==calledAddress + # "calledAddress: " dump calledAddress dump + calledAddress ==j + { { j }' { =j }' peek generalMatch }' /callTargetMatch deff + [ + { constantActiveGeneralPattern callTargetMatch }' { + # "constantActiveGeneralPattern matched" dump + [ calledAddress 3 add _ 8 add range peek each ] 256 math .unbase ==calledConstant + # "calledConstant: " dump calledConstant dump + + [ + calledConstant /rax :movqImmReg + /rax :pushqReg + "*" | +rawCodeAddress /rax :movqImmReg + /rax :callqReg + ] emitOpcodes + + calledConstant emitReference + } + + { constantNormalFunctionScopedUntypedPattern callTargetMatch }' { + # "constantNormalFunctionScopedUntypedPattern matched" dump + [ calledAddress 2 add _ 8 add range peek each ] 256 math .unbase ==functionScope + [ calledAddress 36 add _ 8 add range peek each ] 256 math .unbase ==finalAddress + # "functionScope: " dump functionScope dump + # "finalAddress: " dump finalAddress dump + + [ + functionScope /rsi :movqImmReg + 8 /r15 :subqImm8Reg + + ::currentScope /rax :movqImmReg + /rsi /rax :xchgqRegMem + /rsi /r15 :movqRegMem + finalAddress /rdi :movqImmReg + /rdi :callqReg + + /r15 /rsi :movqMemReg + ::currentScope /rax :movqImmReg + /rsi /rax :movqRegMem + + 8 /r15 :addqImm8Reg + ] emitOpcodes + + functionScope emitReference + finalAddress 16 sub emitReference + } + + { constantNormalFunctionUnscopedUntypedPattern callTargetMatch }' { + # "constantNormalFunctionUnscopedUntypedPattern matched" dump + [ calledAddress 2 add _ 8 add range peek each ] 256 math .unbase ==finalAddress + # "finalAddress: " dump finalAddress dump + + [ + finalAddress /rax :movqImmReg + /rax :callqReg + ] emitOpcodes + + finalAddress 16 sub emitReference + } + + { constantPassivePattern callTargetMatch }' { + # "constantPassivePattern matched" dump + [ calledAddress 3 add _ 8 add range peek each ] 256 math .unbase ==pushedConstant + # "pushedConstant: " dump pushedConstant dump + + [ + pushedConstant /rax :movqImmReg + /rax :pushqReg + ] emitOpcodes + + pushedConstant emitReference + } + + { staticLoadPattern callTargetMatch }' { + # "staticLoadPattern matched" dump + + [ + ::currentScope /rax :movqImmReg + /rax /rax :movqMemReg + ] emitOpcodes + + { staticLoadParentPattern callTargetMatch }' { + # "staticLoadParentPattern matched" dump + + [ + 16 /rax /rax :movqMemDisp8Reg + ] emitOpcodes + } loop + + j ==loadStart + + [ + { staticLoadPassiveFromScopePattern callTargetMatch }' { + # "staticLoadPassiveFromScopePattern" dump + [ loadStart 3 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope + # "loadStart: " dump loadStart dump + # "offsetInScope: " dump offsetInScope dump + + [ + offsetInScope /rax :pushqMemDisp32 + ] emitOpcodes + } + + { staticLoadPassiveFromExtensionPattern callTargetMatch }' { + # "staticLoadPassiveFromExtensionPattern" dump + [ loadStart 13 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope + # "loadStart: " dump loadStart dump + # "offsetInScope: " dump offsetInScope dump + + [ + 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer + /rax /edx :movlMemReg # load scope length + /rdx :negqReg # prepare for substraction + offsetInScope 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack + ] emitOpcodes + } + + { staticLoadActiveFromScopePattern callTargetMatch }' { + # "staticLoadActiveFromScopePattern" dump + [ loadStart 3 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope + # "loadStart: " dump loadStart dump + # "offsetInScope: " dump offsetInScope dump + + [ + offsetInScope /rax :pushqMemDisp32 + "*" | +rawCodeAddress /rax :movqImmReg + /rax :callqReg + ] emitOpcodes + } + + { staticLoadActiveFromExtensionPattern callTargetMatch }' { + # "staticLoadActiveFromExtensionPattern" dump + [ loadStart 13 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope + # "loadStart: " dump loadStart dump + # "offsetInScope: " dump offsetInScope dump + + [ + 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer + /rax /edx :movlMemReg # load scope length + /rdx :negqReg # prepare for substraction + offsetInScope 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack + "*" | +rawCodeAddress /rax :movqImmReg + /rax :callqReg + ] emitOpcodes + } + + { 1 }' { + [ j j 16 add range peek each ] dump + o dump + j dump + "unparsed static load opcodes in sys .opt .hook (optimizing version)" die + } + ] conds + } + + { customFunctionHeaderPattern callTargetMatch }' { + # "customFunctionHeaderPattern matched" dump + + [ + calledAddress /rax :movqImmReg + /rax :callqReg + ] emitOpcodes + + calledAddress 16 sub emitReference + } + + { 1 }' { + [ j j 16 add range peek each ] dump + o dump + j dump + "unparsed call target opcodes in sys .opt .hook (optimizing version)" die + } + ] conds + } + + { 1 }' { + [ i i 16 add range peek each ] dump + o dump + i dump + "unparsed opcodes in sys .opt .hook (optimizing version)" die + } + ] conds + } loop + + isScoping { + [ + /r15 /rcx :movqMemReg + ::currentScope /rax :movqImmReg + /rcx /rax :movqRegMem + 8 /r15 :addqImm8Reg + /r15 :pushqMem + 8 /r15 :addqImm8Reg + :retn + ] emitOpcodes + }' { + [ + /r15 :pushqMem + 8 /r15 :addqImm8Reg + :retn + ] emitOpcodes + }' ? * + + # "optimization finished" dump + + newReferences newOpcodes o ::replace + 1 # return something different from o to signal successful optimization + } /hook sys .opt .deff +> -- + +# vim: syn=elymas diff --git a/elymas/optimized.ey b/elymas/optimized.ey new file mode 100644 index 0000000..c2961d4 --- /dev/null +++ b/elymas/optimized.ey @@ -0,0 +1,8 @@ +#!/usr/bin/env elymas + +[ + "lib/math.ey" + "lib/sys/opt.ey" +] { _ dump include }' each + +{ "/proc/self/fd/0" include }' "optimized" sys .freeze @@ -183,6 +183,7 @@ Small set in between * Length in bytes (including header) bit 63-60: 0 1 1 0 bit 59: reserved for GC + bit 58: optimized or being optimized * Length of opcode block (rounded to 8 byte) * [ <opcode> ]* * [ <object pointer> ]* |
