aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDrahflow <drahflow@gmx.de>2013-09-20 14:51:02 +0200
committerDrahflow <drahflow@gmx.de>2013-09-20 14:51:02 +0200
commiteea02bbc59f4f0ece2756cdc846d351238d2b6a7 (patch)
treec849207f9373bbbc47abb40ed8fc7b6708b1e48e
parentcb5d3e47e54a188ecbb5385f80b0bdadf9297b33 (diff)
Client-side optimizer
-rw-r--r--Makefile4
-rw-r--r--compiler/elymasAsm.ey17
-rw-r--r--compiler/elymasAsmLib.ey167
-rw-r--r--compiler/elymasGlobal.ey188
-rw-r--r--compiler/elymasGlobalSys.ey1
-rw-r--r--compiler/elymasGlobalSysOpt.ey144
-rw-r--r--compiler/standardClient.ey42
-rw-r--r--elymas/Makefile7
-rw-r--r--elymas/lib/sys/opt.ey413
-rw-r--r--elymas/optimized.ey8
-rw-r--r--notes1
11 files changed, 766 insertions, 226 deletions
diff --git a/Makefile b/Makefile
index d171b79..3cb2f35 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
all: elymas/loaded
-elymas/loaded: elymas/interpreter $(shell find elymas/lib/ -name '*.ey' )
- cd elymas && $(MAKE) loaded
+elymas/loaded: elymas/interpreter $(shell find elymas/ -name '*.ey' )
+ cd elymas && $(MAKE)
elymas/interpreter: elymas/interpreter.ey compiler/*.ey interpreter/Metal.so interpreter/ACME
cd compiler && \
diff --git a/compiler/elymasAsm.ey b/compiler/elymasAsm.ey
index e1f7d5a..ca3bdd7 100644
--- a/compiler/elymasAsm.ey
+++ b/compiler/elymasAsm.ey
@@ -62,7 +62,7 @@
-01 0 gt %08 mul add
} /rex deff
- { =mem =reg
+ { ==mem ==reg
mem [ /spl /sp /esp /rsp /bpl /bp /ebp /rbp ] streq any {
"modrm00 not possible on rsp / rbp and their partial registers" die
} rep
@@ -72,7 +72,7 @@
reg regno 8 mul %38 band add
} /modrm00 deff
- { =mem =reg
+ { ==mem ==reg
mem [ /spl /sp /esp /rsp ] streq any not assert
%40
@@ -80,7 +80,7 @@
reg regno 8 mul %38 band add
} /modrm01 deff
- { =mem =reg
+ { ==mem ==reg
mem [ /spl /sp /esp /rsp ] streq any not assert
%80
@@ -88,7 +88,7 @@
reg regno 8 mul %38 band add
} /modrm10 deff
- { =mem =reg
+ { ==mem ==reg
%C0
mem regno %07 band add
reg regno 8 mul %38 band add
@@ -1091,6 +1091,15 @@
%0B
} /ud2 deff
+ { ==regb ==rega
+ rega bit64assert
+ regb bit64assert
+
+ 1 rega /none regb rex
+ %87
+ rega regb modrm11
+ } /xchgqRegReg deff
+
{ ==mem ==reg
reg bit64assert
mem bit64assert
diff --git a/compiler/elymasAsmLib.ey b/compiler/elymasAsmLib.ey
index f2f597f..e54c5c0 100644
--- a/compiler/elymasAsmLib.ey
+++ b/compiler/elymasAsmLib.ey
@@ -82,7 +82,7 @@
{ %00 %00 %00 %00 %00 %50 %00 %00 } /BLOCKBASE deff
{ %00 %00 %00 %00 %00 %40 %00 %00 } /MARKBASE deff
# 4096 16 mul 8 mul ==ALLOCCHUNKSIZE # minimum ALLOCCHUNKSIZE
- 4096 16 mul 8 mul 16 mul ==ALLOCCHUNKSIZE # FIXME: there is still some wonkyness with freezing
+ 4096 16 mul 8 mul 256 mul ==ALLOCCHUNKSIZE # FIXME: there is still some wonkyness with freezing
<
# current end of heap memory (grows upwards)
@@ -178,8 +178,8 @@
[[
/rbx :pushqReg
# /rdi :pushqReg # TODO remove these three lines once load-testing the GC seems unnecessary
- # /markAndSweep :callqLbl32
- # /rdi :popqReg
+ # /markAndSweep :callqLbl32 # load testing
+ # /rdi :popqReg # load testing
/rdi :pushqReg
/searchForFreeBlock :callqLbl32
/rax /rax :andqRegReg
@@ -231,32 +231,31 @@
/rsi /rsi :xorqRegReg
# rsi > 0 => currently counting block extent of free block
- @testBlockBitLoop
+ @searchFreeBlockStart
/rbx /r8 :btqRegMem # test block bitmap
- /nonFreeBlock :jcLbl8 # block not free
- /rsi /rsi :andqRegReg
- /notCurrentlyCounting :jzLbl8
+ /nonFreeBlockStartFound :jcLbl8 # block not free
+ /rbx /r9 :btqRegMem # test mark bitmap
+ /freeBlockStartFound :jcLbl8 # block marked, i.e. truly free block start
- @currentlyCounting
- 16 /rsi :addqImm8Reg
- /rsi /rdi :cmpqRegReg
- /freeBlockFound :jbeLbl8
+ @resumeFreeBlockStartSearch
+ /rsi /rsi :xorqRegReg
+ @nonFreeBlockStartFound
/rbx :incqReg
/rbx /rbp :cmpqRegReg
- /testBlockBitLoop :jaLbl8
+ /searchFreeBlockStart :jaLbl8
/noFreeBlockAvailable :jmpLbl32
- @notCurrentlyCounting
- /rbx /r9 :btqRegMem # test mark bitmap
- /currentlyCounting :jcLbl8 # block marked, i.e. truly free block start
-
- @nonFreeBlock
- /rsi /rsi :xorqRegReg
+ @freeBlockStartFound
+ 16 /rsi :addqImm8Reg
+ /rsi /rdi :cmpqRegReg
+ /freeBlockFound :jbeLbl8
/rbx :incqReg
/rbx /rbp :cmpqRegReg
- /testBlockBitLoop :jaLbl8
- /noFreeBlockAvailable :jmpLbl32
+ /noFreeBlockAvailable :jbeLbl32
+ /rbx /r8 :btqRegMem # test block bitmap
+ /resumeFreeBlockStartSearch :jcLbl8 # block not free
+ /freeBlockStartFound :jmpLbl8
@freeBlockFound
# rdi == size of block to allocate
@@ -407,7 +406,7 @@
@loopThroughMainStack
8 /rsi :subqImm8Reg
/rsi /rdi :movqMemReg
- /markObject :callqLbl32
+ /markStackObject :callqLbl32
/rsi /rsp :cmpqRegReg
/loopThroughMainStack :jbLbl8
@@ -415,7 +414,7 @@
@loopThroughCallStack
8 /rsi :subqImm8Reg
/rsi /rdi :movqMemReg
- /markObject :callqLbl32
+ /markStackObject :callqLbl32
/rsi /r15 :cmpqRegReg
/loopThroughCallStack :jbLbl8
@@ -424,7 +423,7 @@
0 /rsi :cmpqImm8Mem
/quoteEncodingBufferUnused :jzLbl8
- :STACKSIZE 8 sub /rcx :movqImmReg
+ :STACKSIZE 8 sub 8 div /rcx :movqImmReg
@loopThroughEncodingBuffer
/rsi /rdi :movqMemReg
/markObject :callqLbl32
@@ -460,6 +459,33 @@
/r8 :popqReg
:retn
+ # recursively mark this object reachable
+ # guaranteed not to clobber rcx, rsi (because it is used in many loops)
+ @markStackObject
+ # rdi == address of a reachable object, of reachable code within a code block or some random bits
+ /rdi /r8 :cmpqRegReg
+ /markObjectDone :jaLbl32 # pointing below the heap
+ /rdi /r11 :cmpqRegReg
+ /markObjectDone :jbeLbl32 # pointing above the heap
+ # rdi == address of a reachable object or of reachable code within a code block
+ # scan for object downwards
+ /r8 /rdi :subqRegReg
+ # rdi == byte offset relative to heap begin
+ 4 /rdi :shrqImm8Reg
+ # rdi == cell index of first 16-byte cell of possible object
+
+ @searchStackObject
+ /rdi /r9 :btqRegMem # test block bit
+ /rdi :decqReg
+ /searchStackObject :jncLbl8
+ /rdi :incqReg
+ # rdi == cell index of first 16-byte cell of object
+
+ # TODO optimize this by jumping right into markObject
+ 4 /rdi :shlqImm8Reg
+ /r8 /rdi :addqRegReg
+ /markObject :jmpLbl8
+
@markObjectDone
:retn
@@ -1025,6 +1051,60 @@
] /internalAllocateString defv
> { defv }' allocateOffsetStruct
+ <
+ # allocate a new code block from the encoding buffers
+ # rdi -> end of allocated code (address after last used opcode in quoteEncodingBufferCode)
+ # rbp -> end of allocated object pointers (address after last used address in quoteEncodingBufferObjects)
+ # rax <- resulting code object
+ [[
+ :quoteEncodingBufferCode /rax :movqImmReg
+ /rax /rdi :subqRegReg
+
+ /rdi :decqReg
+ 3 /rdi :shrqImm8Reg
+ /rdi :incqReg
+ 3 /rdi :shlqImm8Reg
+
+ :quoteEncodingBufferObjects /rax :movqImmReg
+ /rax /rbp :subqRegReg
+
+ /rbp /rbp :andqRegReg
+ /atLeastOnePointer :jnzLbl8
+
+ /rbp /rax :movqRegMem # store fake zero pointer
+ 8 /rbp :addqImm8Reg
+
+ @atLeastOnePointer
+
+ /rbp :pushqReg # store pointer byte count
+ /rdi :pushqReg # store opcode byte count (rounded up to 8 byte)
+
+ /rbp /rdi :addqRegReg
+ internalAllocateCode /rax :movqImmReg
+ /rax :callqReg
+
+ # rax == code block on heap
+
+ # copy opcodes
+ :quoteEncodingBufferCode /rsi :movqImmReg
+ 16 /rax /rdi :leaqMemDisp8Reg
+ /rcx :popqReg
+ /rcx 8 /rax :movqRegMemDisp8
+ :reprcx :movsb
+
+ # copy object pointers
+ :quoteEncodingBufferObjects /rsi :movqImmReg
+ /rcx :popqReg
+ :reprcx :movsb
+
+ # mark buffer unused
+ :quoteEncodingBufferObjects /rdi :movqImmReg
+ /rcx /rdi :movqRegMem # rcx is conveniently zero
+
+ :retn
+ ]] /internalAllocateCodeFromEncodingBuffer defv
+ > { defv }' allocateOffsetStruct
+
[
8 /r15 :subqImm8Reg
/r15 :popqMem
@@ -1059,47 +1139,6 @@
:retn
] /unscopingFunctionFooter defv
- # rdi -> end of allocated code (address after last used opcode in quoteEncodingBufferCode)
- # rbp -> end of allocated object pointers (address after last used address in quoteEncodingBufferObjects)
- # rax <- resulting code object
- {
- :quoteEncodingBufferCode /rax :movqImmReg
- /rax /rdi :subqRegReg
-
- :quoteEncodingBufferObjects /rax :movqImmReg
- /rax /rbp :subqRegReg
-
- /rdi :decqReg
- 3 /rdi :shrqImm8Reg
- /rdi :incqReg
- 3 /rdi :shlqImm8Reg
-
- /rbp :pushqReg # store pointer byte count
- /rdi :pushqReg # store opcode byte count (rounded up to 8 byte)
-
- /rbp /rdi :addqRegReg
- internalAllocateCode /rax :movqImmReg
- /rax :callqReg
-
- # rax == code block on heap
-
- # copy opcodes
- :quoteEncodingBufferCode /rsi :movqImmReg
- 16 /rax /rdi :leaqMemDisp8Reg
- /rcx :popqReg
- /rcx 8 /rax :movqRegMemDisp8
- :reprcx :movsb
-
- # copy object pointers
- :quoteEncodingBufferObjects /rsi :movqImmReg
- /rcx :popqReg
- :reprcx :movsb
-
- # mark buffer unused
- :quoteEncodingBufferObjects /rdi :movqImmReg
- /rcx /rdi :movqRegMem # rcx is conveniently zero
- } /allocateCodeFromEncodingBuffer deff
-
{ strToUTF8Bytes _ =*v len _ ==exactLength
1 sub 8 div 4 add 8 mul ==memoryLength
diff --git a/compiler/elymasGlobal.ey b/compiler/elymasGlobal.ey
index 2918a19..eb5fe8b 100644
--- a/compiler/elymasGlobal.ey
+++ b/compiler/elymasGlobal.ey
@@ -150,6 +150,7 @@
/rdx :popqReg
/rax /rax :testqRegReg
/nameOffsetKnown :jnzLbl8
+ /nameSearch :jmpLbl8
# if not exists, insert
@nameUndefined
@@ -1448,138 +1449,48 @@
/rdi :jmpqReg # continue with freshly patched code
]] /internalExecuteIdentifierUnquotedAndPatchLateResolve defv
- # optimize function code by
- # * patching double redirects, # FIXME correctly detect out-of-heap even if below heap
+ # call function optimize hook on code block
# 0 -> address of code block
[[
- /rbx :popqReg
- /rsi :popqReg
-# FIXME FIXME FIXME this should be reenabled soonish
-# 8 /rsi :addqImm8Reg # move to start of code
-# /rsi :pushqReg
-# :quoteEncodingBuffer /rdi :movqImmReg
-#
-# %34 /rcx :movqImmReg # skip to generated code
-# :reprcx :movsb
-#
-# @parseLoop
-# 0 [ 0 :callqRel32 ] * 0 /rsi :cmpbImmMemDisp8
-# /parseFooter :jzLbl32
-# 1 [ 8 /rax :addqImm8Reg ] * 11 /rsi :cmpbImmMemDisp8
-# /parseFunctionCall :jzLbl32
-# 0 [ /rax :pushqReg ] * 10 /rsi :cmpbImmMemDisp8
-# /parseConstantPush :jzLbl32
-# 0 [ /rax :callqReg ] * 10 /rsi :cmpbImmMemDisp8
-# /parseConstantCall :jzLbl32
-#
-# "unknown assembly instruction during internalOptimizeGeneratedScopedFunction" ::outputError
-# :ud2
-#
-# @parseFunctionCall
-# 2 /rsi /rdx :movqMemDisp8Reg # load function address
-# 1 [ 0 /rax :movqImmReg ] * 9 /rdx :cmpbImmMemDisp8
-# /parseFunctionCallSkip :jnzLbl32
-# 0 [ /rax :jmpqReg ] * 18 /rdx :cmpbImmMemDisp8
-# /parseFunctionCallSkip :jnzLbl32
-# 1 [ /rax :jmpqReg ] * 19 /rdx :cmpbImmMemDisp8
-# /parseFunctionCallSkip :jnzLbl32
-#
-# 10 /rdx /rcx :movqMemDisp8Reg # load final function address
-# /rax :movqImmOOBReg ::HEAPEND
-# /rax /rcx :cmpqRegReg
-# /parseFunctionUnGCable :jaeLbl8
-#
-# # @parseFunctionGCable
-# [
-# /rax :movqImmOOBReg
-# ] ::loadToRdi
-# 8 /rcx :subqImm8Reg # make address point to code block object
-# /rcx /rdi :movqRegMem # load final function address
-# 8 /rdi :addqImm8Reg
-# [
-# 8 /rax :addqImm8Reg # skip code block object header
-# /rax :callqReg
-# ] ::loadToRdi
-# /parseFunctionCallDone :jmpLbl8
-#
-# @parseFunctionUnGCable
-# [
-# /rax :movqImmOOBReg
-# ] ::loadToRdi
-# /rcx /rdi :movqRegMem # load final function address
-# 8 /rdi :addqImm8Reg
-# [
-# /rax :callqReg
-# ] ::loadToRdi
-#
-# @parseFunctionCallDone
-# 16 /rsi :addqImm8Reg
-# /parseLoop :jmpLbl32
-#
-# @parseFunctionCallSkip
-# :movsq # verbatim copy
-# :movsq
-# /parseLoop :jmpLbl32
-#
-# @parseConstantPush
-# :movsq # verbatim copy
-# :movsw
-# :movsb
-# /parseLoop :jmpLbl32
-#
-# @parseConstantCall
-# :movsq # verbatim copy
-# :movsl
-# /parseLoop :jmpLbl32
-#
-# @parseFooter
-# ::scopingFunctionFooter ::loadToRdi
-# ::allocateCodeFromEncodingBuffer
-# # rax == optimized code object on heap
-#
-# # patch indirection to new code into old
-# /rsi :popqReg # rsi == old code start
-# /rsi /rdi :movqRegReg
-# [
-# /rax :movqImmOOBReg
-# ] ::loadToRdi
-# 8 /rax :addqImm8Reg
-# /rax /rdi :movqRegMem
-# 8 /rax :subqImm8Reg
-# 8 /rdi :addqImm8Reg
-# [
-# /rax :jmpqReg
-# /rax :movqImmOOBReg
-# ] ::loadToRdi
-# /rax /rdi :movqRegMem
-#
-# # TODO kill remaining opcodes to remove GC-followable memory addresses
-# # possible solution: updatable exact-length field for code blocks
-# # but think of the return stack
- /rbx :jmpqReg
- ]] /internalOptimizeGeneratedScopedFunction defv
-
- # optimize function code by
- # * TODO patching double redirects
- # 0 -> address of code block
- [[
- # :ud2 # FIXME enable here to debug function optimization
- /rbx :popqReg
- /rax :popqReg # FIXME do something useful here
- /rbx :pushqReg
- :retn
- ]] /internalOptimizeGeneratedUnscopedFunction defv
+ 8 /r15 :subqImm8Reg
+ /r15 :popqMem
- # optimize function code by
- # * TODO patching double redirects
- # 0 -> address of code block
- [[
- # :ud2 # FIXME enable here to debug function optimization
- /rbx :popqReg
- /rax :popqReg # FIXME do something useful here
- /rbx :pushqReg
+ /rax :popqReg
+ 58 /rax :btsqImm8Mem
+ /alreadyOptimized :jcLbl8
+ /rax :pushqReg
+ /rax :pushqReg
+
+ /rax :movqImmOOBReg "sys" ::string
+ /rax :pushqReg
+ /rax :movqImmOOBReg "internalCallOptimizeHook" "ey|" ::linkAbs64
+ /rax :callqReg
+
+ /rax :movqImmOOBReg "opt" ::string
+ /rax :pushqReg
+ /rax :movqImmOOBReg "internalCallOptimizeHook" "ey." ::linkAbs64
+ /rax :callqReg
+
+ /rax :movqImmOOBReg "hook" ::string
+ /rax :pushqReg
+ /rax :movqImmOOBReg "internalCallOptimizeHook" "ey." ::linkAbs64
+ /rax :callqReg
+
+ /rcx :popqReg # result object (not necessarily new code object though)
+ /rax :popqReg # old object
+
+ /rax /rcx :cmpqRegReg
+ /optimizationHappened :jnzLbl8
+
+ 58 /rax :btrqImm8Mem # reset optimized bit
+
+ @optimizationHappened
+ @alreadyOptimized
+
+ /r15 :pushqMem
+ 8 /r15 :addqImm8Reg
:retn
- ]] /internalOptimizeGeneratedUncapturingFunction defv
+ ]] /internalCallOptimizeHook defv
> { defv }' ::allocateOffsetStruct
<
@@ -1683,7 +1594,8 @@
/rax :callqReg
] ::unscopingFunctionFooter cat ::loadToRdi
- ::allocateCodeFromEncodingBuffer
+ ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg
+ /rax :callqReg
# rax == code block on heap
# create non-capturing function object
@@ -1777,7 +1689,8 @@
/rax :callqReg
] ::unscopingFunctionFooter cat ::loadToRdi
- ::allocateCodeFromEncodingBuffer
+ ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg
+ /rax :callqReg
# rax == code block on heap
# create non-capturing function object
@@ -2348,7 +2261,7 @@
] ::loadToRdi
:quoteEncodingBufferCode /rbx :movqImmReg
/rdi /rax :movqRegReg
- 5 /rax :addqImm8Reg
+ 13 /rax :addqImm8Reg
/rbx /rax :subqRegReg
/rax /rdi :movqRegMem
8 /rdi :addqImm8Reg
@@ -2361,7 +2274,8 @@
] ::loadToRdi
footer ::loadToRdi
- ::allocateCodeFromEncodingBuffer
+ ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg
+ /rax :callqReg
# rax == code block on heap
@@ -2421,7 +2335,8 @@
/rax :pushqReg
] ::unscopingFunctionFooter cat ::loadToRdi
- ::allocateCodeFromEncodingBuffer
+ ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg
+ /rax :callqReg
# rax == code object on heap
@@ -2436,9 +2351,10 @@
/done :jmpLbl32
]]
- } _ 1 ::scopingFunctionHeader ::scopingFunctionFooter internalOptimizeGeneratedScopedFunction -32104* /ey} defv
- _ 1 ::unscopingFunctionHeader ::unscopingFunctionFooter internalOptimizeGeneratedUnscopedFunction -32104* /ey}' defv
- _ 0 ::unscopingFunctionHeader ::unscopingFunctionFooter internalOptimizeGeneratedUncapturingFunction -32104* /ey}" defv
+ } _ 1 ::scopingFunctionHeader ::scopingFunctionFooter internalCallOptimizeHook -32104* /ey} defv
+ _ 1 ::unscopingFunctionHeader ::unscopingFunctionFooter internalCallOptimizeHook -32104* /ey}' defv
+ _ 0 ::unscopingFunctionHeader ::unscopingFunctionFooter internalCallOptimizeHook -32104* /ey}" defv
+ _ 1 ::scopingFunctionHeader ::scopingFunctionFooter "ey--" | -32104* /ey}~ defv # TODO just to test the optimizer, remove this and optimize the optimizer while optimizing once everything works
--
> _ ==globalMacros { defv }' ::allocateOffsetStruct
diff --git a/compiler/elymasGlobalSys.ey b/compiler/elymasGlobalSys.ey
index 2f2b763..3a1c988 100644
--- a/compiler/elymasGlobalSys.ey
+++ b/compiler/elymasGlobalSys.ey
@@ -37,6 +37,7 @@
] :execute
"elymasGlobalSysAsm.ey" include
+ "elymasGlobalSysOpt.ey" include
"elymasGlobalSysTyped.ey" include
leaveSubScope
diff --git a/compiler/elymasGlobalSysOpt.ey b/compiler/elymasGlobalSysOpt.ey
new file mode 100644
index 0000000..01fb1c0
--- /dev/null
+++ b/compiler/elymasGlobalSysOpt.ey
@@ -0,0 +1,144 @@
+<
+ "opt" enterSubScope
+
+ <
+ # stub
+ # 0 -> code object
+ # 0 <- same code object (if different object, optimization would have taken place)
+ [[
+ :retn
+ ]] /eyhook defv
+
+ # replace code block by better code
+ # 0 -> code block to replace (and patch)
+ # 1 -> new opcodes as integer array
+ # 2 -> new references as array
+ [[
+ 32 /r15 :subqImm8Reg
+ 24 /r15 :popqMemDisp8 # store return address
+ 16 /r15 :popqMemDisp8 # code block to patch
+ 8 /r15 :popqMemDisp8 # new opcode source
+ /r15 :popqMem # new references source
+
+ # copy new opcodes
+ :quoteEncodingBufferCode /rdi :movqImmReg
+
+ 8 /r15 /rsi :movqMemDisp8Reg
+ /rsi /ecx :movlMemReg
+ 8 /rsi :addqImm8Reg
+ 3 /rcx :shrqImm8Reg
+ /rcx :decqReg
+ /noOpcodesToCopy :jzLbl8
+
+ @opcodeCopyLoop
+ /rsi /rax :movqMemReg
+ 8 /rax /rax :movqMemDisp8Reg
+ :stosb
+ 8 /rsi :addqImm8Reg
+ /opcodeCopyLoop :loopLbl8
+
+ @noOpcodesToCopy
+ /rdi /rbp :movqRegReg
+
+ # copy new references
+ :quoteEncodingBufferObjects /rdi :movqImmReg
+
+ /r15 /rsi :movqMemReg
+ /rsi /ecx :movlMemReg
+ 8 /rsi :addqImm8Reg
+ 3 /rcx :shrqImm8Reg
+ /rcx :decqReg
+ /noReferencesToCopy :jzLbl8
+
+ @referencesCopyLoop
+ /rsi /rax :movqMemReg
+ 8 /rax /rax :movqMemDisp8Reg
+ :stosq
+ 8 /rsi :addqImm8Reg
+ /referencesCopyLoop :loopLbl8
+
+ @noReferencesToCopy
+ /rbp /rdi :xchgqRegReg
+
+ ::internalAllocateCodeFromEncodingBuffer /rax :movqImmReg
+ /rax :callqReg
+
+ # patch old code
+ 16 /r15 /rdi :movqMemDisp8Reg
+ 8 /rdi /rbp :movqMemDisp8Reg
+ 16 1 /rbp /rdi /rbp :leaqMemIndexScaleDisp8Reg
+
+ # CHECK: rbp (address computed just above as rdi + 16 + opcode length) must lie below the end of the old block
+ /rdi /ecx :movlMemReg # low 32 bits of the header word (presumably the block length in bytes - confirm against notes)
+ /rdi /rcx :addqRegReg # rcx == rdi + that length
+ /rcx /rbp :cmpqRegReg
+ /patchSpaceAvailable :jbLbl8
+
+ "attempting to patch reference, but no space is available" ::outputError
+ :ud2
+
+ @patchSpaceAvailable
+ # END CHECK
+
+ 16 /rdi :addqImm8Reg
+
+ [
+ /rax :movqImmOOBReg
+ ] ::loadToRdi
+ 16 /rax :addqImm8Reg
+ /rax /rdi :movqRegMem
+ 16 /rax :subqImm8Reg
+ /rax 0 /rbp :movqRegMemDisp8
+ 8 /rdi :addqImm8Reg
+ 8 /rbp :addqImm8Reg
+ [
+ /rax :jmpqReg
+ ] ::loadToRdi
+
+ 24 /r15 :pushqMemDisp8
+ 32 /r15 :addqImm8Reg
+ :retn
+ ]] /eyreplace defv
+
+ # returns currentScope into userspace
+ # 0 <- ::currentScope as integer
+ [[
+ /rbx :popqReg
+
+ ::internalAllocateInteger /rax :movqImmReg
+ /rax :callqReg
+ /rax :pushqReg
+
+ ::currentScope /rdx :movqImmReg
+ /rdx 8 /rax :movqRegMemDisp8
+
+ /rbx :pushqReg
+ :retn
+ ]] /eycurrentScope defv
+
+ # returns internalAllocateScope into userspace
+ # 0 <- ::internalAllocateScope as integer
+ [[
+ /rbx :popqReg
+
+ ::internalAllocateInteger /rax :movqImmReg
+ /rax :callqReg
+ /rax :pushqReg
+
+ ::internalAllocateScope /rdx :movqImmReg
+ /rdx 8 /rax :movqRegMemDisp8
+
+ /rbx :pushqReg
+ :retn
+ ]] /eyinternalAllocateScope defv
+ > _ ==globalFunctions { defv }' ::allocateOffsetStruct
+
+ [
+ globalFunctions keys eydeff { | }' createScopeEntries
+ createScopeExtensionEntries
+ ] :execute
+
+ leaveSubScope
+> --
+
+# vim: syn=elymas
diff --git a/compiler/standardClient.ey b/compiler/standardClient.ey
index dffbbb3..87f2ab2 100644
--- a/compiler/standardClient.ey
+++ b/compiler/standardClient.ey
@@ -646,16 +646,17 @@
} each
] ==?toBeAbstractedTypes
- "toBeAbstractedTypes: " dump
- toBeAbstractedTypes dump
+ # "toBeAbstractedTypes: " dump
+ # toBeAbstractedTypes dump
[ toBeAbstractedTypes { len } each ] any not {
# no types need to be abstracted, function can be called
- concreteArgs _ dump _ len dearray f
- "attempting to call function (w.o. abstraction)" dump
+ concreteArgs # _ dump
+ _ len dearray f
+ # "attempting to call function (w.o. abstraction)" dump
0 concreteArgs len range { ==?i
- i concreteArgs * sys .typed .type _ dump
- i inputs * sys .typed .type _ dump
+ i concreteArgs * sys .typed .type # _ dump
+ i inputs * sys .typed .type # _ dump
neq { "invalid input type at argument index " dump i dump "" die } rep
} each
*
@@ -695,14 +696,14 @@
} loop
} each
- "concreteArgs: " dump
- concreteArgs dump
- "stageCalls: " dump
- stageCalls dump
- "argTypes: " dump
- argTypes dump
- "loops: " dump
- loops dump
+ # "concreteArgs: " dump
+ # concreteArgs dump
+ # "stageCalls: " dump
+ # stageCalls dump
+ # "argTypes: " dump
+ # argTypes dump
+ # "loops: " dump
+ # loops dump
{ ==?loops ==?argTypes ==?stageCalls ==?concreteArgs
stageCalls len not {
@@ -725,7 +726,7 @@
[ concreteArgsCopy stageCalls argTypes loops unravel ]
results -01 cat =results
- results dump
+ # results dump
# TODO: think about a single function returning multiple values
# should be solved by producing two arrays side by side
@@ -751,7 +752,7 @@
concreteArgs stageCalls argTypes loops unravel
- "execution complete" dump
+ # "execution complete" dump
} ? *
} /execute sys .typed .deff
> --
@@ -779,7 +780,7 @@
invalid # name table
invalid # extension area
{ "<function: " o sys .asm .rawAddress base16encode64 cat ">" cat sys .err .writeall }
- invalid # function code
+ { "<function code: " o sys .asm .rawAddress base16encode64 cat ">" cat sys .err .writeall }
{
"[\n" sys .err .writeall
o { indent 1 add dumpIndented } each
@@ -801,7 +802,7 @@
{ 0 dumpIndented }
> -- /dump deffd
-{ ==filename # ==?f (left on the stack and executed from sys .asm .programStart)
+{ ==filename # ==f (left on the stack and executed from sys .asm .programStart)
sys .asm .patchProgramStart ==frozenAllocationCount
# hex decoding
@@ -998,6 +999,11 @@
} each
out .close
+
+ sys .asm .patchProgramStart frozenAllocationCount neq { # value differs from the one sampled at the start of freeze
+ "freezing allocated new memory chunks, retrying..." dump
+ filename sys .freeze
+ } rep
} /freeze sys .deff
# no long-term stack use here as the executed program uses it as well
diff --git a/elymas/Makefile b/elymas/Makefile
index a5c13fb..08c7e69 100644
--- a/elymas/Makefile
+++ b/elymas/Makefile
@@ -1,2 +1,5 @@
-loaded: interpreter loaded.ey $(shell find lib -name '*.ey')
- ./interpreter < loaded.ey
+loaded: optimized loaded.ey $(shell find lib -name '*.ey') # NOTE(review): "|| true" below hides a failed run from make - confirm this is intended
+ ./optimized < loaded.ey || true
+
+optimized: interpreter optimized.ey $(shell find lib -name '*.ey') # NOTE(review): "|| true" below hides a failed run from make - confirm this is intended
+ ./interpreter < optimized.ey || true
diff --git a/elymas/lib/sys/opt.ey b/elymas/lib/sys/opt.ey
new file mode 100644
index 0000000..907b3fd
--- /dev/null
+++ b/elymas/lib/sys/opt.ey
@@ -0,0 +1,413 @@
+<
+ "../compiler/elymasAsm.ey" include
+
+ assembler ":" via
+
+ [
+ 8 /r15 :subqImm8Reg
+ /r15 :popqMem
+ ] ==:generalHeaderPattern
+
+ [
+ 8 /r15 :subqImm8Reg
+ 0 /rbx :movqImmReg
+ /rbx /rsi :movqMemReg
+ /rsi /r15 :movqRegMem
+ 8 /rdi :movqImmReg
+ 0 /rax :movqImmReg
+ /rax :callqReg
+ /rax /rbx :movqRegMem
+ ] ==:scopingHeaderPattern
+
+ [
+ 0 /rax :movqImmReg
+ /rax :pushqReg
+ ] ==:pushConstantPattern
+
+ [
+ 0 /rax :movqImmReg
+ /rax :callqReg
+ ] ==:callConstantPattern
+
+ [
+ 0 :callqRel32
+ ] ==:footerPattern
+
+ [
+ /rbx :popqReg
+ 0 /rax :movqImmReg
+ /rax :pushqReg
+ /rbx :pushqReg
+ 0 /rax :movqImmReg
+ /rax :jmpqReg
+ ] ==:constantActiveGeneralPattern
+
+ [
+ 0 /rsi :movqImmReg
+ 16 /r15 :subqImm8Reg
+ 8 /r15 :popqMemDisp8
+
+ 0 /rax :movqImmReg
+ /rsi /rax :xchgqRegMem
+ /rsi /r15 :movqRegMem
+ 0 /rdi :movqImmReg
+ /rdi :callqReg
+
+ /r15 /rsi :movqMemReg
+ 0 /rax :movqImmReg
+ /rsi /rax :movqRegMem
+
+ 8 /r15 :pushqMemDisp8
+ 16 /r15 :addqImm8Reg
+ :retn
+ ] ==:constantNormalFunctionScopedUntypedPattern
+
+ [
+ 0 /rax :movqImmReg
+ /rax :jmpqReg
+ ] ==:constantNormalFunctionUnscopedUntypedPattern
+
+ [
+ /rbx :popqReg
+ 0 /rax :movqImmReg
+ /rax :pushqReg
+ /rbx :jmpqReg
+ ] ==:constantPassivePattern
+
+ [
+ 0 /rax :movqImmReg
+ /rax /rax :movqMemReg
+ ] ==:staticLoadPattern
+
+ [
+ 16 /rax /rax :movqMemDisp8Reg
+ ] ==:staticLoadParentPattern
+
+ [
+ /rbx :popqReg
+ 0 /rax :pushqMemDisp32
+ /rbx :jmpqReg
+ ] ==:staticLoadPassiveFromScopePattern
+
+ [
+ 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer
+ /rax /edx :movlMemReg # load scope length
+ /rdx :negqReg # prepare for subtraction
+ /rbx :popqReg
+ 0 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack
+ /rbx :jmpqReg
+ ] ==:staticLoadPassiveFromExtensionPattern
+
+ [
+ /rbx :popqReg
+ 0 /rax :pushqMemDisp32
+ /rbx :pushqReg
+ 0 /rax :movqImmReg
+ /rax :jmpqReg
+ ] ==:staticLoadActiveFromScopePattern
+
+ [
+ 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer
+ /rax /edx :movlMemReg # load scope length
+ /rdx :negqReg # prepare for subtraction
+ /rbx :popqReg
+ 0 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack
+ /rbx :pushqReg
+ 0 /rax :movqImmReg
+ /rax :jmpqReg
+ ] ==:staticLoadActiveFromExtensionPattern
+
+ [
+ 8 /r15 :subqImm8Reg
+ /r15 :popqMem
+ ] ==:customFunctionHeaderPattern
+
+ { ==o
+ sys .asm "+" via
+ sys .asm .|peek ==:peek
+ sys .opt "::" via
+
+ o +rawAddress ==addr
+ # "Addr: " dump addr dump
+ [ addr _ 4 add range peek each ] 256 math .unbase ==totalLength
+ [ addr 8 add _ 4 add range peek each ] 256 math .unbase ==codeLength
+ # "Total length: " dump totalLength dump
+ # "Code length: " dump codeLength dump
+ addr 16 add ==i
+
+ [ ] ==newOpcodes
+ { newOpcodes -01 cat =newOpcodes }' /emitOpcodes deffst
+ [ ] ==newReferences
+ { newReferences [ -102 ] cat =newReferences }' /emitReference deffst
+
+ { =*ops =*set =*get ==pattern
+ 1 ==found
+ get ==j
+ pattern { _ j ops -01 { eq not { 0 =found }' rep }" { -- -- }" ? * j 1 add =j }' each
+ found _ { j set }' { }" ? *
+ } /generalMatch deff
+
+ { { i }' { =i }' peek generalMatch }' /match deff
+
+ generalHeaderPattern match not { "failure while matching generalHeaderPattern" die }" rep
+ scopingHeaderPattern match ==isScoping
+
+ # [ :ud2 ] emitOpcodes # enable for further development
+
+ isScoping {
+ # "scoping function" dump
+ [
+ 8 /r15 :subqImm8Reg
+ /r15 :popqMem
+ 8 /r15 :subqImm8Reg
+ ::currentScope /rbx :movqImmReg
+ /rbx /rsi :movqMemReg
+ /rsi /r15 :movqRegMem
+ 8 /rdi :movqImmReg
+ ::internalAllocateScope /rax :movqImmReg
+ /rax :callqReg
+ /rax /rbx :movqRegMem
+ ] emitOpcodes
+ }" {
+ # "unscoping function" dump
+ [
+ 8 /r15 :subqImm8Reg
+ /r15 :popqMem
+ ] emitOpcodes
+ }" ? *
+
+ 1 ==continueParsing
+
+ { continueParsing }' { i ==s
+ [
+ { footerPattern match }' {
+ # "footerPattern matched" dump
+ 0 =continueParsing
+ }
+
+ { pushConstantPattern match }' {
+ # "pushConstantPattern matched" dump
+ [ s 2 add _ 8 add range peek each ] 256 math .unbase ==pushedConstant
+ # "pushedConstant: " dump pushedConstant dump
+
+ [
+ pushedConstant /rax :movqImmReg
+ /rax :pushqReg
+ ] emitOpcodes
+
+ pushedConstant emitReference
+ }
+
+ { callConstantPattern match }' {
+ # "callConstantPattern matched" dump
+ [ s 2 add _ 8 add range peek each ] 256 math .unbase ==calledAddress
+ # "calledAddress: " dump calledAddress dump
+ calledAddress ==j
+ { { j }' { =j }' peek generalMatch }' /callTargetMatch deff
+ [
+ { constantActiveGeneralPattern callTargetMatch }' {
+ # "constantActiveGeneralPattern matched" dump
+ [ calledAddress 3 add _ 8 add range peek each ] 256 math .unbase ==calledConstant
+ # "calledConstant: " dump calledConstant dump
+
+ [
+ calledConstant /rax :movqImmReg
+ /rax :pushqReg
+ "*" | +rawCodeAddress /rax :movqImmReg
+ /rax :callqReg
+ ] emitOpcodes
+
+ calledConstant emitReference
+ }
+
+ { constantNormalFunctionScopedUntypedPattern callTargetMatch }' {
+ # "constantNormalFunctionScopedUntypedPattern matched" dump
+ [ calledAddress 2 add _ 8 add range peek each ] 256 math .unbase ==functionScope
+ [ calledAddress 36 add _ 8 add range peek each ] 256 math .unbase ==finalAddress
+ # "functionScope: " dump functionScope dump
+ # "finalAddress: " dump finalAddress dump
+
+ [
+ functionScope /rsi :movqImmReg
+ 8 /r15 :subqImm8Reg
+
+ ::currentScope /rax :movqImmReg
+ /rsi /rax :xchgqRegMem
+ /rsi /r15 :movqRegMem
+ finalAddress /rdi :movqImmReg
+ /rdi :callqReg
+
+ /r15 /rsi :movqMemReg
+ ::currentScope /rax :movqImmReg
+ /rsi /rax :movqRegMem
+
+ 8 /r15 :addqImm8Reg
+ ] emitOpcodes
+
+ functionScope emitReference
+ finalAddress 16 sub emitReference
+ }
+
+ { constantNormalFunctionUnscopedUntypedPattern callTargetMatch }' {
+ # "constantNormalFunctionUnscopedUntypedPattern matched" dump
+ [ calledAddress 2 add _ 8 add range peek each ] 256 math .unbase ==finalAddress
+ # "finalAddress: " dump finalAddress dump
+
+ [
+ finalAddress /rax :movqImmReg
+ /rax :callqReg
+ ] emitOpcodes
+
+ finalAddress 16 sub emitReference
+ }
+
+ { constantPassivePattern callTargetMatch }' {
+ # "constantPassivePattern matched" dump
+ [ calledAddress 3 add _ 8 add range peek each ] 256 math .unbase ==pushedConstant
+ # "pushedConstant: " dump pushedConstant dump
+
+ [
+ pushedConstant /rax :movqImmReg
+ /rax :pushqReg
+ ] emitOpcodes
+
+ pushedConstant emitReference
+ }
+
+ { staticLoadPattern callTargetMatch }' {
+ # "staticLoadPattern matched" dump
+
+ [
+ ::currentScope /rax :movqImmReg
+ /rax /rax :movqMemReg
+ ] emitOpcodes
+
+ { staticLoadParentPattern callTargetMatch }' {
+ # "staticLoadParentPattern matched" dump
+
+ [
+ 16 /rax /rax :movqMemDisp8Reg
+ ] emitOpcodes
+ } loop
+
+ j ==loadStart
+
+ [
+ { staticLoadPassiveFromScopePattern callTargetMatch }' {
+ # "staticLoadPassiveFromScopePattern" dump
+ [ loadStart 3 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope
+ # "loadStart: " dump loadStart dump
+ # "offsetInScope: " dump offsetInScope dump
+
+ [
+ offsetInScope /rax :pushqMemDisp32
+ ] emitOpcodes
+ }
+
+ { staticLoadPassiveFromExtensionPattern callTargetMatch }' {
+ # "staticLoadPassiveFromExtensionPattern" dump
+ [ loadStart 13 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope
+ # "loadStart: " dump loadStart dump
+ # "offsetInScope: " dump offsetInScope dump
+
+ [
+ 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer
+ /rax /edx :movlMemReg # load scope length
+ /rdx :negqReg # prepare for subtraction
+ offsetInScope 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack
+ ] emitOpcodes
+ }
+
+ { staticLoadActiveFromScopePattern callTargetMatch }' {
+ # "staticLoadActiveFromScopePattern" dump
+ [ loadStart 3 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope
+ # "loadStart: " dump loadStart dump
+ # "offsetInScope: " dump offsetInScope dump
+
+ [
+ offsetInScope /rax :pushqMemDisp32
+ "*" | +rawCodeAddress /rax :movqImmReg
+ /rax :callqReg
+ ] emitOpcodes
+ }
+
+ { staticLoadActiveFromExtensionPattern callTargetMatch }' {
+ # "staticLoadActiveFromExtensionPattern" dump
+ [ loadStart 13 add _ 4 add range peek each ] 256 math .unbase ==offsetInScope
+ # "loadStart: " dump loadStart dump
+ # "offsetInScope: " dump offsetInScope dump
+
+ [
+ 24 /rax /rcx :movqMemDisp8Reg # load extension area pointer
+ /rax /edx :movlMemReg # load scope length
+ /rdx :negqReg # prepare for subtraction
+ offsetInScope 1 /rdx /rcx :pushqMemIndexScaleDisp32 # push loaded entry to stack
+ "*" | +rawCodeAddress /rax :movqImmReg
+ /rax :callqReg
+ ] emitOpcodes
+ }
+
+ { 1 }' {
+ [ j j 16 add range peek each ] dump
+ o dump
+ j dump
+ "unparsed static load opcodes in sys .opt .hook (optimizing version)" die
+ }
+ ] conds
+ }
+
+ { customFunctionHeaderPattern callTargetMatch }' {
+ # "customFunctionHeaderPattern matched" dump
+
+ [
+ calledAddress /rax :movqImmReg
+ /rax :callqReg
+ ] emitOpcodes
+
+ calledAddress 16 sub emitReference
+ }
+
+ { 1 }' {
+ [ j j 16 add range peek each ] dump
+ o dump
+ j dump
+ "unparsed call target opcodes in sys .opt .hook (optimizing version)" die
+ }
+ ] conds
+ }
+
+ { 1 }' {
+ [ i i 16 add range peek each ] dump
+ o dump
+ i dump
+ "unparsed opcodes in sys .opt .hook (optimizing version)" die
+ }
+ ] conds
+ } loop
+
+ isScoping {
+ [
+ /r15 /rcx :movqMemReg
+ ::currentScope /rax :movqImmReg
+ /rcx /rax :movqRegMem
+ 8 /r15 :addqImm8Reg
+ /r15 :pushqMem
+ 8 /r15 :addqImm8Reg
+ :retn
+ ] emitOpcodes
+ }' {
+ [
+ /r15 :pushqMem
+ 8 /r15 :addqImm8Reg
+ :retn
+ ] emitOpcodes
+ }' ? *
+
+ # "optimization finished" dump
+
+ newReferences newOpcodes o ::replace
+ 1 # return something different from o to signal successful optimization
+ } /hook sys .opt .deff
+> --
+
+# vim: syn=elymas
diff --git a/elymas/optimized.ey b/elymas/optimized.ey
new file mode 100644
index 0000000..c2961d4
--- /dev/null
+++ b/elymas/optimized.ey
@@ -0,0 +1,8 @@
+#!/usr/bin/env elymas
+
+[
+ "lib/math.ey"
+ "lib/sys/opt.ey"
+] { _ dump include }' each
+
+{ "/proc/self/fd/0" include }' "optimized" sys .freeze
diff --git a/notes b/notes
index 7db6b73..540f60c 100644
--- a/notes
+++ b/notes
@@ -183,6 +183,7 @@ Small set in between
* Length in bytes (including header)
bit 63-60: 0 1 1 0
bit 59: reserved for GC
+ bit 58: optimized or being optimized
* Length of opcode block (rounded to 8 byte)
* [ <opcode> ]*
* [ <object pointer> ]*