diff options
Diffstat (limited to 'notes')
| -rw-r--r-- | notes | 478 |
1 files changed, 0 insertions, 478 deletions
@@ -1,478 +0,0 @@ -= Expressions = - -0 1 2... -> just push themselves on the stack -"string" -> pushes "string" on the stack -<non-bareword><bareword> -> "bareword" <non-bareword> - /bareword -> "bareword" / - |bareword -> "bareword" | - \bareword -> "bareword" \ - -bareword -> lookup "bareword" in current scopes - -> passive -> push value on the stack - -> active -> call value on current stack - -> quote -> call value on current stack, even in quoted mode - -/ -> nop -"string" | -> resolve "string" in current scope, push value -"string" \ -> resolve "string" in current scope, call value on current stack - -[ -> push array begin marker on stack -] -> since the topmost array marker, create array from stack values - -( -> push tuple begin marker on stack -) -> since the topmost tuple marker, create tuple from stack values - -{ -> push quote begin marker on stack, increase parser quote count -} -> since the topmost quote marker, everything becomes one closure (deduce type), decrease parser quote count - -add -> adds -|add -> push add function pointer -\add -> adds - -|f |g ; -> { f g } (modulo subtle scoping differences) -{ ...1 } variable ; { ...2 } ; -> { ...1 variable } { ...2 } ; -> { ...1 variable ...2 } -1 2 |add * -> 3 - -|f * -> f - -StructuredData.field -> StructuredData "field" . -> dereference field in structured data, if passive push, if active do -SD "field" .| -> dereference, push value -SD "field" .\ -> dereference, call - -v "name" deff -> define name as active name, assign v -v "name" defv -> define name as passive name, assign v -v =name -> set current value of name to v, leave modes intact - -< -> push new scope -> -> pop scope, structured data object now on stack -< 0 =a 0 =b > -> something which has a and b is on top of stack -< ... parent >' -> push scope with non-local parent (at the end only) - -0 1 2 _ -> 0 1 2 2 -0 1 2 -0 -> 0 1 2 -0 1 2 -000 -> 0 1 2 2 2 -0 1 2 -020 -> 2 0 2 -0 1 2 -1 -> 0 1 -0 1 2 -2 -> 0 - -exch = -01 -pop = -- -dup = _ - --<digits> -> delete stack contents up to largest digit, recreate according to digits - -= Characters = - -!: co-routines and threads -": string quote -#: line comment, complex scope types -$: <open> -%: <open> -&: <open> -': type assignment -(: tuple begin -): tuple end -*: apply function -+: <open> -,: <open> --: stack manipulation -.: field dereference -/: stringify -0-9: numbers -:: <open> -;: function composition -<: scope start -=: assignment ->: scope end -?: ternary operator, error functions -@: <open> -A-Z: bareword characters -[: array begin -\: callify -]: array end -^: <open> -_: stack manipulation -`: <open> -a-z: bareword characters -{: quote begin -|: passify -}: quote end -~: <open> - -= Register Meanings = - -r15: call stack and internal storage -rsp: data stack -r14: current scope -r13: current coroutine - -= Stuff on the Stack = - -0: zero -1: one -?: data -x: don't care - -1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx???????????????????????????????? -unboxed integer - -00000000000000000110???????????????????????????????????????????? -reference to heap object - -0000000000000000000000000000000000000000000000000000000000000001 -array begin marker - -0000000000000000000000000000000000000000000000000000000000000010 -quote begin marker - -0101010101010101010101010101010101010101010101010101010101010101 -bottom of stack marker - -0000000000000000000000000000000000000000000000000000000000000100 -error data marker (only for external debugging) - -0000000000000000000000000000000000000000000000000000000000000101 -dynamic variable binding marker (call stack only) -next two cells (upwards into the stack) -* reference to dynamic variable object -* reference to current value - -= Memory Management = - -== Currently implemented == - -Inspiration: http://wiki.luajit.org/new-garbage-collector - -0x6???????????: Heap memory ("16 TB should be enough for everyone.") -0x5???????????: GC block bitmap -0x4???????????: GC mark bitmap -0x3???????????: miscellaneous allocations (initial assembly in particular) - -== Musings about possibly better schemes == - -0x500?????????: GC block bitmap -0x501?????????: GC block bitmap -0x502?????????: GC mark bitmap -0x503?????????: GC mark bitmap -0x504?????????: GC free bitmap -0x505?????????: GC free bitmap -0x506?????????: GC "all cells used"-tree -0x507?????????: GC "all cells used"-tree -0x508?????????: GC "any cells used"-tree -0x509?????????: GC "any cells used"-tree - -Large set of reachable, old objects -Large set of unreachable, new objects -Small set in between - -=== How to find free space === - -A valid state: - -1 16-cell element blocked -1 1 8-cell element blocked -1 0 1 1 4-cell element blocked -1 1 0 0 1 1 1 1 2-cell element blocked -1011000010101010 usage of cells - -Tree-Layout: - -E -6 D -2 5 9 C -013478AB - -Right child: 1 sub -Left child: 1 height level sub shl sub - -Algorithm of allocation (of byte-count n): - -Find correct level m = height-log_2(n/16) (from top). -Walk zeros in "all cells used"-tree down to level m-1. -Find zero in "any cells used"-tree on level m. -Set "all cells used"-bit on level m (or multiple bits on the lower levels according to object size) - Propagate as necessary. -Set "any cells used"-bits on levels m to 0. - -Overhead: 1/32. - -== Object Memory Layout == - -=== Int === -* Length in bytes (including header, always 16) - bit 63-60: 0 0 0 0 - bit 59: reserved for GC -* value - -=== String === -* Length in bytes (including header) - bit 63-60: 0 0 0 1 - bit 59: reserved for GC -* hash (0 if not yet calculated) -* Exact length -* data (UTF-8) - -=== Float === -* Length in bytes (including header, always 16) - bit 63-60: 0 0 1 0 - bit 59: reserved for GC -* double precision value - -=== Extension Area === -* Length in bytes (including header) - bit 63-60: 0 1 0 0 - bit 59: reserved for GC -* data - [ <reference> ]* - -=== Function === -* Length in bytes (including header) - bit 63-60: 0 1 0 1 - bit 59: reserved for GC -* captured scope pointer (0 if non-capturing function) -* type pointer (0 if untyped) -* code pointer - -=== Function Code === -* Length in bytes (including header) - bit 63-60: 0 1 1 0 - bit 59: reserved for GC - bit 58: optimized or being optimized or excluded from optimization -* Length of opcode block (rounded to 8 byte) -* [ <opcode> ]* -* [ <object pointer> ]* - -=== Array === -* Length in bytes (including header) - bit 63-60: 0 1 1 1 - bit 59: reserved for GC -* [ <object pointer> ]* - -=== Function Type === -* Length in bytes (including header) - bit 63-60: 1 0 0 0 - bit 59: reserved for GC -* Number of input stack elements -* [ <input type pointer> ]* -* Number of output stack elements -* [ <output type pointer> ]* - ( TODO: Think about representing this directly as array pointers ) - ( TODO: Think about moving the counts to the beginning ) - -=== Scope === -* Length in bytes (including header) - bit 63-60: 1 0 0 1 - bit 59: reserved for GC - bit 58: parent pointer exists (for scopes) (always 1 for now) - bit 57: extension area pointer exists (always 1 for now) -* name table (mapping entry names to offsets) -* parent scope (0 if no parent) -* extension area pointer (0 if no extra members (yet)) -* data - [ <reference> ]* - -=== Name Table === -( TODO: create a hash-table based implementation here ) -* Length in bytes (including header) - bit 63-60: 1 0 1 0 - bit 59: reserved for GC -* End of fill in bytes (i.e. if this field == length, table is full) -* data - [ <string reference>, <execution mode> ]* - ( TODO: Oh, the performance... ) - The 0th string maps to 0 and so on. - -=== Stack === -* Length in bytes (including header) - bit 63-60: 1 0 1 1 - bit 59: reserved for GC -* Current stack pointer value -* <data ...> -* bottom of stack marker - -=== Coroutine State === -* Length in bytes (including header) - bit 63-60: 1 1 0 0 - bit 59: reserved for GC -* Instruction pointer -* Current scope pointer -* Call stack object pointer (may be zero to indicate empty stack) -* Data stack object pointer (may be zero to indicate empty stack) - -= Musings About The Optimizer = - -Main problem: How do we find out if the names mean what we believe they do? - -== Runtime Checking == -* Optimize for the _current_ values of names -* Check whether changes have occured - * maybe some kind of version numbering in the scopes -+ semantically identical to fully dynamic scopes -- slow - -== Dynamic / Static / Constant Scopes, Guessing == -* A scope can be static (i.e. names don't change activation mode and no new names) -* A scope can be constant (i.e. names don't change values and it is static) -* The compiler guesses what is correct - * The user can override -* Conflicting runtime-changes raise errors -+ should be doable as long as no joker redefines =, defv, deff -+ should result in fast code for most methods -- surprising runtime errors - -== Dynamic / Static / Constant Scopes, User Specified == -* A scope can be static (i.e. names don't change activation mode and no new names) -* A scope can be constant (i.e. names don't change values and it is static) -* Default scopes are dynamic, but the user can specify otherwise -- many methods would not get optimized because the user forgot to flag the scope - -== Dynamic / Static / Constant Names (implemented) == -* A name can be static (i.e. it is never overwritten after being resolved in a child scope and remains at the same scope index) -* A name can be constant (i.e. its value never changes after being resolved in a child scope) -* An optimized version of the code can be generated whenever it is called (using current name values) -+ fine grained control, sensible defaults possible -- even more assignment semantics -=== Name Properties === -* [d] deeply constant: objects referenced (indirectly) from this name can be assumed constant (constant implied) -* [c] constant: the object reference of this name can be assumed constant (code constant implied) -* [t] code / type constant: the function type can be assumed constant (including whether this function has no captured scope) -* [s] static: the path from local scope to where the name resolves can be assumed constant - the name index within the scope can be assumed constant - the activation level can be assumed constant -* [] dynamic: nothing can be assumed -* [v] inactive: push value -* [f] active: execute unless quoted -* [q] quote-active: execute on first resolution (execution is instant) -+---------------------------------------------------+ -| | | s | t | st | c | d | -+---+------+-------+-------+--------+-------+-------+ -| v | defv | defvs | defvt | defvst | defvc | defvd | -| f | deff | deffs | defft | deffst | deffc | deffd | -| q | defq | | | | | | -+---+------+-------+-------+--------+-------+-------+ -* defaults: |defvs "==" deffd |deffs "=*" deffd -* it is possible to globally turn checking (after dynamic resolution) on - -== Sticky Resolution == -* Active names are considered constant after being resolved for the first time -* Passive names are considered static after being resolved for the first time -- fails to correctly handle the { =*array ... } use case - -= Musings about Types = - -1 [ 2 3 ] add -> [ 3 4 ] -[ [ 1 ] [ 2 ] ] len -> 2 # scanning for applicable base type from top - -A->int B->int add -> B->A->int -A->int A->int add -> A->int - -# argument order to '' is from stack-top to stack-lower -[ 1 2 3 ] [ /foo /bar /quux ] { defv }' [ "" 1 ] [ ] '' * - -[ /foo /bar ] [ 2 3 ] { "Key %s -> %d" format sys .out .writeall } [ "" "" ] [ ] '' * -[ /a /b /c ] _ { cat } * # -> [ /a /b /c /a /b /c ] -[ /a /b /c ] _ { cat } [ 1 1 ] [ "" ] '' * # -> [ /aa /bb /cc ] -[ /a /b /c ] _ [ 0 ] [ "" ] '' cat # -> [ [ /aa /ba /ca ] [ /ab /bb /cb ] [ /ac /bc /cc ] ] -[ /a /b /c ] _ |cat cross - -Types are represented by concrete values of that type. -Scalars (i.e. integers, strings, floats) are all represented by integers. - -Co-iterability is decided by type-match and -* integers: co-iteration only for equal non-zero integers - -Question: Is it better to handle scalars uniformly or handle strings (or floats) as a separate type? -Question: How to deal with functions like eq, which can take all scalars? - -== Variant: Scalar-Type (implemented) == - -eq [ sys .typed .SCALAR _ ] [ Int ] '' -{ 1 eq } { "foo" eq } and # [ sys .typed .SCALAR ] [ Int ] # -- -- 0 -{ 1 eq } '1 { "foo" eq } '2 and # [ sys .typed .SCALAR _ ] [ Int ] # 1 eq -01 "foo" eq and - -== Variant: Type-Variables == - -eq [ sys .typed .var _ ] [ Int ] '' -{ 1 eq } { "foo" eq } and # [ $X $Y ] [ Int ] # 1 eq -01 "foo" eq and - -= Musings about function composition operator = - -Maybe ; should act differently when getting a string, or use another single char for category operations - -Uses: Categorical operators over Set -Anyway: This should be handled by a library, use escape mechanism for ; - -= Musings about the API = - -sys .file -sys .net -sys .net .tcpip - - "127.0.0.1" 80 connect - "www.google.de" 80 connect - -sys .poll - create -> p - file userdata p .add - file p .remove - timeout p .poll - -sys .linux - -str - ... "format" sprintf - string "format" sscanf - end string prefix - { 0 end string infix } - start string postfix - { start string len infix } - start end string infix - { start end range string * } - -str .encode - string from to encode - -str .utf8 - -bin - ... "format" bprintf - string "format" bscanf - -math - [ 1 2 3 ] 16 unbase -> 1+2*16+3*16*16 - 12345 10 base -> [ 5 4 3 2 1 ] - -= Musing about asm regex engine = - -Pseudocode: x86 assembly. -Program counter: Instruction offset relative to start of pseudocode. -Thread: Instruction counter + capture data -Threadlist: -[ - <string header> - <size> - <bitfield of contained PCs> - [ # thread data - <pc> - <capture data> - ]* -] - -= Musing about co-routines = - -* Includes data stack, call stack, rip - -{ ... } !! # create co-routine with empty data and call stack -{ ...2 } { ...1 } !!' # create co-routine with full copy, starting with ...1, push it to stack, then continue execution at ...2 -{ ... } !!" # create co-routine with shared data stack and empty call stack -CR !!_ # clone co-routine, full copy of all stacks - -... CR n ! # pass control and n stack elements to CR, passes "calling" CR object on top of stack -... CR * # pass control to CR, don't switch data stack, passes "calling" CR object on top of stack -... f n ! # pass control to function f, ignore n -... f * # pass control to function f - -{ { ==badError ==error - badError error doStuffWhichFailsInVariousWays -} { "a bad error occured, but we caught it" dump } !!' -} { "a non-bad error occured, but we caught it" dump } !!' |
