From fa1df92de70f162de709bdff1f4c125c0d50247a Mon Sep 17 00:00:00 2001 From: Drahflow Date: Thu, 6 Mar 2014 10:55:37 +0100 Subject: Moved notes to doc directory --- doc/notes | 478 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 doc/notes (limited to 'doc') diff --git a/doc/notes b/doc/notes new file mode 100644 index 0000000..455fc81 --- /dev/null +++ b/doc/notes @@ -0,0 +1,478 @@ += Expressions = + +0 1 2... -> just push themselves on the stack +"string" -> pushes "string" on the stack + -> "bareword" + /bareword -> "bareword" / + |bareword -> "bareword" | + \bareword -> "bareword" \ + +bareword -> lookup "bareword" in current scopes + -> passive -> push value on the stack + -> active -> call value on current stack + -> quote -> call value on current stack, even in quoted mode + +/ -> nop +"string" | -> resolve "string" in current scope, push value +"string" \ -> resolve "string" in current scope, call value on current stack + +[ -> push array begin marker on stack +] -> since the topmost array marker, create array from stack values + +( -> push tuple begin marker on stack +) -> since the topmost tuple marker, create tuple from stack values + +{ -> push quote begin marker on stack, increase parser quote count +} -> since the topmost quote marker, everything becomes one closure (deduce type), decrease parser quote count + +add -> adds +|add -> push add function pointer +\add -> adds + +|f |g ; -> { f g } (modulo subtle scoping differences) +{ ...1 } variable ; { ...2 } ; -> { ...1 variable } { ...2 } ; -> { ...1 variable ...2 } +1 2 |add * -> 3 + +|f * -> f + +StructuredData.field -> StructuredData "field" . -> dereference field in structured data, if passive push, if active do +SD "field" .| -> dereference, push value +SD "field" .\ -> dereference, call + +v "name" deff -> define name as active name, assign v +v "name" defv -> define name as passive name, assign v +v =name -> set current value of name to v, leave modes intact + +< -> push new scope +> -> pop scope, structured data object now on stack +< 0 =a 0 =b > -> something which has a and b is on top of stack +< ... parent >' -> push scope with non-local parent (at the end only) + +0 1 2 _ -> 0 1 2 2 +0 1 2 -0 -> 0 1 2 +0 1 2 -000 -> 0 1 2 2 2 +0 1 2 -020 -> 2 0 2 +0 1 2 -1 -> 0 1 +0 1 2 -2 -> 0 + +exch = -01 +pop = -- +dup = _ + +- -> delete stack contents up to largest digit, recreate according to digits + += Characters = + +!: co-routines and threads +": string quote +#: line comment, complex scope types +$: +%: +&: +': type assignment +(: tuple begin +): tuple end +*: apply function ++: +,: +-: stack manipulation +.: field dereference +/: stringify +0-9: numbers +:: +;: function composition +<: scope start +=: assignment +>: scope end +?: ternary operator, error functions +@: +A-Z: bareword characters +[: array begin +\: callify +]: array end +^: +_: stack manipulation +`: +a-z: bareword characters +{: quote begin +|: passify +}: quote end +~: + += Register Meanings = + +r15: call stack and internal storage +rsp: data stack +r14: current scope +r13: current coroutine + += Stuff on the Stack = + +0: zero +1: one +?: data +x: don't care + +1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx???????????????????????????????? +unboxed integer + +00000000000000000110???????????????????????????????????????????? +reference to heap object + +0000000000000000000000000000000000000000000000000000000000000001 +array begin marker + +0000000000000000000000000000000000000000000000000000000000000010 +quote begin marker + +0101010101010101010101010101010101010101010101010101010101010101 +bottom of stack marker + +0000000000000000000000000000000000000000000000000000000000000100 +error data marker (only for external debugging) + +0000000000000000000000000000000000000000000000000000000000000101 +dynamic variable binding marker (call stack only) +next two cells (upwards into the stack) +* reference to dynamic variable object +* reference to current value + += Memory Management = + +== Currently implemented == + +Inspiration: http://wiki.luajit.org/new-garbage-collector + +0x6???????????: Heap memory ("16 TB should be enough for everyone.") +0x5???????????: GC block bitmap +0x4???????????: GC mark bitmap +0x3???????????: miscellaneous allocations (initial assembly in particular) + +== Musings about possibly better schemes == + +0x500?????????: GC block bitmap +0x501?????????: GC block bitmap +0x502?????????: GC mark bitmap +0x503?????????: GC mark bitmap +0x504?????????: GC free bitmap +0x505?????????: GC free bitmap +0x506?????????: GC "all cells used"-tree +0x507?????????: GC "all cells used"-tree +0x508?????????: GC "any cells used"-tree +0x509?????????: GC "any cells used"-tree + +Large set of reachable, old objects +Large set of unreachable, new objects +Small set in between + +=== How to find free space === + +A valid state: + +1 16-cell element blocked +1 1 8-cell element blocked +1 0 1 1 4-cell element blocked +1 1 0 0 1 1 1 1 2-cell element blocked +1011000010101010 usage of cells + +Tree-Layout: + +E +6 D +2 5 9 C +013478AB + +Right child: 1 sub +Left child: 1 height level sub shl sub + +Algorithm of allocation (of byte-count n): + +Find correct level m = height-log_2(n/16) (from top). +Walk zeros in "all cells used"-tree down to level m-1. +Find zero in "any cells used"-tree on level m. +Set "all cells used"-bit on level m (or multiple bits on the lower levels according to object size) + Propagate as necessary. +Set "any cells used"-bits on levels m to 0. + +Overhead: 1/32. + +== Object Memory Layout == + +=== Int === +* Length in bytes (including header, always 16) + bit 63-60: 0 0 0 0 + bit 59: reserved for GC +* value + +=== String === +* Length in bytes (including header) + bit 63-60: 0 0 0 1 + bit 59: reserved for GC +* hash (0 if not yet calculated) +* Exact length +* data (UTF-8) + +=== Float === +* Length in bytes (including header, always 16) + bit 63-60: 0 0 1 0 + bit 59: reserved for GC +* double precision value + +=== Extension Area === +* Length in bytes (including header) + bit 63-60: 0 1 0 0 + bit 59: reserved for GC +* data + [ ]* + +=== Function === +* Length in bytes (including header) + bit 63-60: 0 1 0 1 + bit 59: reserved for GC +* captured scope pointer (0 if non-capturing function) +* type pointer (0 if untyped) +* code pointer + +=== Function Code === +* Length in bytes (including header) + bit 63-60: 0 1 1 0 + bit 59: reserved for GC + bit 58: optimized or being optimized or excluded from optimization +* Length of opcode block (rounded to 8 byte) +* [ ]* +* [ ]* + +=== Array === +* Length in bytes (including header) + bit 63-60: 0 1 1 1 + bit 59: reserved for GC +* [ ]* + +=== Function Type === +* Length in bytes (including header) + bit 63-60: 1 0 0 0 + bit 59: reserved for GC +* Number of input stack elements +* [ ]* +* Number of output stack elements +* [ ]* + ( TODO: Think about representing this directly as array pointers ) + ( TODO: Think about moving the counts to the beginning ) + +=== Scope === +* Length in bytes (including header) + bit 63-60: 1 0 0 1 + bit 59: reserved for GC + bit 58: parent pointer exists (for scopes) (always 1 for now) + bit 57: extension area pointer exists (always 1 for now) +* name table (mapping entry names to offsets) +* parent scope (0 if no parent) +* extension area pointer (0 if no extra members (yet)) +* data + [ ]* + +=== Name Table === +( TODO: create a hash-table based implementation here ) +* Length in bytes (including header) + bit 63-60: 1 0 1 0 + bit 59: reserved for GC +* End of fill in bytes (i.e. if this field == length, table is full) +* data + [ , ]* + ( TODO: Oh, the performance... ) + The 0th string maps to 0 and so on. + +=== Stack === +* Length in bytes (including header) + bit 63-60: 1 0 1 1 + bit 59: reserved for GC +* Current stack pointer value +* +* bottom of stack marker + +=== Coroutine State === +* Length in bytes (including header) + bit 63-60: 1 1 0 0 + bit 59: reserved for GC +* Instruction pointer +* Current scope pointer +* Call stack object pointer (may be zero to indicate empty stack) +* Data stack object pointer (may be zero to indicate empty stack) + += Musings About The Optimizer = + +Main problem: How do we find out if the names mean what we believe they do? + +== Runtime Checking == +* Optimize for the _current_ values of names +* Check whether changes have occured + * maybe some kind of version numbering in the scopes ++ semantically identical to fully dynamic scopes +- slow + +== Dynamic / Static / Constant Scopes, Guessing == +* A scope can be static (i.e. names don't change activation mode and no new names) +* A scope can be constant (i.e. names don't change values and it is static) +* The compiler guesses what is correct + * The user can override +* Conflicting runtime-changes raise errors ++ should be doable as long as no joker redefines =, defv, deff ++ should result in fast code for most methods +- surprising runtime errors + +== Dynamic / Static / Constant Scopes, User Specified == +* A scope can be static (i.e. names don't change activation mode and no new names) +* A scope can be constant (i.e. names don't change values and it is static) +* Default scopes are dynamic, but the user can specify otherwise +- many methods would not get optimized because the user forgot to flag the scope + +== Dynamic / Static / Constant Names (implemented) == +* A name can be static (i.e. it is never overwritten after being resolved in a child scope and remains at the same scope index) +* A name can be constant (i.e. its value never changes after being resolved in a child scope) +* An optimized version of the code can be generated whenever it is called (using current name values) ++ fine grained control, sensible defaults possible +- even more assignment semantics +=== Name Properties === +* [d] deeply constant: objects referenced (indirectly) from this name can be assumed constant (constant implied) +* [c] constant: the object reference of this name can be assumed constant (code constant implied) +* [t] code / type constant: the function type can be assumed constant (including whether this function has no captured scope) +* [s] static: the path from local scope to where the name resolves can be assumed constant + the name index within the scope can be assumed constant + the activation level can be assumed constant +* [] dynamic: nothing can be assumed +* [v] inactive: push value +* [f] active: execute unless quoted +* [q] quote-active: execute on first resolution (execution is instant) ++---------------------------------------------------+ +| | | s | t | st | c | d | ++---+------+-------+-------+--------+-------+-------+ +| v | defv | defvs | defvt | defvst | defvc | defvd | +| f | deff | deffs | defft | deffst | deffc | deffd | +| q | defq | | | | | | ++---+------+-------+-------+--------+-------+-------+ +* defaults: |defvs "==" deffd |deffs "=*" deffd +* it is possible to globally turn checking (after dynamic resolution) on + +== Sticky Resolution == +* Active names are considered constant after being resolved for the first time +* Passive names are considered static after being resolved for the first time +- fails to correctly handle the { =*array ... } use case + += Musings about Types = + +1 [ 2 3 ] add -> [ 3 4 ] +[ [ 1 ] [ 2 ] ] len -> 2 # scanning for applicable base type from top + +A->int B->int add -> B->A->int +A->int A->int add -> A->int + +# argument order to '' is from stack-top to stack-lower +[ 1 2 3 ] [ /foo /bar /quux ] { defv }' [ "" 1 ] [ ] '' * + +[ /foo /bar ] [ 2 3 ] { "Key %s -> %d" format sys .out .writeall } [ "" "" ] [ ] '' * +[ /a /b /c ] _ { cat } * # -> [ /a /b /c /a /b /c ] +[ /a /b /c ] _ { cat } [ 1 1 ] [ "" ] '' * # -> [ /aa /bb /cc ] +[ /a /b /c ] _ [ 0 ] [ "" ] '' cat # -> [ [ /aa /ba /ca ] [ /ab /bb /cb ] [ /ac /bc /cc ] ] +[ /a /b /c ] _ |cat cross + +Types are represented by concrete values of that type. +Scalars (i.e. integers, strings, floats) are all represented by integers. + +Co-iterability is decided by type-match and +* integers: co-iteration only for equal non-zero integers + +Question: Is it better to handle scalars uniformly or handle strings (or floats) as a separate type? +Question: How to deal with functions like eq, which can take all scalars? + +== Variant: Scalar-Type (implemented) == + +eq [ sys .typed .SCALAR _ ] [ Int ] '' +{ 1 eq } { "foo" eq } and # [ sys .typed .SCALAR ] [ Int ] # -- -- 0 +{ 1 eq } '1 { "foo" eq } '2 and # [ sys .typed .SCALAR _ ] [ Int ] # 1 eq -01 "foo" eq and + +== Variant: Type-Variables == + +eq [ sys .typed .var _ ] [ Int ] '' +{ 1 eq } { "foo" eq } and # [ $X $Y ] [ Int ] # 1 eq -01 "foo" eq and + += Musings about function composition operator = + +Maybe ; should act differently when getting a string, or use another single char for category operations + +Uses: Categorical operators over Set +Anyway: This should be handled by a library, use escape mechanism for ; + += Musings about the API = + +sys .file +sys .net +sys .net .tcpip + + "127.0.0.1" 80 connect + "www.google.de" 80 connect + +sys .poll + create -> p + file userdata p .add + file p .remove + timeout p .poll + +sys .linux + +str + ... "format" sprintf + string "format" sscanf + end string prefix + { 0 end string infix } + start string postfix + { start string len infix } + start end string infix + { start end range string * } + +str .encode + string from to encode + +str .utf8 + +bin + ... "format" bprintf + string "format" bscanf + +math + [ 1 2 3 ] 16 unbase -> 1+2*16+3*16*16 + 12345 10 base -> [ 5 4 3 2 1 ] + += Musing about asm regex engine = + +Pseudocode: x86 assembly. +Program counter: Instruction offset relative to start of pseudocode. +Thread: Instruction counter + capture data +Threadlist: +[ + + + + [ # thread data + + + ]* +] + += Musing about co-routines = + +* Includes data stack, call stack, rip + +{ ... } !! # create co-routine with empty data and call stack +{ ...2 } { ...1 } !!' # create co-routine with full copy, starting with ...1, push it to stack, then continue execution at ...2 +{ ... } !!" # create co-routine with shared data stack and empty call stack +CR !!_ # clone co-routine, full copy of all stacks + +... CR n ! # pass control and n stack elements to CR, passes "calling" CR object on top of stack +... CR * # pass control to CR, don't switch data stack, passes "calling" CR object on top of stack +... f n ! # pass control to function f, ignore n +... f * # pass control to function f + +{ { ==badError ==error + badError error doStuffWhichFailsInVariousWays +} { "a bad error occured, but we caught it" dump } !!' +} { "a non-bad error occured, but we caught it" dump } !!' -- cgit v1.2.3