# Comiler simplified once # Full compiler minus error checking and index tracking # Syntax simplified to avoid _ · ‿ ' 𝕏𝕎𝔽𝔾𝕗 and double quote in comment # Already didn't have . [] :;? ⇐ 𝕣𝕤𝕘 # Blocks are functions; no empty list literals; no useless parentheses na←¯1⊑≢alph←("aA"+⌜↕26)∾˘"àÀ"+⌜(↕23)∾24+↕7 lf←@+⟨10,13⟩ ⟨charSet,cgl⟩←(∾ ⋈ ≠¨)⟨ "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function mod1←"˙˜˘¨⌜⁼´˝`" # 1-modifier "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊" # 2-modifier "⋄,"∾lf # Separator ":;?" # Header punctuation "⇐←↩" # Gets "(){}⟨⟩[]" # Bracket "‿" # Ligature "·" # nOthing "𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕣𝕗𝕘" # Input ".¯π∞" # Numeric (⊑"0")+↕10 # Digit "_"∾˜⥊alph # Alphabetic "• "∾@+9 # Whitespace ⟨cc,qs,qd,nc⟩←"#'""@" # Preprocessed characters ⟩ ⟨bF,b1,b2,bS,bH,bG,bB,bL,bO,bX,bN,bD,bA,bW,bP⟩←⋈¨˜⟜(0»+`)cgl M←1⊸⊑(0⊸≤∧>)-⟜⊑ # ∊ for an init,length pair 𝕩 as above sep←⊑bS pred←2+⊑bH bI←bX+⋈⟜-5⋄bR←8+⊑bX # Convert characters to numbers, mostly the same as tokens cgf←(cgr←⍋charSet)⊏charSet CharCode←cgr⊏˜1-˜1⌈cgf⍋⊢ swapundo←CharCode∊⟜mod1⊸/"˜⁼" vd←1+vi←⊑bN # Start of identifier numbering (plus dot) charRole←4∾˜∾⥊¨˜⟜(≠↑cgl˙)⟨1,2,3,¯1,¯1,¯3,⟨¯1,0⟩,¯2,0,¬/⟨5,6⟩⟩ # For first vd chars T←⌈`× ⋄ IT←↕∘≠⊸T ⋄ I1T←(1+↕∘≠)⊸T PN←1(∾/∾˜)(∨/⊣) # Partitioned-none: partitions where 𝕨<𝕩 is never 1 # Source to ⟨tokens, roles, number of identifiers, literals⟩ # Identifiers then literal tokens are numbered starting at vi Tokenize←{⟨System,vars⟩←𝕨 # Resolve comments and strings c←𝕩=cc⋄s←/⟨0,0⟩⊸«⊸∧sm←𝕩=qs⋄d←/dm←𝕩=qd g←⍋q←∾⟨ s⋄¯1↓d⋄/c⟩ ⋄q↩g⊏q # Open indices e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`c)⊸//(𝕩∊lf)∾1⟩ # Matching close indices Se←≠(>/⊢)∾⟜≠{(⊏˜𝕨)𝕊⍟(≠○(¯1⊸⊑))𝕩∾𝕩⊏𝕨}⟨0⟩˙ # Find reachable openings St←(≠𝕩)↑(/⁼(Se q⍋e)⊸⊏) # All indices → reached mask a←St q⋄b←St e⋄f←1≠`ab←a∨b # Open/close masks; filter # Extract character and string literals u←f∧𝕩=nc⋄ci←/u∨»a∧sm chr←(⊏⟜𝕩-(nc-@)×⊏⟜u)ci # Characters (indices ci) f>↩qe←dm∧«a∧↩dm # Quote Escape str←𝕩⊔˜1-˜(si←a>»qe)(⊣+`⊸×○(∾⟜1)<)≠`dm∧ab # Strings (indices /si) # Extract words: identifiers and numbers t←CharCode f/𝕩 nd←(t=⊑bN)>«t M bD⋄rr←t=bR # Namespace dot; 𝕣 w←»⊸n←l∧(+`w)⊏0∾¬wi # Identifier/Number masks num←ReadNums○(((0∾us)<∨⟜«0∾n)/0⊸∾) t×l # Numbers ir←(us/˜«⊸us)×+`w>n # Identifier groups and first character fr←(1=wi/wt)rr)∾(ci∾/si)⊏+`»f # Indices in t k←id∾⟨num,chr,str⟩⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k # IDs j into uniques k k↩System⌾(1⊸⊑)k # System value lookup wf←¬l∨t M bW # Index management for... t↩(w∨wf)/(vars≠⊸↓∾j++`vd»kk←≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace t-↩t(M×-⟜⊑)bS # Separators are equivalent p←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) eb←⟨3,5,7⟩+⊑bB # End brackets that allow separators sk←sb/˜p>∨⟜«(M⟜bH∨eb∊˜p⊸+)(sb-p)⊏t # Keep the first of each group that's not just inside a bracket t/˜↩1¨⌾(sk⊸⊏)t≠sep # Remove the rest im←(t=bR)∨t M vd⋈+´2↑kk # Identifier (or 𝕣) mask r←ir⌾(im⊸/)(vd⌊t)⊏charRole∾0 # Role t+↩(⊑bX)((⊢M⋈⟜5)×5+3⊸+⊸≤)t # Case-insensitive special names t-↩vi(<+10×=)t # Shift . to bX and variables back one ⟨t,r,k⟩ } # 𝕩 is a list of tokens that contains the numeric literals, each # preceded by 0. Return the numbers. ReadNums←{ ⟨e,d,n,p,i⟩←=⟜𝕩¨((⊑bA)+-´"ea")∾+⟜↕´bN # Masks for e.¯π∞ c←e∨z←0=𝕩⋄m←¬n∨c f←(17≥¬(⊢-T)+`)⊸∧g←(«≤(d<𝕩≠⊑bD)>○I1T¬)⊸∧m # No leading 0s; max 17 digits l←(¯1∾⟨π,1⟩∾↕10)⊏˜(¬d)/f×𝕩-1+⊑bN # Digit lookup, with ∞ as 1 to avoid ∞×0 v←(>⟜«0≤l)/0(0⊸≤××⟜10⊸+)`l # Numeric values—mantissas and exponents v×↩⟨1,¯1⟩⊏˜(r←>⟜»m)/»n # Negate if ¯ vm←c/z # Mask of mantissas in l dp←vm/f(--»⊸-(<×⊢)⊏⟜(I1T«d)⊸-)○(/>⟜«)g # Decimal position t←10⋆|ee←dp-˜vm/«v׬vm # Power of 10 t÷˜⌾((0>ee)⊸/)t×⌾((0«dc<0≤r+p # Role ¯4 for exports: ⊑bG is ⇐ sr←»⌾(((⍋⊏⟜dl)⊸⊏g)⊸⊏)sl←«⊸∨r=¯2⋄ns←¬sl∨sr # Strand right and left; not stranded cp←𝕩=1+⊑bB # Closed paren nr←(IT¬cp)⊏(𝕩=2+⊑bI)+2×𝕩=⊑bO # Nothingness role: 1 for 𝕨, 2 for · nx←0 g⊏˜↩⍋g⊏sdl←sl∨dl # Avoid reordering strands and dots in rev rp←≠⊸»⌾(g⊸⊏)↕≠r # Position of previous, for roles # Permutation to reverse each expression: *more* complicated than it looks rev←⍋+`¯1↓(¯1∾g)(⊣⍋⊸⊏⊏˜⟜⍋¬⊏˜)⍋+`⊸+1∾g⊏sdl∨r=¯1 gf←⍋fd←+`br←rev⊏p×𝕩M⟨2+⊑bB,2⟩ # Order by brace depth fd to de-nest blocks rev⊏˜↩gf⋄fd⊏˜↩gf⋄br⊏˜↩gf 𝕩⊏˜↩rev⋄dc⊏˜↩rev # Compute parsing ordering gr≡g⊏rev BE←=∨+⟜2⊸= # Bracket equals: match ⟨[ or ⟩] given ⟨ or ⟩ only g↩⍋+`p↩br-˜rev⊏p⋄bp←0(<⋈○(/⟜g)>)g⊏p # Order by non-brace bracket depth g⊏˜↩⍋g⊏«⊸∨dc⋄gr←g⊏rev # Now by dots sll←1+2÷˜0(<-○/>)gr⊏sr-sl⋄l←/g⊏𝕩BE˜5+⊑bB # Strand length; list starts b←br>0⋄c←/br<0⋄bp∾¨↩⟨/b,c⟩ # Block Begin (mask) and Close (index), in matching order g⊏˜↩gs←⍋gr⊏sl⋄gr↩g⊏rev⋄gi←⍋g # Send strand prefixes *‿ to the end # Headers hh←𝕩=⊑bH⋄cs←𝕩=1+⊑bH # Case header : and separator ; fi←+`cb←b∨cs⋄H←cb¬∘PN⊢ # Body index fi; which bodies Have a property cq←(H𝕩=pred)∨ch←H hh # ch: body has : header ; cq: or ? predicate cf←1∾¬co←cb/cs⋄cm←0∾∨⟜«co # cf: body is first; cm: body is one of multiple cc←(⍋⍋«co)⊏c∾/cs # Case close hi←/hf←hh⊏˜⟜IT⌾((⌽g)⊸⊏)cb∨hh # Header component indices un←0=us←swapundo(≠∘⊣-⊐)hi⊏𝕩 ut←un/»us⋄hi/˜↩0=us # Undo type: 0 normal, 1 ⁼, 2 ˜⁼ hr←(⊏⟜ns×⊏⟜r)rev⊏˜hi # Header component roles hl←2=hn←(1⊸»+«)hc←¯1=hr # hl: is label, hc: is : ho←(»∨(«(hr=3)∧⊢))hl<2≤hr # Header operands hm←¬ho∨ha←ho<(0=hr)∧1=hn # Mask for main name; header arguments hk←3|1-˜(+`bI∾nv)⍋hi⊏𝕩׬rev⊏sr # Kind: 0 special, 1 name, 2 compound hma←hm>hla←hl∧(0=hr)∧1≠hk⋄hr+↩hla⋄hl>↩hla # Lone non-name subject is 𝕩 with 𝕊 omitted hv←(hla+ha×1+«hc)+(ho×4+«3=hr)+hma×3×1-˜2⌊hr # Special name for position hk×↩¬hc∨hl∧0=hr # Treat subject labels like special names hm>↩hc⋄hr/˜↩hm⋄hx←(1»hc)/ha # Header-derived role hr and immediacy ¬hx ut-↩-⟜»ut×ho # Shift ⁼ from right operand to main name ut/˜↩hm⋄hx∨↩1=hr cwh←hc/»hl⌈ha×1+he←0≠hk # Body 𝕨 for just headers ut2←2=ut cw←(cwh⌈2×ut2)⌾(ch⊸/)1+-⟜«(»cq)<1(⊢<«)cf # Body 𝕨: 0 no, 1 allowed, 2 required hl/˜↩hm⋄hu←(¬he)⌾(hi⊸⊏)hf # hu: mask of header special names hj←gi⊏˜he/hi⋄hd←2=he/hk # hj: header assignments; hd: which ones destructure # Block properties ss←⟨0,3,5,6⟩⍋(⊢+(0sl # Indices of list literals lm←(0¨sll)∾˜(5+⊑bB)-˜l0⊏𝕩 # List merge, adding 2 for [] # Parsing part 1 a←(¯5⊸<∧≤⟜¯3)r⋄ps←a↩alm←ai⊏aa⋄al←alm/ai # aliases al ai/˜↩af⋄at/˜↩af∾1¨hj # Lexical resolution (independent of parsing part 2 below) di←/dm←»dc # Dots aren't scoped id←/(hu∨dm∨gi⊏«aa∧a)<(0⊸≤∧<⟜nv)xv # Identifier indices in xv sa←0)ia∾sa # Opcode idoc←⟨32¨hj,0¨hj,he/hv 64¨di,di⊏xv, ido,idd∾0¨sp,idi∾spi⟩ # Identifier bytecode: instruction, depth, slot # Parsing part 2 ta←tr∧2(>∨|)ps(⊢-T)+`¬ro # Train argument (first-level) fa←/(hg∨ta∨ro∨«⊸∨ps(2=ne)∨ls∨»r=¯5⋄rt←/fo # Drop (block separator) and return qp←/𝕩=pred # Predicate fl←(dy×⊏⟜os)⊸+fa+dy # Function application site dr∾↩((1+dy)×fn←2=fm←fa⊏ne)/fl # Turn function applications on · to drops fn↩¬fn⋄fa/˜↩fn⋄fl/˜↩fn # And remove them # Object code generation: numbers oc ordered by source location (after rev) oi ao←48+(0⌈(1+⊑bG)-˜ai⊏𝕩+ma+mm)∾-hd # Assignment opcode or←⍋idor∾g⊏˜∾⟨cn,cn,bk,bk,hq,api,2/l,at,dr,qp,al+1,al+1,oa+1⌈oa⊏os,fl,rt⟩ oc←or⊏∾idoc∾⟨0¨cn,ob,1¨bk,1+↕≠bk,43¨hq,44¨api,⥊⍉(11+lm+l⊏aa)≍ll,ao,6¨dr,42¨qp,66¨al,vi-˜(al-1)⊏𝕩 24+oa⊏r,16+(fn/dy+2×fm⌈1=ny)+4×0blk, <˘⍉>bdy⟩ }