# NOTE(review): this copy of the source is damaged; restore it from a pristine
# copy before relying on it. Two distinct problems are visible:
#   1. Line breaks were collapsed. A BQN comment runs from `#` to the end of
#      the line, so on each physical line below everything after the first `#`
#      is comment text, including what were clearly meant to be statements.
#   2. Some statements are cut off mid-expression and leave unbalanced
#      brackets: see the definitions of `str`, `ir`, `ps`, `uu` and `fa`.
#      Names such as `l`, `w`, `t`, `br`, `gr`, `ig`, `d`, `id`, `sp`, `fx`,
#      `dn` and `u` are used but never defined in the visible text, so their
#      definitions are presumably part of what was lost. TODO confirm against
#      the original file; do not guess at the missing expressions.
#
# Overview, taken from the in-line comments and visible code only:
#   - `chars` lists character classes (function, 1-modifier, 2-modifier,
#     separator, gets, bracket, input, numeric, digit, alphabetic, whitespace,
#     preprocessed); `charSet` is their concatenation, `cgl` the class lengths,
#     and `cm` pairs each class's starting offset with its length.
#     `bS bG bB bI bN bA bW` select the separator, gets, bracket, input,
#     numeric, alphabetic and whitespace entries of `cm`.
#   - Tokenization: comments are filtered out and strings and character
#     literals extracted (`f`, `chr`, `str`); words are split into identifiers
#     (`ide`) and numbers (`num`, "at most 2 digits"); literals and identifiers
#     are deduplicated and numbered starting at `vi`; all separators are made
#     equivalent and redundant ones dropped. Ends at "End of tokenization!".
#   - Bracket roles, then "Parsing part 1" and "Parsing part 2".
#   - Object code generation: `oc` is ordered by the grade `or`.
#   - The final expression is a 4-element list: object code, a constants list
#     built from `u⊏𝕨` and `1↓k`, per-function data from `fz`, and per-body
#     data from `cz`.
#     NOTE(review): 𝕨 looks like a table of runtime values indexed by `u` -
#     confirm against the caller.
# Compiler simplified twice # Suitable for compiling once-simplified compiler # Single-scope; no modified or list assignment; no 2-modifiers { charSet←∾chars←⟨ "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function "˙˜˘¨⌜⁼´˝`" # 1-modifier "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊" # 2-modifier "⋄,"∾lf←@+10 # Separator "←↩" # Gets "(){}⟨⟩" # Bracket "𝕊𝕩𝕨" # Input "¯π∞" # Numeric (⊑"0")+↕10 # Digit ⥊"aA"+⌜↕na←26 # Alphabetic " " # Whitespace sc←"#""@" # Preprocessed characters ⟩ cm←((0»+`)⋈¨⊢)cgl←≠¨chars bS←3⊑cm⋄bG←4⊑cm⋄bB←5⊑cm⋄bI←6⊑cm⋄bN←7⊑cm⋄bA←9⊑cm⋄bW←10⊑cm sep←⊑bS vi←⊑bN # Start of identifier numbering charRole←∾cgl⥊¨⟨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/⟨0,1⟩,4,0⟩ TT←⌈`× ⋄ IT←(↕≠)TT⊢ ⋄ I1T←(1+(↕≠))TT⊢ # Comments and strings s←≠`dd←𝕩=1⊑sc f←s<(I1T s<𝕩=⊑sc)≤I1T𝕩=lf # Filter comments chr←@¨ci←/f∧𝕩=2⊑sc # Characters (indices ci) f↩f>qe←dd∧«sd←s∧dd # Quote Escape si←sd>»qe # String indices str←𝕩⊔˜1-˜(+`si∾1)×(si»l # Word chars l, start w wi←(⊑bA)≤w/t # Type: 0 number, 1 identifier r←t⊏charRole # Role t↩t-na×l∧t≥na+⊑bA # Case-insensitive i←l>n←l∧(+`w)⊏0∾¬wi # Identifier/Number masks ide←(1-˜i×+`w>n)⊔t⊏charSet # Identifiers # Numbers, at most 2 digits nt←((⊢∨«)0∾n)/0∾t×l # Number tokens separated by 0 nn←nt=⊑bN⋄m←¬nn∨0=nt # Mask for ¯; digits nl←(0∾⟨π,∞⟩∾↕10)⊏˜m×nt-⊑bN # Digit lookup ns←⟨1,¯1⟩⊏˜(m>»m)/»nn # Negate if ¯ num←ns×(m>«m)/nl+10×»nl # Numeric values # Deduplicate literals and identifiers; other cleanup # Identifiers then literal tokens are numbered starting at vi ki←((⍒wi)⊏/w)∾(ci∾/si)⊏+`»f # Indices in t k←⟨ide,⟨⟩,num,chr,str⟩ # Constants k↩k/¨˜(⊢>¯1»⌈`)¨j←⊐¨k # IDs j into uniques k wr←w∨¬l∨t=⊑bW⋄r↩wr/r⋄c←≠t t↩wr/(c↑⍋(⊢+c×⊒)ki∾↕c)⊏(∾j++`vi»≠¨k)∾t # Add IDs; remove words/whitespace t↩t-(t<+´bS)×(⊢×0≤⊢)t-⊑bS # Separators are equivalent pb←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) eb←⟨3,5⟩+⊑bB # End brackets that allow separators sk←sb/˜pb>(⊢∨«)eb∊˜pb+(sb-pb)⊏t # Keep the first of each group that's not just inside a bracket 
sr←((≠t)↑/⁼sk)∨t≠sep⋄r↩sr/r⋄t↩sr/t # Remove the rest 𝕩↩t⋄nv←≠⊑k # End of tokenization! # Bracket roles # Open brackets initially have role ¯1 and closed ones have role 0 gb←⍋+`p←(¯1-2×r)×(𝕩≥⊑bB)∧𝕩<+´bB # Paren (actually any bracket type) depth and grade r↩r+𝕩=3+⊑bB # Assume blocks are functions cp←𝕩=1+⊑bB # Closed paren rp←(⍋gb)⊏(≠gb)»gb # Position of previous, for roles ir←((IT cp≤⊢)⊏⊢)(rp⊏0∾˜3=r)∨00⋄bc←/br<0 # Block Begin (mask) and Close (index), in matching order 𝕩↩gr⊏𝕩⋄r↩gr⊏r⋄ir↩gr⊏ir # Parsing part 1 a←¯3=r⋄ps←a⊢)(⍋+`ig⊏d∾¯1¨dv)⊏ig # Last order by declaration depth d↩ig⊏d⋄id↩ig⊏id ia←0<(id∾sp)⊏ac # Which are assignments idd←(⊢-(IT d)⊏⊢)id⊏fd # Identifier frame depth idi←((¯1+`d)⊏(⍋⍋d/ig)⊏⊢)(⊒+⊢⊏fsc˙)df # Slot within frame spi←3+sp⊏xv # Special name index uu←(1«d)∧d((+`⊣)⊏1(∾/∾˜)(∨/⊣))0)ia,idd∾0¨sp,idi∾spi⟩ # Identifier bytecode: instruction, depth, slot # Parsing part 2 ta←tr∧2(>∨|)ps(⊢-TT)+`¬ro # Train argument (first-level) fa←/(ta∨ro∨(«∨⊢)psls⋄rt←/fo # Drop (block separator) and return fl←(⊢+dy×⊢⊏os˙)fa+dy # Function application site # Object code generation: numbers oc ordered by source location (after rev) oi or←⍋idor∾g⊏˜∾⟨cn,cn,bk,bk,2/lb,at,dr,oa+1⌈oa⊏os,fl,rt⟩ oc←or⊏∾idoc∾⟨0¨cn,ob,1¨bk,1+↕≠bk,⥊⍉(11+lb⊏aa)≍ll,48+ao,6¨dr 24+oa⊏r,16+dy+4×fa⊏tr,¯1↓rc←7¨fx⟩ # Output fz←⟨0¨fx,¬fx,↕≠fx⟩ # Per-function data cz←⟨/1∾or≥(≠oc)-≠rt,fsc+≠¨dn,dn,0¨¨dn⟩ # Per-body data ⟨oc∾¯1⊑rc,∾⟨u⊏𝕨⟩∾1↓k,<˘⍉>fz,<˘⍉>cz⟩ # Overall output }