# Character classes, CharCode and Tokenize.
# - charSet‿cgl: all recognised characters joined into one list, and the length of each class;
#   bF…bP are the (start,length) pairs of those classes, tested with M.
# - CharCode: maps characters to their index in charSet, raising "Unknown character(s)" otherwise.
# - Tokenize: 𝕨 is System‿vars, 𝕩 is the source text; the result is ⟨t,r,k,is,ie⟩.
# NOTE(review): the line breaks of this file appear to have been lost. A BQN comment runs from # to the
#   end of the line, so on each physical line below everything after the first # is comment text.
#   Restore the original line breaks before trying to run this.
# NOTE(review): several statements look truncated (for example the one ending in "»⊸us", the one
#   starting "wi←0n←l∧", and the "Unclosed quote" check) — presumably text between a "less than" and a
#   "greater than" character was stripped. Recover them from a clean copy; do not guess.
func‿mod1‿mod2 ← •args lf←@+10 charSet‿cgl←(∾ ≍○< ≠¨)⟨ func # Function mod1 # 1-modifier mod2 # 2-modifier "⋄,"∾lf # Separator ":;?" # Header punctuation "⇐←↩" # Gets "(){}⟨⟩" # Bracket "‿" # Ligature "·" # nOthing # Use last character in case of UTF-16 (like dzaima/BQN) ¯1⊏˘11‿∘⥊"𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕣𝕗𝕘"# Input ".¯π∞" # Numeric '0'+↕10 # Digit "_"∾˜⥊"aA"+⌜↕na←26 # Alphabetic "•"∾(¯1↓"𝕨")∾" "∾@+9 # Whitespace (or special name prefix in UTF-16) "#'""@" # Preprocessed characters ⟩ bF‿b1‿b2‿bS‿bH‿bG‿bB‿bL‿bO‿bX‿bN‿bD‿bA‿bW‿bP←≍¨˜⟜(0»+`)cgl M←1⊸⊑(0⊸≤∧>)-⟜⊑ # ∊ for an init,length pair 𝕩 as above sep←⊑bS bI←bX+≍⟜-5⋄bR←8+⊑bX Pl←∾⟜("s"/˜1<≠) # Pluralize _tmpl←{∾𝕗{𝕎𝕩}¨<𝕩} # Template # Convert characters to numbers, mostly the same as tokens CharCode←charSet{ ErrUnknownChars←!⟨"Unknown character"⊸Pl,": ",⊢⟩_tmpl Chk ← ⊢⊣ErrUnknownChars∘(≠/⊣)⍟≢⟜(⊏⟜𝕗) g←⍋𝕗 ⋄ ⊢ Chk g⊏˜1-˜1⌈(g⊏𝕗)⍋⊢ } swap_undo←CharCode∊⟜mod1⊸/"˜⁼" vd←1+vi←⊑bN # Start of identifier numbering (plus dot) charRole←4∾˜∾⥊¨˜⟜(≠↑cgl˙)⟨1,2,3,¯1,¯1,¯3,¯1‿0,¯2,0,¬/5‿6⟩ # For first vd chars T←⌈`× ⋄ IT←↕∘≠⊸T ⋄ I1T←(1+↕∘≠)⊸T PN←1(∾/∾˜)(∨/⊣) # Partitioned-none: partitions where 𝕨<𝕩 is never 1 # Source to ⟨tokens, roles, number of identifiers, literals⟩ # Identifiers then literal tokens are numbered starting at vi Tokenize←{System‿vars←𝕨 # Resolve comments and strings c←𝕩='#'⋄s←/0‿0⊸«⊸∧sm←𝕩='''⋄d←/dm←𝕩='"' g←⍋q←∾⟨ s⋄¯1↓d⋄/c⟩ ⋄q↩g⊏q # Open indices e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`c)⊸//(𝕩=lf)∾1⟩ # Matching close indices Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟(0=¯1⊑⊢) # Mark reachable openings St←(≠𝕩)↑·/⁼((≠↑∾⟜≠Se 1∾0¨)q⍋e)⊸/ # All indices → reached mask a←St q⋄b←St e⋄f←1≠`ab←a∨b # Open/close masks; filter {!⟨⊑/𝕩,"Unclosed quote"⟩}⍟(∨´)(sm∨dm)∧b↩qe←dm∧«a∧↩dm # Quote Escape "" str←𝕩⊔˜1-˜(si←a>»qe)(⊣+`⊸×○(∾⟜1)<)≠`dm∧ab # Strings (indices /si) # Extract words: identifiers and numbers ie←/f⋄is←ie≠⊸↑/1»f # Token start and end is-↩is(-×⊏⟜c)ie # Comment → ending newline only t←CharCode ie⊏𝕩 nd←(t=⊑bN)>«t M bD⋄rr←t=bR # Namespace dot; 𝕣 w←»⊸us 
wk←(¬w/rr)×na⌊∘÷˜(⊑bA)-˜w/t # Kind of word from first char t-↩na×l∧t≥na+⊑bA # Case-insensitive {!⟨𝕩/is,"System dot with no name"⟩}⍟(∨´)sy>«l w≠↩»⊸∨sy # Start system word at dot wi←0n←l∧(+`w)⊏0∾¬wi # Identifier/Number masks num←is ReadNums○(((0∾us)<∨⟜«0∾n)/0⊸∾) t×l # Numbers ir←(us/˜«⊸us)×+`w>n # Identifier groups and first character fr←(1=wi/wt)us)∧(rr⊸≠∨if⊸<)ig⊏0∾fr {!⟨is⊏˜𝕩/𝕨,"Numbers can't start with underscores"⟩}⍟(∨´⊢)⟜(ws<(⊑bA)>⊏⟜t)/rrrr)∾(ci∾/si)⊏+`»f # Indices in t k←id∾num‿chr‿str⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k # IDs j into uniques k k↩System⌾(1⊸⊑)k # System value lookup wf←¬l∨t M bW⋄is/˜↩wf∨w0⋄ie/˜↩wf∨>⟜«l # Index management for... t↩(w∨wf)/(vars≠⊸↓∾j++`vd»kk←≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace t-↩t(M×-⟜⊑)bS # Separators are equivalent p←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) sk←sb/˜p>∨⟜«(M⟜bH∨(3‿5+⊑bB)∊˜p⊸+)(sb-p)⊏t # Keep the first of each group that's not just inside a bracket t{is/˜↩𝕨⋄ie/˜↩𝕨⋄𝕨/𝕩}˜↩1¨⌾(sk⊸⊏)t≠sep # Remove the rest im←(t=bR)∨t M vd≍+´2↑kk # Identifier (or 𝕣) mask r←ir⌾(im⊸/)(vd⌊t)⊏charRole∾0 # Role t+↩(⊑bX)((⊢M≍⟜5)×5+3⊸+⊸≤)t # Case-insensitive special names t-↩vi(<+10×=)t # Shift . to bX and variables back one ⟨t,r,k,is,ie⟩ } # 𝕩 is a list of tokens that contains the numeric literals, each # preceded by 0. Return the numbers. 
# ReadNums, followed by the body of the parser / code generator.
# - ReadNums: 𝕩 is a list of tokens holding the numeric literals, each preceded by 0; 𝕨 is handed to
#   _err_ so that errors can be attached to positions. It rejects letters other than e, misplaced ¯,
#   empty portions, ill-formed decimal/exponent use and π or ∞ combined with other characters.
# - The remaining code, in the order of its own section comments: bracket and ligature validation,
#   parsing order, headers, role propagation through parentheses, lexical resolution, parsing parts
#   1 and 2, and object code generation.
# NOTE(review): the line breaks of this file appear to have been lost. A BQN comment runs from # to the
#   end of the line, so on each physical line below everything after the first # is comment text.
# NOTE(review): the end of ReadNums and the header of the function that follows it are not present in
#   this copy (the text jumps from the power-of-10 scaling straight into a bracket-mismatch message),
#   and other statements further down look truncated as well. Recover them from a clean copy.
ReadNums←{ _err_←{(!/⟜𝔾≍○<𝔽)⍟(∨´)} EChars←⟨"Letter"⊸Pl," """,⊏⟜charSet,""" not allowed in numbers"⟩_tmpl e‿d‿n‿p‿i←=⟜𝕩¨((⊑bA)+-´"ea")∾+⟜↕´bN # Masks for e.¯π∞ EChars∘(/⟜𝕩)_err_𝕨 (𝕩=bR)∨¬e∨𝕩<⊑bA s←d∨c←e∨z←0=𝕩⋄m←¬n∨c "Negative sign in the middle of a number"_err_𝕨 n>»c "Portion of a number is empty"_err_𝕨 (1«s)∧n∨s "Ill-formed decimal or exponent use"_err_(s/𝕎) ¬(0⊸=∨»⊸<)s/𝕩 "π and ∞ must occur alone"_err_𝕨 (p∨i)>1(»∧(p∧«e)∨«)z∨n>»e f←(17≥¬(⊢-T)+`)⊸∧g←(«≤(d<𝕩≠⊑bD)>○I1T¬)⊸∧m # No leading 0s; max 17 digits l←(¯1∾π‿1∾↕10)⊏˜(¬d)/f×𝕩-1+⊑bN # Digit lookup, with ∞ as 1 to avoid ∞×0 v←(>⟜«0≤l)/0(0⊸≤××⟜10⊸+)`l # Numeric values—mantissas and exponents v×↩1‿¯1⊏˜(r←>⟜»m)/»n # Negate if ¯ vm←c/z # Mask of mantissas in l dp←vm/f(--»⊸-(<×⊢)⊏⟜(I1T«d)⊸-)○(/>⟜«)g # Decimal position t←10⋆|ee←dp-˜vm/«v׬vm # Power of 10 t÷˜⌾((0>ee)⊸/)t×⌾((0○≠◶⟨"opening"_mis_ 0, "closing"_mis_ 1˜⟩ ! 𝕨 (Lcs<⌊○≠)◶⟨Msg,"Malformed bracket nesting"⟩ 𝕩 } _err_←{(!(∧∘⍉(i≍e˙)⊏⎉1˜/⟜𝔾)≍○<𝔽)⍟(∨´⍟=)} # Bracket and ligature validation and handling # Open brackets have role ¯1 and closed ones have role 0 "Empty program" ! 0<≠𝕩 g←⍋pd←+`p←(¯1-2×r)×𝕩 M bB⋄gb←g⊏r=¯1 # Paren (actually any bracket type) depth and grade (g⊏p)(>⟜0⊸/ErrMismatchedBrackets⍟≢1-˜<⟜0⊸/)gx←g⊏𝕩 "Swapped open and closed brackets" ! 
0≤(⊑g)⊑pd "Parentheses can't contain separators"_err_(gb/G) ((⊑bB)⊸=»⊸∧(2+⊑bH)⊸=∨sep⊸=)gbx←gb/gx "Predicates can't be placed inside lists"_err_((sep≠GBX)/gb/G) ((4+⊑bB)⊸=»⊸∧(2+⊑bH)⊸=)sep⊸≠⊸/gbx {"Empty statement or expression"_err_(𝕩/0∾G) (4+⊑bB)≠𝕩/0∾gx}1⊸«⊸∧1∾gb dl←«⊸∨dc←r=4 # Dot left r-↩(𝕩=⊑bG)>ec←«dc<0≤r+p # Role ¯4 for exports: ⊑bG is ⇐ "Invalid assignment or stranding use"_err_(↕∘≠) ((¯4⊸<∧≤⟜¯2)r)>(ec∨𝕩=2+⊑bG)∧»dc<0≤r "Dot must be followed by a name"_err_(↕∘≠) dc>«𝕩 M vi‿nv sr←»⌾(((⍋⊏⟜dl)⊸⊏g)⊸⊏)sl←«⊸∨r=¯2⋄ns←¬sl∨sr # Strand right and left; not stranded cp←𝕩=1+⊑bB # Closed paren nr←(IT¬cp)⊏(𝕩=2+⊑bI)+2×𝕩=⊑bO # Nothingness role: 1 for 𝕨, 2 for · nx←0 ⋄ nei←↕≠nr ⋄ _nerr←{𝕗 _err_ nei 2=nx⌈↩𝕩} "Can't strand Nothing (·)"_nerr nr׬ns g⊏˜↩⍋g⊏sdl←sl∨dl # Avoid reordering strands and dots in rev rp←≠⊸»⌾(g⊸⊏)↕≠r # Position of previous, for roles # Permutation to reverse each expression: *more* complicated than it looks rev←⍋+`¯1↓(¯1∾g)(⊣⍋⊸⊏⊏˜⟜⍋¬⊏˜)⍋+`⊸+1∾g⊏sdl∨r=¯1 gf←⍋fd←+`br←rev⊏p×𝕩M⟨2+⊑bB,2⟩ # Order by brace depth fd to de-nest blocks rev⊏˜↩gf⋄fd⊏˜↩gf⋄br⊏˜↩gf 𝕩⊏˜↩rev⋄dc⊏˜↩rev⋄i⊏˜↩rev⋄e⊏˜↩rev # Compute parsing ordering gr≡g⊏rev g↩⍋+`p↩br-˜rev⊏p⋄g⊏˜↩⍋g⊏«⊸∨dc⋄gr←g⊏rev # Order by non-brace bracket depth, then dots sll←1+2÷˜0(<-○/>)gr⊏sr-sl⋄l←/g⊏𝕩=5+⊑bB # Strand length; list starts b←br>0⋄c←/br<0 # Block Begin (mask) and Close (index), in matching order bp←⟨/b,c⟩∾¨0(<≍○()g⊏p # Bracket pairs g⊏˜↩gs←⍋gr⊏sl⋄gr↩g⊏rev⋄gi←⍋g # Send strand prefixes *‿ to the end # Headers hh←𝕩=⊑bH⋄cs←𝕩=1+⊑bH⋄qm←𝕩=2+⊑bH # Case header : and separator ; and predicate ? "Punctuation : ; ? 
not allowed outside blocks"_err_(↕∘≠) (∨`b)¯1↓cq cc←(⍋⍋«co)⊏c∾/cs # Case close hi←/hf←hh⊏˜⟜IT⌾((⌽g)⊸⊏)cb∨hh # Header component indices un←0=us←swap_undo(≠∘⊣-⊐)hi⊏𝕩 "Invalid Undo header syntax"_err_(HI) un<(»⊸≥∨(1»un)∧2⊸=)us ut←un/»us⋄hi/˜↩0=us # Undo type: 0 normal, 1 ⁼, 2 ˜⁼ hr←(⊏⟜ns×⊏⟜r)rev⊏˜hi # Header component roles hl←2=hn←(1⊸»+«)hc←¯1=hr # hl: is label, hc: is : "Only one header per body allowed"_err_(hc/HI) (1+hc/hi)⊏hf ho←(»∨·«(hr=3)∧⊢)hlhla←hl∧(0=hr)∧1≠hk⋄hr+↩hla⋄hl>↩hla # Lone non-name subject is 𝕩 with 𝕊 omitted hv←(hla+ha×1+«hc)+(ho×4+«3=hr)+hma×3×1-˜2⌊hr # Special name for position "Incorrect special name"_err_(HI) (0=hk)∧ht≠hv+⊑bI hk×↩¬hc∨hl∧0=hr # Treat subject labels like special names hm>↩hc⋄hr/˜↩hm⋄hx←(1»hc)/ha # Header-derived role hr and immediacy ¬hx "Invalid Undo header syntax"_err_(HI) hm<×ut "Header left argument without right"_err_(HI) ha>hc+`⊸⊏hx∾0 "Header operation must be a plain name"_err_(HI) hma>hk≠2 ut/˜↩hm cwh←(2×2=ut)⌈hc/»hl⌈ha×1+he←0≠hk cw←cwh⌾(ch⊸/)1+-⟜«(»cq)<1(⊢<«)cf # Body 𝕨: 0 no, 1 allowed, 2 required hl/˜↩hm hx∨↩1=hr⋄hu←(¬he)⌾(hi⊸⊏)hf hj←gi⊏˜he/hi⋄hd←2=he/hk # Block properties ss←0‿3‿5‿6⍋(⊢+(0ch)fx fsc←(ft⊏0‿2‿3)+3×fx # Special name count hv-↩(»+`hc)⊏3׬ch/fx # Header variable slot # Propagate roles through parentheses # ir is the role of the expression ending at each position (truncated to the right) r↩sl-˜ns×(1↓cf/fr)⌾((c⊏rev)⊸⊏)r # Add block roles; make strand elements ¯1 pt←cp∧ns # Pass-through parentheses: not in strands pp←pt∧»es←rp⊏1∾˜r<0 # Parens enclosing one object (maybe with assignment) don't change roles ir←((rp⊏0∾˜(1+es)×3=⊢)⌈⊢-es<2≤⊢)r+pp×(IT¬pp)⊏r # Propagate modifier roles ir⌈↩(IT¬pt∧ir=0)((⊏-⊢)⟜(+`¬pp)(⊢⌊1⌈+)⊏)ir # ...and function roles r+↩pt×»ir # Roles at pt were 0; set them now nr×↩¬nx∨↩(0≠ir)∧1=nr # Assume 𝕎 can't be Nothing ir↩(ir×0=nr)-nr # Include nothingness r-↩(r=¯4)∧1»r=¯1 # Lone ⇐ to role ¯5 "Dot must be preceded by a subject"_err_(⍋∘rev) (r=4)>»r=0 r(×⟜¬-⊢)↩dl # Namespace and dot to ¯1 # Prep 
for lexical resolution before reordering 𝕩 xv←𝕩-vi {i↩(𝕨⊏i)⌾(𝕩⊸⊏)i⋄e↩(𝕩⊏e)⌾(𝕨⊸⊏)e}´bp # Highlight all contents of a pair when error reporting 𝕩⊏˜↩g⋄hg←g⊏hf⋄r⊏˜↩gr⋄ir⊏˜↩gr⋄l↩(l⊏⍋gs)∾/gr⊏sr>sl # Parsing part 1 a←(¯5⊸<∧≤⟜¯3)r⋄ps←a«𝕩=⊑bL)∨𝕩(=⟜(3+⊑)aa af>↩alm←ai⊏aa⋄al←alm/ai # aliases al ai/˜↩af⋄at/˜↩af∾1¨hj # Lexical resolution (independent of parsing part 2 below) di←/dm←»dc # Dots aren't scoped id←/(hu∨dm∨gi⊏«aa∧a)<(0⊸≤∧<⟜nv)xv # Identifier indices in xv sa←0)ia∾sa # Opcode idoc←⟨32¨hj,0¨hj,he/hv 64¨di,di⊏xv, ido,idd∾0¨sp,idi∾spi⟩ # Identifier bytecode: instruction, depth, slot # Parsing part 2 ta←tr∧2(>∨|)ps(⊢-T)+`¬ro # Train argument (first-level) fa←/(fe←hg∨ta∨ro∨«⊸∨psfe∨ff dy←2≠ny←fa⊏2«no⌈2׬(tr∧r≥0)∨ro(2=ne)∨ls∨»r=¯5⋄rt←/fo # Drop (block separator) and return qp←/𝕩=2+⊑bH # Predicate fl←(dy×⊏⟜os)⊸+fa+dy # Function application site dr∾↩((1+dy)×fn←2=fm←fa⊏ne)/fl # Turn function applications on · to drops fn↩¬fn⋄fa/˜↩fn⋄fl/˜↩fn # And remove them # Object code generation: numbers oc ordered by source location (after rev) oi ao←48+(0⌈(1+⊑bG)-˜ai⊏𝕩+ma+mm)∾-hd # Assignment opcode or←⍋oi←idor∾g⊏˜∾⟨cn,cn,bk,bk,hq,2/l,at,dr,qp,al+1,al+1,oa+1⌈oa⊏os,fl,rt⟩ oc←or⊏∾idoc∾⟨0¨cn,ob,1¨bk,1+↕≠bk,43¨hq,⥊⍉(11+l⊏aa)≍ll,ao,6¨dr,42¨qp,66¨al,vi-˜(al-1)⊏𝕩 24+oa⊏r,16+(fn/dy+2×fm⌈1=ny)+4×0blk, <˘⍉>bdy, oi, tx⟩ }