From cb8afb9d48d4e8b083094f5f676ef833a7e849af Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Wed, 25 Jan 2023 22:20:43 -0500 Subject: Don't require boot3 to be able to compile itself --- src/bootstrap/boot3.bqn | 57 +++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 28 deletions(-) (limited to 'src/bootstrap/boot3.bqn') diff --git a/src/bootstrap/boot3.bqn b/src/bootstrap/boot3.bqn index 5c400530..2ca3f731 100644 --- a/src/bootstrap/boot3.bqn +++ b/src/bootstrap/boot3.bqn @@ -1,6 +1,6 @@ -# Compiler simplified three times -# Suitable for compiling twice-simplified compiler -# Probably not any easier to compile, but it's shorter +# Compiles the twice-simplified compiler +# This file isn't part of the bootstrapping chain as its syntax is +# more complicated than boot2. It shows how boot2 can be compiled. { charSet←∾chars←⟨ "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function @@ -14,58 +14,59 @@ (⊑"0")+↕10 # Digit ⥊"aA"+⌜↕na←26 # Alphabetic " " # Whitespace - sc←@+⟨35,34,64⟩ # Preprocessed characters: hash, double quote, @ + sc←"#""@" # Preprocessed characters ⟩ - cm←((0»+`)⋈¨⊢)cgl←≠¨chars + cm←⋈¨˜⟜(0»+`)cgl←≠¨chars bS←3⊑cm⋄bG←4⊑cm⋄bB←5⊑cm⋄bI←6⊑cm⋄bN←7⊑cm⋄bA←9⊑cm⋄bW←10⊑cm + M←1⊸⊑(0⊸≤∧>)-⟜⊑ # ∊ for an init,length pair 𝕩 as above sep←⊑bS vi←⊑bN # Start of identifier numbering charRole←∾cgl⥊¨⟨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/⟨0,1⟩,4,0⟩ - TT←⌈`× ⋄ IT←(↕≠)TT⊢ ⋄ I1T←(1+(↕≠))TT⊢ + TT←⌈`× ⋄ IT←↕∘≠⊸TT # Comments and strings s←≠`dd←𝕩=1⊑sc⋄ss←s∧dd # Strings; string start - f←s<(I1T s<𝕩=⊑sc)≤I1T𝕩=lf # Filter comments + f←s<(s<𝕩=⊑sc)≤○((1+↕∘≠)⊸TT)𝕩=lf # Filter comments chr←@¨ci←/f∧𝕩=2⊑sc # Characters (indices ci) str←𝕩⊔˜1-˜(+`ss∾1)×(ss»l # Word chars l, start w + w←»⊸n)×+`w>n)⊔t⊏charSet # Identifiers # Numbers, at most 2 digits - nt←((⊢∨«)0∾n)/0∾t×l # Number tokens separated by 0 - nn←nt=⊑bN⋄m←¬nn∨0=nt # Mask for ¯; digits - nl←(0∾⟨π,∞⟩∾↕10)⊏˜m×nt-⊑bN # Digit lookup - ns←⟨1,¯1⟩⊏˜(m>»m)/»nn # Negate if ¯ - num←ns×(m>«m)/nl+10×»nl # Numeric values + nt←(∨⟜«0∾n)/0∾t×l # Number tokens separated by 0 + nn←nt=⊑bN⋄nm←¬nn∨0=nt # Mask for ¯; digits + nl←(0∾⟨π,∞⟩∾↕10)⊏˜nm×nt-⊑bN # Digit lookup + ns←⟨1,¯1⟩⊏˜(>⟜»nm)/»nn # Negate if ¯ + num←ns×(>⟜«nm)/(10×»)⊸+nl # Numeric values # Deduplicate literals and identifiers; other cleanup # Identifiers then literal tokens are numbered starting at vi - ki←((⍒wi)⊏/w)∾(ci∾/ss)⊏+`»f # Indices in t + ki←(wi⍒⊸⊏/w)∾(ci∾/ss)⊏+`»f # Indices in t k←⟨ide,⟨⟩,num,chr,str⟩ # Constants - k↩k/¨˜(⊢>¯1»⌈`)¨j←⊐¨k # IDs j into uniques k - wr←w∨¬l∨t=⊑bW⋄r↩wr/r⋄c←≠t - t↩wr/(c↑⍋(⊢+c×⊒)ki∾↕c)⊏(∾j++`vi»≠¨k)∾t # Add IDs; remove words/whitespace - t↩t-(t<+´bS)×(⊢×0≤⊢)t-⊑bS # Separators are equivalent + k↩k(⊢>¯1»⌈`)⊸/¨˜j←⊐¨k # IDs j into uniques k + wr←w∨¬l∨t M bW⋄r↩wr/r + t↩wr/(∾j++`vi»≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace + t↩t-t(M×-⟜⊑)bS # Separators are equivalent pb←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) eb←⟨3,5⟩+⊑bB # End brackets that allow separators - sk←sb/˜pb>(⊢∨«)eb∊˜pb+(sb-pb)⊏t # Keep the first of each group that's not just inside a bracket - sr←((≠t)↑/⁼sk)∨t≠sep⋄r↩sr/r⋄t↩sr/t # Remove the rest + sk←sb/˜pb>∨⟜«eb∊˜pb+(sb-pb)⊏t # Keep the first of each group that's not just inside a bracket + sr←(t≠⊸↑/⁼sk)∨t≠sep⋄r↩sr/r⋄t↩sr/t # Remove the rest 𝕩↩t⋄nv←≠⊑k # End of tokenization! # Bracket roles # Open brackets initially have role ¯1 and closed ones have role 0 - gb←⍋+`p←(¯1-2×r)×(𝕩≥⊑bB)∧𝕩<+´bB # Paren (actually any bracket type) depth and grade + gb←⍋+`p←(¯1-2×r)×𝕩 M bB # Paren (actually any bracket type) depth and grade r↩r+𝕩=3+⊑bB # Assume blocks are functions cp←𝕩=1+⊑bB # Closed paren - ir←((IT cp≤⊢)⊏⊢)0∨|)ps(⊢-TT)+`¬op # Train argument (first-level) - fa←/(ta∨op∨(«∨⊢)psls⋄rt←/fo # Drop (block separator) and return - os←⌽(↕≠op)(⊣-TT)⌽¬op # Operator skip: distance rightward to derived function start - fl←(⊢+dy×⊢⊏os˙)fa+dy # Function application site + os←⌽↕∘≠⊸(⊣-TT)⌽¬op # Operator skip: distance rightward to derived function start + fl←(dy×⊏⟜os)⊸+fa+dy # Function application site # Object code generation: numbers oc ordered by source location (after rev) oi fsc←3×fx←↕2 # Body immediacy ¬fx, special name count @@ -109,6 +110,6 @@ 24+oa⊏r,16+dy+4×fa⊏tr,¯1↓rc←7¨fx⟩ # Output fz←⟨0¨fx,¬fx,↕≠fx⟩ # Per-function data - cz←⟨/1∾or≥(≠oc)-≠rt,fsc+≠¨dn,dn,0¨¨dn⟩ # Per-body data + cz←⟨/1∾or≥oc-○≠rt,fsc+≠¨dn,dn,0¨¨dn⟩ # Per-body data ⟨oc∾¯1⊑rc,∾⟨u⊏𝕨⟩∾1↓k,<˘⍉>fz,<˘⍉>cz⟩ # Overall output } -- cgit v1.2.3