From 4569b21bae0c7056fb2199443a571b80c0931641 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Wed, 25 Jan 2023 18:19:23 -0500 Subject: Add boot3 to take advantage of some boot2 simplifications --- src/bootstrap/boot2.bqn | 2 +- src/bootstrap/boot3.bqn | 116 +++++++++++++++++++++++++++++++++++++++++++++++ src/bootstrap/verify.bqn | 6 ++- 3 files changed, 121 insertions(+), 3 deletions(-) create mode 100644 src/bootstrap/boot3.bqn (limited to 'src') diff --git a/src/bootstrap/boot2.bqn b/src/bootstrap/boot2.bqn index e009eb6e..dfdc76ec 100644 --- a/src/bootstrap/boot2.bqn +++ b/src/bootstrap/boot2.bqn @@ -14,7 +14,7 @@ (⊑"0")+↕10 # Digit ⥊"aA"+⌜↕na←26 # Alphabetic " " # Whitespace - sc←⟨35,34,64⟩+@ # Preprocessed characters: hash, double quote, @ + sc←@+⟨35,34,64⟩ # Preprocessed characters: hash, double quote, @ ⟩ cm←((0»+`)⋈¨⊢)cgl←≠¨chars bS←3⊑cm⋄bG←4⊑cm⋄bB←5⊑cm⋄bN←7⊑cm⋄bA←9⊑cm⋄bW←10⊑cm diff --git a/src/bootstrap/boot3.bqn b/src/bootstrap/boot3.bqn new file mode 100644 index 00000000..ccbd2d28 --- /dev/null +++ b/src/bootstrap/boot3.bqn @@ -0,0 +1,116 @@ +# Compiler simplified three times +# Suitable for compiling twice-simplified compiler +# Probably not any easier to compile, but it's shorter +{ + charSet←∾chars←⟨ + "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function + "˙˜˘¨⌜⁼´˝`" # 1-modifier + "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊" # 2-modifier + "⋄,"∾lf←@+10 # Separator + "←↩" # Gets + "(){}⟨⟩" # Bracket + "𝕊𝕩𝕨" # Input + "¯π∞" # Numeric + (⊑"0")+↕10 # Digit + ⥊"aA"+⌜↕na←26 # Alphabetic + " " # Whitespace + sc←@+⟨35,34,64⟩ # Preprocessed characters: hash, double quote, @ + ⟩ + cm←((0»+`)⋈¨⊢)cgl←≠¨chars + bS←3⊑cm⋄bG←4⊑cm⋄bB←5⊑cm⋄bI←6⊑cm⋄bN←7⊑cm⋄bA←9⊑cm⋄bW←10⊑cm + sep←⊑bS + vi←⊑bN # Start of identifier numbering + charRole←∾cgl⥊¨⟨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/⟨0,1⟩,4,0⟩ + TT←⌈`× ⋄ IT←(↕≠)TT⊢ ⋄ I1T←(1+(↕≠))TT⊢ + + # Comments and strings + s←≠`dd←𝕩=1⊑sc⋄ss←s∧dd # Strings; string start + f←s<(I1T s<𝕩=⊑sc)≤I1T𝕩=lf # Filter comments + chr←@¨ci←/f∧𝕩=2⊑sc # Characters (indices ci) + str←𝕩⊔˜1-˜(+`ss∾1)×(ss»l # Word chars l, start w + wi←(⊑bA)≤w/t # Type: 0 number, 1 identifier + t↩t-na×l∧r=1 # Case-insensitive + n←l∧(+`w)⊏0∾¬wi # Number mask + ide←(1-˜(l>n)×+`w>n)⊔t⊏charSet # Identifiers + + # Numbers, at most 2 digits + nt←((⊢∨«)0∾n)/0∾t×l # Number tokens separated by 0 + nn←nt=⊑bN⋄m←¬nn∨0=nt # Mask for ¯; digits + nl←(0∾⟨π,∞⟩∾↕10)⊏˜m×nt-⊑bN # Digit lookup + ns←⟨1,¯1⟩⊏˜(m>»m)/»nn # Negate if ¯ + num←ns×(m>«m)/nl+10×»nl # Numeric values + + # Deduplicate literals and identifiers; other cleanup + # Identifiers then literal tokens are numbered starting at vi + ki←((⍒wi)⊏/w)∾(ci∾/ss)⊏+`»f # Indices in t + k←⟨ide,⟨⟩,num,chr,str⟩ # Constants + k↩k/¨˜(⊢>¯1»⌈`)¨j←⊐¨k # IDs j into uniques k + wr←w∨¬l∨t=⊑bW⋄r↩wr/r⋄c←≠t + t↩wr/(c↑⍋(⊢+c×⊒)ki∾↕c)⊏(∾j++`vi»≠¨k)∾t # Add IDs; remove words/whitespace + t↩t-(t<+´bS)×(⊢×0≤⊢)t-⊑bS # Separators are equivalent + pb←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) + eb←⟨3,5⟩+⊑bB # End brackets that allow separators + sk←sb/˜pb>(⊢∨«)eb∊˜pb+(sb-pb)⊏t # Keep the first of each group that's not just inside a bracket + sr←((≠t)↑/⁼sk)∨t≠sep⋄r↩sr/r⋄t↩sr/t # Remove the rest + 𝕩↩t⋄nv←≠⊑k + # End of tokenization! + + # Bracket roles + # Open brackets initially have role ¯1 and closed ones have role 0 + gb←⍋+`p←(¯1-2×r)×(𝕩≥⊑bB)∧𝕩<+´bB # Paren (actually any bracket type) depth and grade + r↩r+𝕩=3+⊑bB # Assume blocks are functions + cp←𝕩=1+⊑bB # Closed paren + ir←((IT cp≤⊢)⊏⊢)0∨|)ps(⊢-TT)+`¬op # Train argument (first-level) + fa←/(ta∨op∨(«∨⊢)psls⋄rt←/fo # Drop (block separator) and return + os←⌽(↕≠op)(⊣-TT)⌽¬op # Operator skip: distance rightward to derived function start + fl←(⊢+dy×⊢⊏os˙)fa+dy # Function application site + + # Object code generation: numbers oc ordered by source location (after rev) oi + fsc←3×fx←↕2 # Body immediacy ¬fx, special name count + or←⍋idor∾g⊏˜∾⟨cn,cn,bk,bk,2/lb,at,dr,oa+1⌈oa⊏os,fl,rt⟩ + oc←or⊏∾idoc∾⟨0¨cn,ob,1¨bk,1+↕≠bk,⥊⍉(11+lb⊏aa)≍ll,48+ao,6¨dr + 24+oa⊏r,16+dy+4×fa⊏tr,¯1↓rc←7¨fx⟩ + # Output + fz←⟨0¨fx,¬fx,↕≠fx⟩ # Per-function data + cz←⟨/1∾or≥(≠oc)-≠rt,fsc+≠¨dn,dn,0¨¨dn⟩ # Per-body data + ⟨oc∾¯1⊑rc,∾⟨u⊏𝕨⟩∾1↓k,<˘⍉>fz,<˘⍉>cz⟩ # Overall output +} diff --git a/src/bootstrap/verify.bqn b/src/bootstrap/verify.bqn index 66939576..c3b9d737 100644 --- a/src/bootstrap/verify.bqn +++ b/src/bootstrap/verify.bqn @@ -1,8 +1,8 @@ glyphs ← •Import "../glyphs.bqn" gl ← ("⟨"∾"⟩"«∾","⊸∾¨'"'(⊣∾∾˜)¨glyphs) # Has to replace •args in c.bqn -f ← "../c.bqn"‿"boot1.bqn"‿"boot2.bqn" # Files to test -c ← (1‿2/⟨glyphs⊸•Import,•Import⟩) {𝕎𝕩}¨ f # Resulting compilers +f ← ⟨"../c.bqn"⟩∾("boot"∾∾⟜".bqn")¨'1'+↕3 # Files to test +c ← (1‿3/⟨glyphs⊸•Import,•Import⟩) {𝕎𝕩}¨ f # Resulting compilers c ↩ (∾glyphs){𝕗⊸𝕏}¨ c t ← (∾∾⟜(@+10)¨)¨ (¯5⊸↓∾gl˙)⌾⊑⌾⊑ •FLines¨ f # Compiler source @@ -12,3 +12,5 @@ t ← (∾∾⟜(@+10)¨)¨ (¯5⊸↓∾gl˙)⌾⊑⌾⊑ •FLines¨ f # Compi •Out "Boot -1 verified!" ! ≡○(⋈⁼∘∾⍟=¨⌾(2⊑¨2⊸⊑) 4⊸↑)´ (0‿2⊏c) {𝕎𝕩}¨ 1⊏t •Out "Boot -2 verified!" +! ≡○(⋈⁼∘∾⍟=¨⌾(2⊑¨2⊸⊑) 4⊸↑)´ (0‿3⊏c) {𝕎𝕩}¨ 2⊏t +•Out "Boot -3 verified!" -- cgit v1.2.3