aboutsummaryrefslogtreecommitdiff
path: root/src/bootstrap/boot3.bqn
blob: 45f6ca9fffb7322d41954f8e3b513fdb9a480c8c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Compiles the twice-simplified compiler
# This file isn't part of the bootstrapping chain as its syntax is
# more complicated than boot2. It shows how boot2 can be compiled.
{
  # Single block function holding the whole compiler pipeline.
  # 𝕩 is compared against lf and the string/comment delimiters below, so it is
  # presumably the source text to compile — TODO confirm against the caller.
  # 𝕨 appears only in the final "Overall output" line.
  # NOTE(review): this copy looks like it lost glyphs during extraction (for
  # example there is no assignment arrow between names such as charSet and
  # chars, and no separators between the statements on one line). Compare
  # with the upstream file before trying to run or modify it.

  # Character classes, one entry per class; the trailing comment on each
  # entry names the class.
  charSetchars
    "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function
    "˙˜˘¨⌜⁼´˝`"           # 1-modifier
    "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊"         # 2-modifier
    "⋄,"lf@+10          # Separator
    "←↩"                  # Gets
    "(){}⟨⟩"              # Bracket
    "𝕊𝕩𝕨"                 # Input
    "¯π∞"                 # Numeric
    ("0")+↕10            # Digit
    "aA"+na26         # Alphabetic
    " "                   # Whitespace
    sc"#""@"             # Preprocessed characters
  
  # Per-class data cm is built from the class lengths (cgl) of chars.
  # bS, bG, bB, bI, bN, bA, bW take entries 3, 4, 5, 6, 7, 9, 10 of cm, which
  # in the 0-indexed class list above are Separator, Gets, Bracket, Input,
  # Numeric, Alphabetic and Whitespace. Presumably each is an init,length
  # pair, as the comment on M suggests — TODO confirm.
  cm¨˜(0»+`)cgl¨chars
  bS3cmbG4cmbB5cmbI6cmbN7cmbA9cmbW10cm
  M1(0≤∧>)-   # ∊ for an init,length pair 𝕩 as above
  vibN  # Start of identifier numbering
  # One role entry per class, in the same order as chars (presumably expanded
  # to one role per character via cgl — TODO confirm).
  charRolecgl¨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/0,1⟩,4,0
  # TT and IT are scan-based helpers reused in the string, paren and train
  # steps below.
  TT`×  ITTT

  # Comments and strings
  s`dd𝕩=1scsssdd                     # Strings; string start
  fs<(s<𝕩=⊑sc)((1+↕)TT)𝕩=lf           # Filter comments
  chr@¨ci/f𝕩=2sc                        # Characters (indices ci)
  str𝕩˜1-˜(+`ss1)×(ss<s)1               # Strings

  # Extract words: identifiers and numbers
  tcharSetf/𝕩                             # Tokens
  rtcharRole                              # Role
  w»<lt M bN(⊣⋈-˜)bW                   # Word chars l, start w
  wi(bA)w/t                              # Type: 0 number, 1 identifier
  tt-na×lr=1                              # Case-insensitive
  nl(+`w)0∾¬wi                           # Number mask
  ide(1-˜(l>n)×+`w>n)tcharSet            # Identifiers

  # Numbers, at most 2 digits
  nt(«0n)/0t×l                         # Number tokens separated by 0
  nnnt=⊑bNnm¬nn0=nt                     # Mask for ¯; digits
  nl(0π,∾↕10)˜nm×nt-⊑bN               # Digit lookup
  ns1,¯1˜(>»nm)nn                    # Negate if ¯
  numns×(>«nm)/(10×»)+nl                 # Numeric values

  # Deduplicate literals and identifiers; other cleanup
  # Identifiers then literal tokens are numbered starting at vi
  ki(wi⊏/w)(ci∾/ss)⊏+`»f                # Indices in t
  kide,⟨⟩,num,chr,str                    # Constants
  kk(⊢>¯1»⌈`)/¨˜j¨k                     # IDs j into uniques k
  wrw∨¬lt M bWrwr/r                     # Remove words/whitespace
  twr/(j++`vi»≠¨k)(ki)t                # Add IDs
  pb`1¨sb¯11↓/1(∾≠∾˜)tst M bS          # Separator group boundaries (excludes leading and trailing)
  eb3,5+⊑bB                              # End brackets that allow separators
  sksb/˜pb>∨«eb˜pb+(sb-pb)t             # Keep the first of each group that's not just inside a bracket
  srtst↑/skrsr/rtsr/t              # Remove the rest
  # NOTE(review): 𝕩 appears to be rebound to the cleaned token list t here,
  # since every later use of 𝕩 compares it with class bounds — TODO confirm.
  𝕩tevvi+≠⊑k

  # Bracket roles
  # Open brackets initially have role ¯1 and closed ones have role 0
  gb⍋+`p(¯1-2×r)×𝕩 M bB                   # Paren (actually any bracket type) depth and grade
  rr+𝕩=3+⊑bB                               # Assume blocks are functions
  cp𝕩=1+⊑bB                                # Closed paren
  rr+cp×»(IT cp)0<r                    # Propagate functions through parens

  # Reverse each expression: *more* complicated than it looks
  rev⍋+`¯1(¯1gb)(⊣⍋⊏⊏˜⍋¬⊏˜)⍋+`+1gbr=¯1
  grrev˜g⍋+`revp                        # Then order by bracket depth
  𝕩gr𝕩rgrr

  # Constants
  u∧⍷pr𝕩˜pi/𝕩<⊑bSlt/𝕩ev              # Primitives and literals
  cnpiltob(upr)(ev-˜u)+lt𝕩          # Locations and object numbers
  # Blocks and lists
  xs𝕩 M bSpsr=¯1                         # Separator token; part separator includes {⟨
  bk/𝕩=2+⊑bBrtbk                         # Block load and return
  lb/𝕩=5+⊑bB                               # List starts
  lsxs∧∨`lo𝕩=4+⊑bB                        # List Separators after ⟨lo
  dr/ls<xs                                 # Drop at block separators
  ll(¬lo/1«ps)+-»1(lo1)/+`ls0          # List Length
  # Assignment
  at/aa»ar=¯3                            # Assignment target
  ao(a/𝕩)-⊑bG                              # Assignment opcode
  v/(𝕩≥⊑bI)𝕩<ev                           # Variable indices
  vs(v𝕩)-⊑bI                              # Variable slot
  # Functions and modifiers
  tr0<r˜IT»ps                             # Train
  oa/opr=2                                # Modifier
  tatr2(>∨|)ps(⊢-TT)+`¬op                 # Train argument (first-level)
  fa/(taopaa)<(r=1)∨»op                  # Active function
  os⌽↕(⊣-TT)⌽¬op                        # Operator skip: distance rightward to derived function start
  dyfa⊏«(trr0)op<r=0                    # Dyadic
  fl(dy×⊏os)+fa+dy                       # Function application site
  olos+oa                               # Modifier application site

  # Object code generation: numbers oc ordered by source location after rev
  fsc3×fx2                               # Body immediacy ¬fx, special name count
  # or lists the source locations and oc the matching code numbers; the two
  # lists name the same groups in the same order (v, cn, bk, lb, at, dr, ol,
  # fl, rt), so their element order must be kept in step.
  org˜v,v,v,cn,cn,bk,bk,lb,lb,at,dr,ol,fl,rt
  ocor 32+vaa,0¨v,vs,0¨cn,ob,1¨bk,1+↕≠bk,11+lbaa,ll,48+ao,6¨dr
           24+oar,16+dy+4×fatr,¯1rc7¨fx
  # Output
  fz0¨fx,¬fx,↕≠fx                        # Per-function data
  lc0⋈+´0=ao                               # Body locals count
  cz/1oroc-rt,fsc+lc,¨lc,0¨lc     # Per-body data
  oc¯1rc,u𝕨1k,<˘⍉>fz,<˘⍉>cz       # Overall output
}