aboutsummaryrefslogtreecommitdiff
path: root/src/bootstrap/boot2.bqn
blob: 65ac35d7daa0055b4fc942afe281b35306cc9f3f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# Compiler simplified twice
# Suitable for compiling once-simplified compiler
# Single-scope; no modified or list assignment; no 2-modifiers
{
  charSetchars
    "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function
    "˙˜˘¨⌜⁼´˝`"           # 1-modifier
    "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊"         # 2-modifier
    "⋄,"lf@+10          # Separator
    "←↩"                  # Gets
    "(){}⟨⟩"              # Bracket
    "𝕊𝕩𝕨"                 # Input
    "¯π∞"                 # Numeric
    ("0")+↕10            # Digit
    "aA"+na26         # Alphabetic
    " "                   # Whitespace
    sc@+35,34,64       # Preprocessed characters: hash, double quote, @
  
  cm((0»+`)¨)cgl¨chars
  bS3cmbG4cmbB5cmbN7cmbA9cmbW10cm
  sepbS
  vibN  # Start of identifier numbering
  charRolecgl¨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/0,1⟩,4,0
  TT`×  IT(↕≠)TT⊢  I1T(1+(↕≠))TT⊢

  # Comments and strings
  s`dd𝕩=1sc
  fs<(I1T s<𝕩=⊑sc)≤I1T𝕩=lf                 # Filter comments
  chr@¨ci/f𝕩=2sc                        # Characters (indices ci)
  ff>qedd∧«sdsdd                        # Quote Escape
  sisdqe                                 # String indices
  str𝕩˜1-˜(+`si1)×(si<s)1               # Strings

  # Extract words: identifiers and numbers
  tcharSetf/𝕩                             # Tokens
  rtcharRole                              # Role
  l(t≥⊑bN)t<⊑bWwll                    # Word chars l, start w
  wi(bA)w/t                              # Type: 0 number, 1 identifier
  tt-na×lr=1                              # Case-insensitive
  nl(+`w)0∾¬wi                           # Number mask
  ide(1-˜(l>n)×+`w>n)tcharSet            # Identifiers

  # Numbers, at most 2 digits
  nt((⊢∨«)0n)/0t×l                       # Number tokens separated by 0
  nnnt=⊑bNm¬nn0=nt                      # Mask for ¯; digits
  nl(0π,∾↕10)˜m×nt-⊑bN                # Digit lookup
  ns1,¯1˜(mm)nn                     # Negate if ¯
  numns×(mm)/nl+10×»nl                   # Numeric values

  # Deduplicate literals and identifiers; other cleanup
  # Identifiers then literal tokens are numbered starting at vi
  ki((wi)⊏/w)(ci∾/si)⊏+`»f               # Indices in t
  kide,⟨⟩,num,chr,str                    # Constants
  kk/¨˜(⊢>¯1»⌈`)¨j¨k                     # IDs j into uniques k
  wrw∨¬lt=⊑bWrwr/rct
  twr/(c↑⍋(⊢+c×⊒)ki∾↕c)(j++`vi»≠¨k)t    # Add IDs; remove words/whitespace
  tt-(t<+´bS)×(⊢×0≤⊢)t-⊑bS                 # Separators are equivalent
  pb`1¨sb¯11↓/1(∾≠∾˜)t=sep              # Separator group boundaries (excludes leading and trailing)
  eb3,5+⊑bB                              # End brackets that allow separators
  sksb/˜pb>(⊢∨«)eb˜pb+(sb-pb)t           # Keep the first of each group that's not just inside a bracket
  sr((t)↑/sk)tseprsr/rtsr/t        # Remove the rest
  𝕩tnv≠⊑k
  # End of tokenization!

  # Bracket roles
  # Open brackets initially have role ¯1 and closed ones have role 0
  gb⍋+`p(¯1-2×r)×(𝕩≥⊑bB)𝕩<+´bB           # Paren (actually any bracket type) depth and grade
  rr+𝕩=3+⊑bB                               # Assume blocks are functions
  cp𝕩=1+⊑bB                                # Closed paren
  rp(gb)(gb)»gb                         # Position of previous, for roles
  ir((IT cp≤⊢)⊏⊢)(rp0˜3=r)0<r           # Role of the expression ending at each position
  rr+cp×»ir                                # Roles at cp were 0; set them now

  # Reorder for parsing
  # Permutation to reverse each expression: *more* complicated than it looks
  rev⍋+`¯1(¯1gb)((⍋⊣)((⍋⊢)⊏⊣)¬⊏˜)(+`+⊢)1gbr=¯1
  gffd+`brrevp×𝕩2,3+⊑bB             # Order by brace depth fd to de-nest blocks
  revgfrevfdgffdbrgfbr
  xvrev𝕩-vi                               # Save for lexical resolution
  g⍋+`br-˜revp                            # Order by non-brace bracket depth
  grgrevgig                            # Final parsing ordering
  bbr>0bc/br<0                           # Block Begin (mask) and Close (index), in matching order
  𝕩gr𝕩rgrrirgrir

  # Parsing part 1
  a¯3=rpsa<r<0                           # a: assignment, ps: part separator
  trir˜IT»ps                              # tr: train or modifier expression
  oa⌽/opr2roop∨«opr=3                 # op: active modifiers; ro: mod or right operand
  xs𝕩=sepfo𝕩=2+⊑bB                       # Separators, function open {
  lsxs(IT fo)<IT lo𝕩=4+⊑bB               # List Separators: after ⟨lo, not {fo
  matr<(𝕩=1+⊑bG)∧«ir1                     # Modified assignment
  os(↕≠ro)(⊣-TT)⌽¬roma                   # Operator skip: distance rightward to derived function start
  at1+(⊢+os˜)ai/a                       # Assignment target
  ao(bG)-˜ai𝕩+ma                         # Assignment opcode
  akgi(«-⊢)(⍋⍷at∾↕≠𝕩)(𝕩)1+ao           # Class of assignment: 1←? 2↩? 3+↩?
  aa0<gac»+`ak×1(»∨⊢)0=+`ak              # Broadcast ak to the entire target

  # Lexical resolution (independent of parsing part 2 below)
  id/(0xv)xv<nv                          # Identifier indices in xv
  sp/(¯3xv)xv<0                          # Special name indices
  d1=idac                                 # Which accesses are definitions
  fi+`bfsc3×fx01¨bc                    # Body index fi, immediacy ¬fx, special name count
  idfidfiidvidxv                       # Function index and name ID
  dn((dfd/idf)∾≠fx)dvd/idv              # Identifier name ID, per-block
  # Order every referenced identifier, and an undeclaration for each declaration
  ixf(idf¯1b/gf)df(𝕩)1-˜bcgf        # First order by block index, open for real and closed for virtual
  ig(((idvdv)˜)⊏⊢)ixf                 # Then order by name
  ig(⊢/˜(d)>⊢)(⍋+`igd¯1¨dv)ig          # Last order by declaration depth
  digdidigid
  ia0<(idsp)ac                           # Which are assignments
  idd(⊢-(IT d)⊏⊢)idfd                     # Identifier frame depth
  idi((¯1+`d)(⍋⍋d/ig)⊏⊢)(⊒+⊢⊏fsc˙)df      # Slot within frame
  spi3+spxv                               # Special name index
  uu(1«d)d((+`)1(∾/∾˜)(∨/⊣))0<idd       # Unused marker
  uuuu∾⌽∊⌽spi+6×spfi                      # ...for special names
  idor3/idsp                           # Identifier bytecode ordering
  idoc32+uu(⊢+2×>)ia,idd0¨sp,idispi    # Identifier bytecode: instruction, depth, slot

  # Parsing part 2
  tatr2(>∨|)ps(⊢-TT)+`¬ro                 # Train argument (first-level)
  fa/(taro(«∨⊢)ps<aa)<(r=1)∨»op          # Active functions
  dyfa⊏«(trr0)ro<r=0                    # Dyadic
  pr𝕩˜pi/𝕩<sepobpr˜u∧⍷pr             # Objects to be loaded
  cnpilt/𝕩clvi+nvobob(cl-˜u)+lt𝕩  # Constants
  bkbcgi                                  # Block loads
  lb/𝕩=5+⊑bB                               # List starts
  ll(¬lo/1«ps)+(⊢-»)1(lo1)/+`ls0        # List Length
  dr/xs>lsrt/fo                          # Drop (block separator) and return
  fl(⊢+dy×⊢⊏os˙)fa+dy                      # Function application site

  # Object code generation: numbers oc ordered by source location (after rev) oi
  oridorg˜cn,cn,bk,bk,2/lb,at,dr,oa+1oaos,fl,rt
  ocor⊏∾idoc0¨cn,ob,1¨bk,1+↕≠bk,⥊⍉(11+lbaa)ll,48+ao,6¨dr
               24+oar,16+dy+4×fatr,¯1rc7¨fx
  # Output
  fz0¨fx,¬fx,↕≠fx                        # Per-function data
  cz/1or(oc)-≠rt,fsc+≠¨dn,dn,0¨¨dn    # Per-body data
  oc¯1rc,u𝕨1k,<˘⍉>fz,<˘⍉>cz       # Overall output
}