aboutsummaryrefslogtreecommitdiff
path: root/src/bootstrap/boot3.bqn
blob: 2ca3f731b5c668e01eede39d127ffa2154d3af4d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# Compiles the twice-simplified compiler
# This file isn't part of the bootstrapping chain as its syntax is
# more complicated than boot2. It shows how boot2 can be compiled.
{
  # Compiler body. The phases follow the section comments below:
  # tokenization (character classes, comments and strings, words, numbers,
  # literal/identifier deduplication, separator cleanup), bracket roles,
  # expression reversal, lexical resolution, parsing, object code
  # generation, and output assembly.
  # NOTE(review): several lines in this copy read as if glyphs such as
  # assignment arrows and list brackets were dropped in extraction; confirm
  # against the upstream file before relying on the exact text.

  # Character class table: one row per class, labelled at right.
  # Later code refers to classes by their position in this table.
  charSetchars
    "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍⋈↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function
    "˙˜˘¨⌜⁼´˝`"           # 1-modifier
    "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊"         # 2-modifier
    "⋄,"lf@+10          # Separator
    "←↩"                  # Gets
    "(){}⟨⟩"              # Bracket
    "𝕊𝕩𝕨"                 # Input
    "¯π∞"                 # Numeric
    ("0")+↕10            # Digit
    "aA"+na26         # Alphabetic
    " "                   # Whitespace
    sc"#""@"             # Preprocessed characters
  
  # presumably cgl holds the class lengths and cm an (init,length) pair per
  # class, as the comment on M below suggests — TODO confirm
  cm¨˜(0»+`)cgl¨chars
  # Class ranges picked out of cm by table position: 3 Separator, 4 Gets,
  # 5 Bracket, 6 Input, 7 Numeric, 9 Alphabetic, 10 Whitespace
  bS3cmbG4cmbB5cmbI6cmbN7cmbA9cmbW10cm
  M1(0≤∧>)-   # ∊ for an init,length pair 𝕩 as above
  # Separator token value that tokens are later compared against (t=sep);
  # presumably taken from the start of bS — TODO confirm
  sepbS
  vibN  # Start of identifier numbering
  # Role value for each character, listed class by class in table order
  # (cgl is used to spread the per-class values) — assumption, confirm
  charRolecgl¨1,2,3,¯1,¯3,⟨¯1,0⟩,⟨1,0,0⟩,0,0,26/0,1⟩,4,0
  # Helpers TT and IT, used by the comment filter, bracket roles and parsing
  TT`×  ITTT

  # Comments and strings
  s`dd𝕩=1scsssdd                     # Strings; string start
  fs<(s<𝕩=⊑sc)((1+↕)TT)𝕩=lf           # Filter comments
  chr@¨ci/f𝕩=2sc                        # Characters (indices ci)
  str𝕩˜1-˜(+`ss1)×(ss<s)1               # Strings

  # Extract words: identifiers and numbers
  tcharSetf/𝕩                             # Tokens
  rtcharRole                              # Role
  w»<lt M bN(⊣⋈-˜)bW                   # Word chars l, start w
  wi(bA)w/t                              # Type: 0 number, 1 identifier
  tt-na×lr=1                              # Case-insensitive
  nl(+`w)0∾¬wi                           # Number mask
  ide(1-˜(l>n)×+`w>n)tcharSet            # Identifiers

  # Numbers, at most 2 digits
  nt(«0n)/0t×l                         # Number tokens separated by 0
  nnnt=⊑bNnm¬nn0=nt                     # Mask for ¯; digits
  nl(0π,∾↕10)˜nm×nt-⊑bN               # Digit lookup
  ns1,¯1˜(>»nm)nn                    # Negate if ¯
  numns×(>«nm)/(10×»)+nl                 # Numeric values

  # Deduplicate literals and identifiers; other cleanup
  # Identifiers then literal tokens are numbered starting at vi
  ki(wi⊏/w)(ci∾/ss)⊏+`»f                # Indices in t
  kide,⟨⟩,num,chr,str                    # Constants
  kk(⊢>¯1»⌈`)/¨˜j¨k                     # IDs j into uniques k
  # wr: presumably the mask of tokens to keep (word starts and everything
  # that is not a word character or whitespace); r is filtered by it here
  # and t on the next line — TODO confirm
  wrw∨¬lt M bWrwr/r
  twr/(j++`vi»≠¨k)(ki)t                # Add IDs; remove words/whitespace
  tt-t(M×-)bS                            # Separators are equivalent
  pb`1¨sb¯11↓/1(∾≠∾˜)t=sep              # Separator group boundaries (excludes leading and trailing)
  eb3,5+⊑bB                              # End brackets that allow separators
  sksb/˜pb>∨«eb˜pb+(sb-pb)t             # Keep the first of each group that's not just inside a bracket
  sr(t↑/sk)tseprsr/rtsr/t         # Remove the rest
  # 𝕩 becomes the token list; nv is presumably the count of distinct
  # identifiers (length of the first entry of k) — TODO confirm
  𝕩tnv≠⊑k
  # End of tokenization!

  # Bracket roles
  # Open brackets initially have role ¯1 and closed ones have role 0
  gb⍋+`p(¯1-2×r)×𝕩 M bB                   # Paren (actually any bracket type) depth and grade
  rr+𝕩=3+⊑bB                               # Assume blocks are functions
  cp𝕩=1+⊑bB                                # Closed paren
  ir(IT cp)0<r                         # Role of the expression ending at each position
  rr+cp×»ir                                # Roles at cp were 0; set them now

  # Reverse each expression: *more* complicated than it looks
  rev⍋+`¯1(¯1gb)((⍋⊣)((⍋⊢)⊏⊣)¬⊏˜)(+`+⊢)1gbr=¯1

  # Lexical resolution, no nesting
  # rx: presumably the tokens 𝕩 in the reversed order rev — TODO confirm
  rxrev𝕩
  iv/(rx≥⊑bI)rx<vi+nv                     # Variable indices
  vv(ivrx)-vi                             # Variable name
  ii3↓⊐(¯3+↕3)vv                          # Slot within frame
  vaiv⊏»rxvb(va≥⊑bG)va<+´bG             # Variable assignment, if it's there
  idor3/iv                              # Identifier bytecode ordering
  idoc32+vb,0¨ii,ii                      # Identifier bytecode: instruction, depth, slot
  dn⟨⟨⟩,(va=⊑bG)/vv                       # Identifier name ID, per-block

  # Parsing
  grrev˜g⍋+`revp                        # Order by bracket depth
  # presumably reorders the tokens 𝕩, roles r and expression roles ir by
  # gr so the arrays below share one ordering — TODO confirm
  𝕩gr𝕩rgrrirgrir
  at/aa»a¯3=r                            # Assignment target
  ao(a/𝕩)-⊑bG                              # Assignment opcode
  trir˜IT»psa<r<0                        # tr: train or modifier expression, ps: part separator
  oa⌽/opr=2                               # oa: active modifiers; op: mod or right operand
  xs𝕩=sepfo𝕩=2+⊑bB                       # Separators, function open {
  lsxsfo<IT lo𝕩=4+⊑bB                   # List Separators: after ⟨lo, not {fo
  tatr2(>∨|)ps(⊢-TT)+`¬op                 # Train argument (first-level)
  fa/(taop∨«ps<aa)<(r=1)∨»op            # Active functions
  dyfa⊏«(trr0)op<r=0                    # Dyadic
  pr𝕩˜pi/𝕩<sepobpr⊏/¯1(⊢-»)u⍷∧pr      # Objects to be loaded
  cnpilt/𝕩clvi+nvobob(cl-˜u)+lt𝕩  # Constants
  bk/𝕩=2+⊑bB                               # Block loads
  lb/𝕩=5+⊑bB                               # List starts
  ll(¬lo/1«ps)+-»1(lo1)/+`ls0          # List Length
  dr/xs>lsrt/fo                          # Drop (block separator) and return
  os⌽↕(⊣-TT)⌽¬op                        # Operator skip: distance rightward to derived function start
  fl(dy×⊏os)+fa+dy                       # Function application site

  # Object code generation: numbers oc ordered by source location (after rev) oi
  fsc3×fx2                               # Body immediacy ¬fx, special name count
  # or: ordering for the opcode groups; each position list here lines up
  # with a group of values in oc below (idor with idoc, cn with constant
  # loads, bk with block loads, and so on) — assumption, confirm
  oridorg˜cn,cn,bk,bk,2/lb,at,dr,oa+1oaos,fl,rt
  ocor⊏∾idoc0¨cn,ob,1¨bk,1+↕≠bk,⥊⍉(11+lbaa)ll,48+ao,6¨dr
               24+oar,16+dy+4×fatr,¯1rc7¨fx
  # Output
  fz0¨fx,¬fx,↕≠fx                        # Per-function data
  cz/1oroc-rt,fsc+≠¨dn,dn,0¨¨dn      # Per-body data
  oc¯1rc,u𝕨1k,<˘⍉>fz,<˘⍉>cz       # Overall output
}