aboutsummaryrefslogtreecommitdiff
path: root/src/c.bqn
blob: a0446e39839364b40fba615f98007d784390662c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# Compiler setup: primitive glyph lists, the combined character set, and
# the per-class bounds used by the tokenizer and parser below.
# NOTE(review): several lines in this section show no assignment arrow,
# ligature or list bracket although the intact parts of the file use them.
# They look like they lost characters in transit; compare with the
# upstream file before relying on the exact text.
# Presumably splits •args into the three primitive glyph lists — confirm.
funcmod1mod2  •args
lf@+10
# One entry per character class, in the order the class names are listed
# after the table; the trailing comment on each entry names the class.
charSetcgl( < ¨)
  func                  # Function
  mod1                  # 1-modifier
  mod2                  # 2-modifier
  "⋄,"lf               # Separator
  "⇐←↩"                 # Gets
  "(){}⟨⟩"              # Bracket
  "‿"                   # Ligature
  "·"                   # nOthing
  # Use last character in case of UTF-16 (like dzaima/BQN)
  ¯1˘10"𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕗𝕘" # Input (𝕣 pending; ℝ not allowed)
  ".¯π∞"                # Numeric
  '0'+↕10               # Digit
  "_"˜"aA"+na26    # Alphabetic
  "•"(¯1"𝕨")" "@+9  # Whitespace (or special name prefix in UTF-16)
  "#'""@"               # Preprocessed characters

# One name per class above. Per the note on M, each is an init,length
# pair; the (0»+`)cgl part is a running total of the class lengths
# shifted by one, i.e. where each class starts.
bFb1b2bSbGbBbLbObXbNbDbAbWbP¨˜(0»+`)cgl
M1(0≤∧>)-   # ∊ for an init,length pair 𝕩 as above
sepbS
bI5-+bX
# The visible test is 1<≠, so the "s" is presumably produced only for
# arguments with more than one element — confirm once the line is restored.
Pl("s"/˜1<≠)   # Pluralize
_tmpl{𝕗{𝕎𝕩}¨<𝕩} # Template
# Convert characters to numbers, mostly the same as tokens
# The operand is charSet. Characters that are not found are reported
# through ErrUnknownChars, whose message starts with "Unknown character".
# NOTE(review): the lines below show no assignment arrows or combinators
# between names; the text looks damaged and should be checked against
# upstream before it is edited.
CharCodecharSet{
  ErrUnknownChars0!˜"Unknown character"Pl,": ",_tmpl
  Chk  ⊢⊣ErrUnknownChars(≠/⊣)(𝕗)
  g𝕗   Chk g˜1-˜1(g𝕗)⍋⊢
}

# Token-number constants and scan helpers shared by Tokenize, ReadNums
# and Parse (vi, vd, charRole, T, IT and I1T are all referenced below).
# NOTE(review): these three lines show no assignment arrows or statement
# separators; they look damaged and should be compared with upstream.
vd1+vibN  # Start of identifier numbering (plus dot)
charRole4˜∾⥊¨˜(≠↑cgl˙)1,2,3,¯1,¯3,¯10,¯2,0,5/⌽↕2 # For first vd chars
T`×  ITT  I1T(1+↕)T

# Source to ⟨t,r,k,is,ie⟩: token codes, token roles, the deduplicated
# identifier and literal lists, and the start and end source index of
# each remaining token.
# Identifiers then literal tokens are numbered starting at vi
# 𝕨 is applied as a function to the system value names (second element
# of k) to look them up. Every error is raised with ! on a pair
# ⟨source position(s), message⟩.
# NOTE(review): the header line and the first statement show no
# assignment arrows or separators and look damaged; the rest of the body
# appears intact.
Tokenize{
  # Resolve comments and strings
  c𝕩='#'s/00«sm𝕩='''⋄d←/dm←𝕩='"'
  g←⍋q←∾⟨  s⋄¯1↓d⋄/c⟩ ⋄q↩g⊏q                # Open indices
  e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`c)⊸//(𝕩=lf)∾1⟩ # Matching close indices
  Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟(0=¯1⊑⊢)       # Mark reachable openings
  St←(≠𝕩)↑·/⁼((≠↑∾⟜≠Se 1∾0¨)q⍋e)⊸/          # All indices → reached mask
  a←St q⋄b←St e⋄f←1≠`ab←a∨b                 # Open/close masks; filter
  # A quote character that survives the filter without being a reached
  # close has no partner
  {⟨⊑/𝕩,"Unclosed quote"⟩!0}⍟(∨´)(sm∨dm)∧b<f

  # Extract character and string literals
  u←f∧𝕩='@'⋄ci←/u∨»a∧sm
  chr←(⊏⟜𝕩-('@'-@)×⊏⟜u)ci                   # Characters (indices ci)
  f>↩qe←dm∧«a∧↩dm                           # Quote Escape ""
  str←𝕩⊔˜1-˜(si←a>»qe)(⊣+`⊸×○(∾⟜1)<)≠`dm∧ab # Strings (indices /si)

  # Extract words: identifiers and numbers
  ie←/f⋄is←ie≠⊸↑/1»f⋄Fs←{is/˜↩𝕨⋄𝕨/𝕩}        # Token start and end
  is-↩is(-×⊏⟜c)ie                           # Comment → ending newline only
  t←CharCode ie⊏𝕩
  nd←(t=⊑bN)>«t M bD                        # Namespace dot
  w←»⊸<l←nd<t M bN(⊣≍-˜)○⊑bW                # Word chars l, start w
  us←t=¯1++´bA⋄sy←t=⊑bW                     # Underscore, system dot
  {⟨/us∧w+`⊸⊏0∾𝕩,"Words can't only have underscores"⟩!0}⍟(∨´)w(/<1(⊢/«)(∨/⊣))l>us
  wk←na⌊∘÷˜(⊑bA)-˜w/t                       # Kind of word from first char
  t-↩na×l∧t≥na+⊑bA                          # Case-insensitive
  {⟨𝕩/is,"System dot with no name"⟩!0}⍟(∨´)sy>«l
  w≠↩»⊸∨sy                                  # Start system word at dot
  wi←0<wt←(2×wk≥0)(×⟜¬+⊢)w/sy               # Type: 0 number, 1 system, 2 identifier
  i←l>n←l∧(+`w)⊏0∾¬wi                       # Identifier/Number masks
  num←is ReadNums○(((0∾us)<∨⟜«0∾n)/0⊸∾) t×l # Numbers
  ir←(us/˜«⊸<i)(⊢+∧⟜(2⊸=))wi/wk             # Identifier role
  ws←1=wi/wt⋄ig←1-˜(i>us)×+`w>n             # Identifier groups
  {⟨is⊏˜𝕩/𝕨,"Numbers can't start with underscores"⟩!0}⍟(∨´⊢)⟜(ws<(⊑bA)>⊏⟜t)/(¯1»⌈`)⊸<ig
  id←(ws∾2)⊔ig⊔t⊏charSet                    # ⟨Identifiers, system values⟩

  # Deduplicate literals and identifiers; other cleanup
  ki←(wt⍒⊸⊏/w)∾(ci∾/si)⊏+`»f                # Indices in t
  k←id∾num‿chr‿str⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k     # IDs j into uniques k
  k↩𝕎⌾(1⊸⊑)k                                # System value lookup
  wf←¬l∨t M bW⋄ie/˜↩wf∨>⟜«l                 # Index management for...
  t↩(w∨wf)Fs(∾j++`vd»kk←≠¨k)⌾(ki⊸⊏)t        # Add IDs; remove words/whitespace
  t-↩t(M×-⟜⊑)bS                             # Separators are equivalent
  p←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep               # Separator group boundaries (excludes leading and trailing)
  sk←sb/˜p>∨⟜«(p+(sb-p)⊏t)∊3‿5+⊑bB          # Keep the first of each group that's not just inside a bracket
  t{ie/˜↩𝕨⋄𝕨Fs𝕩}˜↩1¨⌾(sk⊸⊏)t≠sep            # Remove the rest
  r←ir⌾((t M vd≍+´2↑kk)⊸/)(vd⌊t)⊏charRole∾0 # Role
  t+↩5×t M⟨⊑bX,5⟩                           # Case-insensitive special names
  t-↩vi(<+10×=)t                            # Shift . to bX and variables back one
  ⟨t,r,k,is,ie⟩
}

# 𝕩 is a list of tokens that contains the numeric literals, each
# preceded by 0. Return the numbers.
# 𝕨 has one entry per element of 𝕩 and is only handed to _err_, which
# selects from it with the failing mask to say where an error is.
ReadNums←{
  # 𝕗 _err_ 𝕘 mask: if any bit of the mask is set, fail with the pair
  # ⟨mask/𝕘, message⟩ where the message is 𝕗 (applied to the mask when
  # 𝕗 is a function)
  _err_←{(0!˜/⟜𝔾≍○<𝔽)⍟(∨´)}
  EChars←⟨"Letter"⊸Pl," """,⊏⟜charSet,""" not allowed in numbers"⟩_tmpl
  e‿d‿n‿p‿i←=⟜𝕩¨((⊑bA)+-´"ea")∾+⟜↕´bN       # Masks for e.¯π∞
  EChars∘(/⟜𝕩)_err_𝕨 ¬e∨𝕩<⊑bA
  s←d∨c←e∨z←0=𝕩⋄m←¬n∨c
  "Negative sign in the middle of a number"_err_𝕨 n>»c
  "Portion of a number is empty"_err_𝕨 (1«s)∧n∨s
  "Ill-formed decimal or exponent use"_err_(s/𝕎) ¬(0⊸=∨»⊸<)s/𝕩
  # p and i are the π and ∞ masks, so the message names both glyphs
  "π and ∞ must occur alone"_err_𝕨 (p∨i)>1(»∧(p∧«e)∨«)z∨n>»e
  f←(17≥¬(⊢-T)+`)⊸∧g←(«≤(d<𝕩≠⊑bD)>○I1T¬)⊸∧m # No leading 0s; max 17 digits
  l←(¯1∾π‿1∾↕10)⊏˜(¬d)/f×𝕩-1+⊑bN            # Digit lookup, with ∞ as 1 to avoid ∞×0
  v←(>⟜«0≤l)/0(0⊸≤××⟜10⊸+)`l                # Numeric values—mantissas and exponents
  v×↩1‿¯1⊏˜(r←>⟜»m)/»n                      # Negate if ¯
  vm←c/z                                    # Mask of mantissas in l
  dp←vm/f(--»⊸-(<×⊢)⊏⟜(I1T«d)⊸-)○(/>⟜«)g    # Decimal position
  t←10⋆|ee←dp-˜vm/«v׬vm                    # Power of 10
  t÷˜⌾((0>ee)⊸/)t×⌾((0<ee)⊸/)vm/v×(r/i)⊏1‿∞ # Correct ∞ then ×10⋆ee
}


# Parse the token list 𝕩 into bytecode plus block and identifier metadata.
# 𝕨 is ⟨r,vn,i,e⟩: token roles r; a list vn of which the visible code
# uses only the length nv; and per-token source start/end indices i and
# e, which are reordered together with the tokens and used to locate
# errors.
# NOTE(review): from the "Lexical resolution" section to the end of the
# function most lines show no assignment arrows, ligatures or
# combinators although the first half of the function uses them. That
# text looks damaged in transit and is reproduced unchanged; restore it
# from upstream before editing it.
Parse ← {r‿vn‿i‿e←𝕨⋄nv←≠vn
  # Called with the opening and closing bracket lists when they do not
  # pair up; always fails, with a message chosen by comparing them
  ErrMismatchedBrackets←{
    Lcs ← ∧○(0<≠)◶⟨0, ¯1 ⊑ 0¨∘⊢ {𝕩⌈⌈`𝕨+»𝕩}˝ =⌜⟜⌽⟩
    _mis_ ← {"Missing "∾𝕗∾" "∾charSet⊏˜𝕘+⊢/˜≠∘⊣=⊒}
    Msg ← >○≠◶⟨"opening"_mis_ 0, "closing"_mis_ 1˜⟩
    0 !˜ 𝕨 (Lcs<⌊○≠)◶⟨Msg,"Malformed bracket nesting"⟩ 𝕩
  }
  # 𝕗 _err_ 𝕘 mask: if any bit of the mask is set, fail with the message
  # 𝕗 paired with source positions taken from i and e at the entries of
  # 𝕘 that the mask selects
  _err_←{(0!˜(∧∘⍉(i≍e˙)⊏⎉1˜/⟜𝔾)≍○<𝔽)⍟(∨´⍟=)}

  # Bracket and ligature validation and handling
  # Open brackets have role ¯1 and closed ones have role 0
  "Empty program" ! 0<≠𝕩
  g←⍋pd←+`p←(¯1-2×r)×𝕩 M bB⋄gb←g⊏r=¯1       # Paren (actually any bracket type) depth and grade
  (g⊏p)(>⟜0⊸/ErrMismatchedBrackets⍟≢1-˜<⟜0⊸/)gx←g⊏𝕩
  "Swapped open and closed brackets" ! 0≤(⊑g)⊑pd
  "Parentheses can't contain separators"_err_(gb/G) ((⊑bB)⊸=»⊸∧sep⊸=)gb/gx
  {"Empty statement or expression"_err_(𝕩/0∾G) (4+⊑bB)≠𝕩/0∾gx}1⊸«⊸∧1∾gb
  r-↩(𝕩=⊑bG)>ec←«0≤r+p                      # Role ¯4 for exports: ⊑bG is ⇐
  "Invalid assignment or stranding use"_err_(↕∘≠) ((¯4⊸<∧≤⟜¯2)r)>ec∧»0≤r
  "Dot must be followed by a name"_err_(↕∘≠) (r=4)>«𝕩 M vi‿nv
  dl←«⊸∨dc←r=4                              # Dot left
  sr←»⌾(((⍋⊏⟜dl)⊸⊏g)⊸⊏)sl←«⊸∨r=¯2⋄ns←¬sl∨sr # Strand right and left; not stranded
  cp←𝕩=1+⊑bB                                # Closed paren
  nr←(IT¬cp)⊏𝕩=⊑bO                          # Nothingness role: 1 for · (handle 𝕨 later?)
  "Can't strand Nothing (·)"_err_(↕∘≠) ns<nr
  g⊏˜↩⍋g⊏sdl←sl∨dl                          # Avoid reordering strands and dots in rev
  # Permutation to reverse each expression: *more* complicated than it looks
  rev←⍋+`¯1↓(¯1∾g)(⊣⍋⊸⊏⊏˜⟜⍋¬⊏˜)⍋+`⊸+1∾g⊏sdl∨r=¯1
  gf←⍋fd←+`br←rev⊏p×𝕩M⟨2+⊑bB,2⟩             # Order by brace depth fd to de-nest blocks
  rev⊏˜↩gf⋄fd⊏˜↩gf⋄br⊏˜↩gf
  𝕩⊏˜↩rev⋄p⊏˜↩rev⋄i⊏˜↩rev⋄e⊏˜↩rev
  fi←+`b←br>0⋄c←/br<0                       # Block Begin (mask) and Close (index), in matching order

  # Block properties
  PN←1(∾/∾˜)(∨/⊣)⋄H←b¬∘PN=                  # Which blocks have a property
  sn←(0‿3‿4‿5+⊑bI)⍋𝕩⋄sp←/𝕩 M bI             # Special name index
  fx←1 H sn⋄fr←(fx∨0⊸<)⊸+ft←(2⊸H⌈2×3⊸H)sn   # Block immediacy ¬fx and type ft
  "Special name outside of any block"_err_(/{(0=fi)∧𝕩 M bI}∘𝕩) 0<⊑fr
  fsc←(ft⊏0‿2‿3)+3×fx                       # Special name count

  # Propagate roles through parentheses
  # ir is the role of the expression ending at each position (truncated to the right)
  r↩sl-˜ns×(1↓fr)⌾((c⊏rev)⊸⊏)r              # Add block roles; make strand elements ¯1
  pt←cp∧ns                                  # Pass-through parentheses: not in strands
  pp←pt∧»es←1⊸»⌾(g⊸⊏)r<0                    # Parens enclosing one object (maybe with assignment) don't change roles
  ir←((»⌾(g⊸⊏)(1+es)×3=⊢)⌈⊢-es<2≤⊢)r+pp×(IT¬pp)⊏r # Propagate modifier roles
  ir⌈↩(IT¬pt∧ir=0)((⊏-⊢)⟜(+`¬pp)(⊢⌊1⌈+)⊏)ir # ...and function roles
  r+↩pt×»ir                                 # Roles at pt were 0; set them now
  ir(×⟜¬-⊢)↩nr                              # Include nothingness (doesn't handle 𝕎)
  r-↩(r=¯4)∧1»r=¯1                          # Lone ⇐ to role ¯5
  "Dot must be preceded by a subject"_err_(⍋∘rev) dc>»r=0
  r(×⟜¬-⊢)↩dl⋄dc⊏˜↩rev                      # Namespace and dot to ¯1

  # Prep for lexical resolution before reordering 𝕩
  xv←𝕩-vi

  # Parsing part 1
  p-↩br⋄g↩⍋+`p⋄g⊏˜↩⍋g⊏«⊸∨dc⋄gr←g⊏rev        # Order by non-brace bracket depth, then dots
  sll←1+2÷˜0(<-○/>)gr⊏sr-sl⋄l←/g⊏𝕩=5+⊑bB    # Strand length; list starts
  bp←⟨/b,c⟩∾¨0(<≍○(</⟜g)>)g⊏p               # Bracket pairs
  {i↩(𝕨⊏i)⌾(𝕩⊸⊏)i⋄e↩(𝕩⊏e)⌾(𝕨⊸⊏)e}´bp        # Highlight all contents of a pair when error reporting
  g⊏˜↩gs←⍋gr⊏sl⋄gr↩g⊏rev⋄gi←⍋g              # Send strand prefixes *‿ to the end
  𝕩⊏˜↩g⋄r⊏˜↩gr⋄ir⊏˜↩gr⋄l↩(l⊏⍋gs)∾/gr⊏sr>sl
  a←(¯5⊸<∧≤⟜¯3)r⋄ps←a<r<0                   # a: assignment, ps: part separator
  tr←1≤er←ir⊏˜IT»ps                         # er: expression role; tr: train or modifier expression
  no←ir<0⋄ne←er<0                           # Nothing value; expression
  "Nothing (·) cannot be assigned"_err_(G) ne∧a
  oa←⌽/op←(er<2)∧r≥2⋄ro←op∨«op∧m2←r=3       # op: active modifiers; ro: mod or right operand
  "Missing operand"_err_(G) op>(«∧m2≤»)no<m2<ro∨r∊↕2
  # Two adjacent subjects usually mean a strand whose ligature was left out
  "Double subjects (missing ‿?)"_err_(G) ∧⟜«ro»⊸∨⊸<r=0
  ma←tr<(𝕩=2+⊑bG)∧«ir≥1                     # Modified assignment
  os←↕∘≠⊸(⊣-T)⌾⌽¬ro∨ma                      # Operator skip: distance rightward to derived function start
  at←1+⊏⟜os⊸+ai←/a⋄af←¯4≠ai⊏r⋄ar←at⊏r       # Assignment target; af for actual (non-export) assignment
  "Role of the two sides in assignment must match"_err_(at⊏G) af∧ar≠0⌈at⊏er
  aid←(¯5⊸≤∧<⟜nv)𝕩-vi                       # Assignable identifier
  ak←af+(0≤ar)+(⊑bG)-˜ai⊏𝕩                  # Class of assignment: 1⇐ 2⇐? 3←? 4↩?
  aa←×g⊏ac←»+`(»⊸∨0=+`)⊸×gi⊏«⊸-ak⌾(at⊸⊏)0¨𝕩 # ac broadcasts it to the entire target
  nf←b¬∘PN ac<xv=vi-˜⊑bG                    # Namespace blocks
  {"Can't return Nothing (·)"_err_(𝕩⊏⍋∘Rev) 𝕩⊏nr} 1-˜nf¬⊸/0∾c⊏rev
  "Assignment target must be a name or list of targets"_err_(G) (a∨aid∨ps∨𝕩(=⟜(3+⊑)<M)bB)<aa
  # Inside a target only ⇐ (class 2, an alias) is accepted
  "Can't nest assignments (write aliases with ⇐)"_err_(ai⊏G) (ak=2)<ai⊏aa
  "Can't use result of function/modifier assignment without parentheses"_err_(G) (0<er)∧(0≤r)∧»⊸>aa
  af>↩alm←ai⊏aa⋄al←alm/ai                   # aliases al

  # Lexical resolution (independent of parsing part 2 below)
  di←/dm←»dc                                # Dots aren't scoped
  id/idm(gi⊏«aaa)<dm<(0≤∧<nv)xv
  didddac M 22                          # Which accesses are definitions
  "Can't define special name"_err_(SP) spdd
  idn(idg(d/idfidfi)∾≠fsc)d/idvidxv  # Numbers for each identifier, per-block
  # Order every referenced identifier, and an undeclaration for each declaration
  ixf(((1=idac)+⊏(¯1b/gf))d/⊏((𝕩)cgf)˙)idf # First order by block index, open for real and closed for virtual
  ig(⍋⊏(ixx(d/)idv))⊏⍋ixf            # Then order by name
  {"Redefinition"_err_(𝕩·(d/)ID) ¬ixx(»𝕩)ixf} (d)/ig
  ig<(d)/(⍋+`(d∾-/˜d))ig           # Last order by declaration depth
  d˜igid˜ig
  ipidacxmip M 12                      # Assignment class; which are exports
  ("Undefined identifier"Pl/)_err_(ID) d<¯1»igidv
  idd(⊢-(IT d))idfd                     # Identifier frame depth
  "Can't export from surrounding scope"_err_(ID) xm0<idd
  idxidg(digd/ig)d(/≥1↓PN)xm           # Exported identifier mask
  idi(¯1+`d)dig(fsc+⊒)¯1idg         # Slot within frame
  uu((1«d)0=ip)d(⊣+`⊏PN)0<idd           # Unused marker
  spi((spfspfi)3×fx)+0+2+spxv       # Special name index
  uuspi+6×spf                          # and unused marker
  idor23/di,idsp                      # Identifier bytecode ordering
  ido21+uu(⊢+10×>)0<ipspac               # Opcode
  idbc26¨di,dixv, ido,idd0¨sp,idispi  # Identifier bytecode: instruction, depth, slot

  # Parsing part 2
  tatr2(>∨|)ps(⊢-T)+`¬ro                  # Train argument (first-level)
  fa/(fetaro∨«ps<aa)<ff(r=1)∨»op      # Active functions: cases fe are excluded
  "Second-level parts of a train must be functions"_err_(G) tr>feff
  dyfa⊏«no<(trr0)ro<r=0                 # Dyadic
  obpr⊏/¯1(⊢-»)u⍷∧pr𝕩˜pi/𝕩<sep         # Objects to be loaded
  cnpilt/𝕩civi+nvob(ci-˜u)+lt𝕩    # Constants
  bkcgi                                   # Block loads
  s𝕩=sepfo𝕩=2+⊑bBlssfo<IT lo𝕩=4+⊑bB # List Separators: after ⟨lo, not {fo
  "Can't use Nothing (·) in lists"_err_(G) (»lols)ne
  llsll˜(¬lo/1«ps)+-»1(lo1)/+`ls0     # List Length
  dr/(ne∨»r=¯5)<ls<srt/fo                # Drop (block separator) and return
  fl(dy×⊏os)+fa+dy                       # Function application site
  dr((1+dy)×fnfane)/fl                  # Turn function applications on · to drops
  fn¬fnfa/˜fndy/˜fnfl/˜fn            # And remove them

  # Bytecode generation: numbers bc ordered by source location (after rev) oi
  oroiidorg˜cn,cn,bk,bk,2/l,dr,af/at,al+1,al+1,oa+1oaos,fl,rt
  bcor⊏∾idbc0¨cn,ob,15¨bk,1+↕≠bk,⥊⍉(3+laa)ll,14¨dr
               11(11-1+⊑bG)+(af/ai)ma+𝕩,28¨al,vi-˜(al-1)𝕩
               5+oar,(⊢+9×10=)16+dy-7×faer,¯1rc25+4×nf
  # Instruction source positions
  MI-˜≠∾g˜oj(rt-oi1-˜i)Mi∾1-˜af/at,oa,fa
  indoj,oj Mi fa⊏IT»¬roma(¯1˜or(rt))¨ie
  # Function metadata and overall output
  bc¯1rc,u,⟨ft,¬fx,/1orbc-rt,fsc+≠¨idn,idn,idx⟩,ind
}

# Compiler entry point: tokenize the source 𝕩, parse the resulting
# tokens, and assemble the final result from both stages.
# The (2≠≠)𝕨 test and the "System values not supported" message suggest
# that 𝕨 may be given without a system-value handler, in which case one
# that always fails is filled in — TODO confirm against upstream.
# NOTE(review): every line here shows no assignment arrows or list
# punctuation; the text looks damaged and should be restored before use.
Compile{
  primsSys{𝕩,("System values not supported"!0˙)¨¨}(2≠≠)𝕨
  tok,role,val,t0,t1txsys Tokenize 𝕩
  bc,prim,blk,oirole,val,t0,t1 Parse tok
  bc, primprims1val, <˘⍉>blk, oi, tx
}