# src/c.bqn (blob 91a1059734e1ecd1c77ba4526c797c6443676447)
# Line feed character (code point 10). It is one of the separator characters
# and is also what terminates a comment during tokenization.
lf←@+10
# charSet is every character the compiler accepts, joined into one string in
# the group order listed below; cgl holds the length of each group in the
# same order. A character's index in charSet is its character code.
charSet‿cgl←(∾ ≍○< ≠¨)⟨
  "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function
  "˙˜˘¨⌜⁼´˝`"           # Modifier
  "∘○⊸⟜⌾⊘◶⎉⚇⍟"          # Composition
  lf∾"⋄,"               # Separator
  "←↩"                  # Gets
  "(){}⟨⟩"              # Bracket
  "‿"                   # Ligature
  "·"                   # nOthing
  # Use last character in case of UTF-16 (like dzaima/BQN)
  ¯1⊏˘10‿∘⥊"𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕗𝕘" # Input (𝕣 pending; ℝ not allowed)
  '0'+↕10               # Digit
  "π∞¯."                # Numeric
  "_"∾˜⥊"aA"+⌜↕na←26    # Alphabetic
  (¯1↓"𝕨")∾" "∾@+9      # Whitespace (or special name prefix in UTF-16)
  "#'""@"               # Preprocessed characters
⟩
# One ⟨start,length⟩ pair per character group, in the order the groups are
# listed in charSet: start is the running total of the preceding lengths.
bF‿bM‿bC‿bS‿bG‿bB‿bL‿bO‿bI‿bD‿bN‿bA‿bW‿bP←≍¨˜⟜(+`»)cgl
M←1⊸⊑(0⊸≤∧>)-⟜⊑  # ∊ for an init,length pair 𝕩 as above
sep←⊑bS           # Code of the first separator character
# CharCode converts characters to numbers, mostly the same as tokens
# Raise an error naming the characters in 𝕩; "character" is pluralised when
# 𝕩 has more than one element. Never returns normally.
ErrUnknownChars←{
  ⟨"Unknown character","s"/˜1<≠𝕩,": ",𝕩⟩∾⊸!0
}
# CharCode maps each character of 𝕩 to its index in charSet (𝕗 here).
# The lookup is a binary search in the sorted character set, so a character
# that is absent lands on some neighbouring code; Chk catches that by
# comparing the characters looked up from the codes with the input.
CharCode←charSet{
  # 𝕨: input characters, 𝕩: their codes. Return the codes, but first report
  # every input character that differs from the one its code selects.
  Chk ← ⊢⊣ErrUnknownChars∘(≠/⊣)⍟≢⟜(⊏⟜𝕗)
  # g: grade of the character set; 1⌈ keeps characters below the smallest
  # entry in range so that Chk, not the indexing, reports them.
  g←⍋𝕗 ⋄ ⊢ Chk g⊏˜1-˜1⌈(g⊏𝕗)⍋⊢
}

vi←⊑bD  # Start of identifier numbering
# One role per character before the digits: each listed value is reshaped to
# its group's length, so ¯1‿0 alternates over the brackets and 5/⌽↕2 gives
# 1 to the first five input characters and 0 to the last five.
charRole←∾⥊¨˜⟜(≠↑cgl˙)⟨1,2,3,¯1,¯1,¯1‿0,¯1,0,5/⌽↕2⟩ # For first vi chars
# T: running maximum of 𝕨×𝕩. IT / I1T: the same with 𝕨 being the 0-based /
# 1-based positions of 𝕩, i.e. the position of the latest nonzero so far.
T←⌈`× ⋄ IT←↕∘≠⊸T ⋄ I1T←(1+↕∘≠)⊸T

# Source to ⟨tokens, roles, number of identifiers, literals⟩
# Identifiers then literal tokens are numbered starting at vi
Tokenize←{
  # Resolve comments and strings
  # r: comment starts; sm/dm: single/double quote masks; s: indices of a '
  # that has another ' two characters later; d: indices of double quotes
  r←𝕩='#'⋄s←/0‿0⊸«⊸∧sm←𝕩='''⋄d←/dm←𝕩='"'
  g←⍋q←∾⟨  s⋄¯1↓d⋄/r⟩ ⋄q↩g⊏q                # Open indices
  e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩ # Matching close indices
  Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟(0=¯1⊑⊢)       # Mark reachable openings
  St←(≠𝕩)↑·/⁼((≠↑∾⟜≠Se 1∾0¨)q⍋e)⊸/          # All indices → reached mask
  a←St q⋄b←St e⋄f←¬≠`ab←a∨b                 # Open/close masks; filter
  "Unclosed quote"!¬∨´(sm∨dm)∧b<f

  # Extract character and string literals
  u←f∧𝕩='@'⋄ci←/u∨»a∧sm
  chr←(⊏⟜𝕩-('@'-@)×⊏⟜u)ci                   # Characters (indices ci)
  f>↩qe←dm∧«a∧↩dm                           # Quote Escape ""
  str←1↓¨𝕩⊔˜1-˜(+`si←a>»qe)×≠`dm∧ab         # Strings (indices /si)

  # Extract words: identifiers and numbers
  c←CharCode f/𝕩
  w←»⊸<l←c M bD(⊣≍-˜)○⊑bW⋄us←c=¯1++´bA      # Word chars l, start w
  tw←na⌊∘÷˜(⊑bA)-˜w/c                       # Type of word from first char
  c-↩na×l∧c≥na+⊑bA                          # Case-insensitive
  i←l>n←l∧(+`w)⊏0∾tw<0                      # Identifier/Number masks
  num←ReadNums n∨⟜«⊸/○(0⊸∾)c×l              # Numbers
  ir←(us/˜«⊸<i)(⊢+∧⟜(2⊸=))0⊸≤⊸/tw           # Identifier role
  id←(1-˜(i>us)×+`w>n)⊔c                    # Identifiers

  # Deduplicate literals and identifiers; other cleanup
  ki←((⍋⊏⟜n)⊸⊏/w)∾(ci∾/si)⊏+`»f             # Indices in c
  k←id‿num‿chr‿str⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k     # IDs j into uniques k
  c↩(w∨¬l∨c M bW)/(∾j++`vi»≠¨k)⌾(ki⊸⊏)c     # Add IDs; remove words/whitespace
  c-↩c(M×-⟜⊑)bS                             # Separators are equivalent
  c/˜↩¬(1»(c∊2‿4+⊑bB)∨⊢)⊸∧c=sep             # Remove repeated and leading separators
  c/˜↩¬(1«c∊3‿5+⊑bB)∧c=sep                  # ...and trailing ones. In sequence for repeated trailing.
  cr←ir⌾((c M vi∾≠⊑k)⊸/)(vi⌊c)⊏charRole∾0   # Role
  c+↩5×c M⟨⊑bI,5⟩                           # Case-insensitive special names
  # Result: tokens, roles, number of distinct identifiers, and the remaining
  # unique literals (numbers, characters, strings) joined into one list
  ⟨c,cr,≠⊑k,∾1↓k⟩
}

# 𝕩 is a list of tokens that contains the numeric literals, each
# preceded by 0. Return the numbers.
ErrNumericChars←{
  # 𝕩 holds character codes (indices into charSet) of letters found inside a
  # numeric literal. Always raises; "Letter" is pluralised for several codes.
  plural ← (1<≠𝕩)/"s"
  letters ← 𝕩⊏charSet
  0 !˜ ∾⟨"Letter", plural, " """, letters, """ not allowed in numbers"⟩
}
# ReadNums: 𝕩 is a list of character codes holding the numeric literals, each
# preceded by a 0. Checks that every literal is well formed (raising one of
# the messages below otherwise) and returns the list of numeric values.
ReadNums←{
  e‿p‿i‿n‿d←=⟜𝕩¨((⊑bA)+-´"ea")∾+⟜↕´bN       # Masks for eπ∞¯.
  ErrNumericChars∘(/⟜𝕩)⍟(∨´) ¬e∨𝕩<⊑bA
  s←d∨c←e∨z←0=𝕩⋄m←¬n∨c
  "Negative sign in the middle of a number" ! ∧´n≤»c
  "Portion of a number is empty" ! ¬∨´(1«s)∧n∨s
  "Ill-formed decimal or exponent use" ! ∧´(0⊸=∨»⊸<)s/𝕩
  # p and i are the π and ∞ masks, so the message names both characters
  "π and ∞ must occur alone" ! ∧´(p∨i)≤1(»∧(p∧«e)∨«)z∨n
  f←(17≥¬(⊢-T)+`)⊸∧(«≤(d<𝕩≠⊑bD)>○I1T¬)⊸∧m   # No leading 0s; max 17 digits
  l←(¯1∾π‿1∾˜↕10)⊏˜(¬d)/f×𝕩-1-˜⊑bD          # Digit lookup, with ∞ as 1 to avoid ∞×0
  v←(>⟜«0≤l)/(0⊸≤××⟜10⊸+)`»«l               # Numeric values—mantissas and exponents
  v×↩1‿¯1⊏˜(r←>⟜»m)/»n                      # Negate if ¯
  vm←c/z                                    # Mask of mantissas in l
  dp←vm/(»⊸<×-)⟜(⊏⟜(I1T d∾0))1+/>⟜«f        # Decimal position
  t←10⋆|ee←dp-˜vm/«v׬vm                    # Power of 10
  t÷˜⌾((0>ee)⊸/)t×⌾((0<ee)⊸/)vm/v×(r/i)⊏1‿∞ # Correct ∞ then ×10⋆ee
}


# Raise an error describing a bracket mismatch between 𝕨 and 𝕩. Every path
# ends in `0 !˜ message`, so this never returns normally.
# NOTE(review): 𝕨 and 𝕩 look like the opening and closing bracket codes that
# Parse found not to match — confirm against the call site.
ErrMismatchedBrackets←{
  # NOTE(review): presumably the longest-common-subsequence length of 𝕨 and 𝕩, built from the equality table — confirm
  Lcs ← ¯1 ⊑ 0˘∘⊢ {𝕩⌈⌈`𝕨+«𝕩}˝ =⌜
  # "Missing <𝕗> <chars>": the characters are looked up in charSet after adding offset 𝕘 to the selected codes
  _mis_ ← {"Missing "∾𝕗∾" "∾charSet⊏˜𝕘+⊢/˜≠∘⊣=⊒}
  # Choose the opening or closing wording by comparing the lengths of 𝕨 and 𝕩
  Msg ← >○≠◶⟨"opening"_mis_ 0, "closing"_mis_ 1˜⟩
  # If Lcs is below the shorter length report malformed nesting, otherwise a missing bracket
  0 !˜ 𝕨 (Lcs<⌊○≠)◶⟨Msg,"Malformed bracket nesting"⟩ 𝕩
}
ErrUndeclared←{
  # Always raises; the message is pluralised when 𝕩 has more than one element
  plural ← "s"/˜1<≠𝕩
  ("Undefined identifier"∾plural) ! 0
}
# Parse: 𝕩 is a list of token codes and 𝕨 is the pair r‿nv (per-token roles
# and the number of identifiers — presumably as produced by Tokenize; confirm
# against the caller). Raises on mismatched or swapped brackets and on
# multiple definitions; ErrUndeclared is also referenced near the end.
Parse ← {r‿nv←𝕨
  # p is nonzero only at bracket tokens; pd is its running sum and g its grade
  g←⍋pd←+`p←(¯1-2×r)×𝕩 M bB⋄br←p×𝕩M⟨2+⊑bB,2⟩
  # Compare the tokens where p=1 against those where p=¯1 (minus 1), both in g order
  p(=⟜1⊸/ErrMismatchedBrackets⍟≢1-˜=⟜¯1⊸/)○(g⊸⊏)𝕩
  "Swapped open and closed brackets" ! 0≤(⊑g)⊑pd
  # sl: ligature tokens together with the token just before each one
  sl←«⊸∨𝕩 M bL⋄sr←»⌾(g⊸⊏)sl⋄cp←𝕩=1+⊑bB
  ps←(𝕩=sep)∨sl∨p>0
  g⊏˜↩⍋g⊏sl
  rev←⍋+`¯1↓(¯1∾g)(⊣⍋⊸⊏⊏˜⟜⍋¬⊏˜)⍋+`⊸+1∾g⊏ps

  gf←⍋fd←+`rev⊏br
  rev⊏˜↩gf⋄fd⊏˜↩gf
  # From here on 𝕩 is reordered by rev
  𝕩⊏˜↩rev⋄bv←rev⊏br⋄b←/bv<0⋄fi←+`c←bv>0

  H←¬·1⊸«⊸/c(∨/⊣)=
  ft←(0∾1⊸H+2×2⊸H⌈2×3⊸H)(0‿3‿4‿5+5+⊑bI)⍋𝕩
  fsc←ft⊏⥊3‿5‿6-⌜3‿0
  pt←cp∧sn←¬sl∨sr
  r↩sl-˜sn×((1↓ft)⊏(1+2⊸≤)⊸/↕4)⌾((b⊏rev)⊸⊏)r
  r+↩pt(⊣∧≤=○I1T⊢)(»⌾(g⊸⊏)3=r)∨1≤r

  # idm: tokens whose offset from vi lies in [0,nv), i.e. identifier tokens
  id←/idm←(0⊸≤∧<⟜nv)xv←𝕩-vi⋄idx←id⊏𝕩⋄sp←/𝕩 M bI
  ad←𝕩(=>○IT⊣=1+⊢)⊑bG

  g↩⍋+`rev⊏p-br⋄gr←g⊏rev⋄sll←1+2÷˜0(<-○/>)gr⊏sr-sl⋄l←/g⊏𝕩=5+⊑bB
  g⊏˜↩gs←⍋gr⊏sl⋄gr↩g⊏rev⋄gi←⍋g
  𝕩⊏˜↩g⋄r⊏˜↩gr⋄ps⊏˜↩gr⋄si←/gr⊏sr>sl⋄l⊏↩⍋gs
  # a: assignment-arrow tokens (group bG)
  a←𝕩 M bG⋄ro←(r≥2)∨a<«r=3
  tr←(IT»ps)⊏fe←ro∨r=1⋄ma←tr<(𝕩=1+⊑bG)∧«fe
  os←⌽↕∘≠⊸(⊣-T)⌽¬ro∨ma⋄at←1+⊏⟜os⊸+/a
  r(×⟜¬-⊢)↩aa←g⊏asn←»+`gi⊏«⊸-(≠𝕩)↑/⁼at
  ta←tr>(a≤○I1T ps)∧(⊢∧2(|<≤)ps(⊢-T)+`)¬ro
  oa←⌽/op←(«r≥0)∧r≥2
  fa←/(ta∨(ro∧«op)∨«aa)<(r=1)∨<⟜»op
  dy←fa⊏«(𝕩≠⊑bO)∧(tr∧r≥0)∨ro<r=0
  # pi: tokens below sep; u: their sorted unique values
  ob←pr⊏/¯1(⊢-»)u←⍷∧pr←𝕩⊏˜pi←/𝕩<sep
  # lt: tokens at or beyond vi+nv, i.e. past the identifier range
  cn←pi∾lt←/𝕩≥ci←vi+nv⋄ob∾↩(ci-˜≠u)+lt⊏𝕩
  s←𝕩=sep⋄fo←𝕩=2+⊑bB⋄ls←s∧fo<○IT lo←𝕩=4+⊑bB
  ll←(¬lo/1«ps)+-⟜»1↓(lo∾1)/+`ls∾0
  dr←/ls<s⋄rt←/fo

  d←id⊏dec←idm∧ad∧asn⋄lc←-⟜»(c∾1)/+`dec∾0
  ixf←(⊏⟜(¯1∾c/gf)∾d⊸/⊏((≠𝕩)∾b⊏gf)˜)id⊏fi
  ig←(⍋⊏⟜(ixx←∾⟜(d⊸/)idx))⊸⊏⍋ixf
  # NOTE(review): from here to the end of the block the text appears to have
  # lost glyphs (names such as ui, ii, idor, idbc, or, bc and fs are never
  # followed by an assignment arrow, and list brackets are absent). Restore
  # these lines from the upstream source before relying on them.
  "Multiple definitions" ! 1´ixx(»((id)/ig)⊏⊢)ixf
  ig<(id)/(⍋+`(d∾-/˜d))ig
  d˜igErrUndeclared/˜(0´)(d)igidx
  ui(IT d)(ig)id
  ii(ui(fifsc)-cT¬+`dec)((spfi)3×2|ft)++(0)2+spxv
  idor3⥊<isidsp
  idbc21+isasn,(id-(fd)ui)0¨sp,ii

  bgi
  oridorg˜cn,cn,b,b,dr,l,l,si,si,at,oa+1oaos,(dy×⊏os)+fa+dy,rt
  bcor⊏∾idbc0¨cn,ob,15¨b,1+↕≠b,14¨dr,3+laa,ll,3+siaa,sll
               (11-⊑bG)+a/ma+𝕩,5+oar,(⊢+9×10=)16+dy-7×fatr,25¨rt
  bclelLEB bc25
  fs(ft⊏⥊32)¨(el/˜1orbc-rt)¨lc+fsc
  bcl,u,fs
}

# NOTE(review): the name and the constant 128 suggest a LEB128-style
# variable-length integer encoding — confirm. Parse applies LEB to its
# bytecode list.
# NOTE(review): this block appears to have lost glyphs in extraction (b, s,
# o and v are never followed by an assignment arrow, and `lb` has no visible
# definition). Restore it from the upstream source before relying on it.
LEB{
  b128
  s+`»i1+lb1𝕩
  o⍋⍋↕-i/s
  vol{f×𝕨(𝕨-1)(b(×f+|)∾𝕊(⌊÷b)(f/))(´f)𝕩}𝕩
  vs
}

# Compile the source text 𝕩 by running Tokenize and then Parse.
# 𝕨 is indexed by the token values pr that Parse returns as its second
# element, so it is presumably the list of primitive values in charSet order —
# confirm against the caller.
# Result: ⟨bc, objects, blk⟩, where objects is the selected elements of 𝕨
# followed by the literal constants found by Tokenize.
Compile←{
  ⟨t,r,nVar,const⟩←Tokenize𝕩       # Tokens, roles, identifier count, literals
  ⟨bc,pr,blk⟩←r‿nVar Parse t
  ⟨bc,(pr⊏𝕨)∾const,blk⟩
}