1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
# Character tables used by the tokenizer and parser.
# lf is the linefeed character (code point 10).
lf←@+10
# charSet is the join of all character groups listed below, in order;
# cgl is the list of group lengths. The order of the groups and of the
# characters inside each group matters, since later code refers to
# characters by their position in charSet.
charSet‿cgl←(∾ ≍○< ≠¨)⟨
"+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!" # Function
"˙˜˘¨⌜⁼´˝`" # 1-modifier
"∘○⊸⟜⌾⊘◶⎉⚇⍟" # 2-modifier
"⋄,"∾lf # Separator
"⇐←↩" # Gets
"(){}⟨⟩" # Bracket
"‿" # Ligature
"·" # nOthing
# Use last character in case of UTF-16 (like dzaima/BQN)
¯1⊏˘10‿∘⥊"𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕗𝕘" # Input (𝕣 pending; ℝ not allowed)
'0'+↕10 # Digit
"π∞¯." # Numeric
"_"∾˜⥊"aA"+⌜↕na←26 # Alphabetic
(¯1↓"𝕨")∾" "∾@+9 # Whitespace (or special name prefix in UTF-16)
"#'""@" # Preprocessed characters
⟩
# One ⟨start,length⟩ pair per group, in the same order as the list above.
# The start is the sum of the lengths of all earlier groups, that is, the
# index in charSet of the first character of the group.
bF‿b1‿b2‿bS‿bG‿bB‿bL‿bO‿bI‿bD‿bN‿bA‿bW‿bP←≍¨˜⟜(+`0⊸»)cgl
# 𝕨 M 𝕩: range membership. 𝕩 is a ⟨start,length⟩ pair; the result is 1
# where start ≤ 𝕨 and 𝕨 < start+length, and 0 elsewhere.
M←1⊸⊑(0⊸≤∧>)-⟜⊑ # ∊ for an init,length pair 𝕩 as above
sep←⊑bS # Start of the separator group: index of ⋄ in charSet
# Pluralize: return 𝕨 (or 𝕩 itself if there is no 𝕨) followed by the
# letter s when 𝕩 has more than one element, and unchanged otherwise
Pl←⊣∾(1<≠∘⊢)/"s"˙
# Template: 𝕗 is a list of pieces. Each piece is called as a function on 𝕩
# (a piece that is plain data gives itself) and the results are joined in order
_tmpl←{arg←𝕩 ⋄ ∾{𝕏arg}¨𝕗}
# Convert characters to numbers, mostly the same as tokens
# The block is applied to charSet (as 𝕗) right away; what it returns is a
# function taking a list of characters and giving their indices in charSet.
# That function raises an error if some character is not in charSet.
CharCode←charSet{
# Raise an error naming the characters in 𝕩; Pl adds the plural s as needed
ErrUnknownChars←0!˜⟨"Unknown character"⊸Pl,": ",⊢⟩_tmpl
# 𝕨 Chk 𝕩: 𝕨 is the input and 𝕩 the indices found. Gives 𝕩 back, but if
# 𝕩⊏𝕗 does not match 𝕨 it first errors on the characters of 𝕨 that differ
Chk ← ⊢⊣ErrUnknownChars∘(≠/⊣)⍟≢⟜(⊏⟜𝕗)
# g sorts 𝕗. Searching the sorted characters gives for each input the
# position of the last entry that is not greater (at least 0), and
# selecting from g turns that position into an index in 𝕗
g←⍋𝕗 ⋄ ⊢ Chk g⊏˜1-˜1⌈(g⊏𝕗)⍋⊢
}
vi←⊑bD # Start of identifier numbering
# Role of each character below vi: every entry of the list is reshaped to
# the length of the matching group (the first nine groups of cgl) and the
# results are joined. Brackets alternate ¯1‿0; the input group gets 1 for
# its first five characters and 0 for the last five.
charRole←∾⥊¨˜⟜(≠↑cgl˙)⟨1,2,3,¯1,¯3,¯1‿0,¯2,0,5/⌽↕2⟩ # For first vi chars
# 𝕨 T 𝕩 is the running maximum of 𝕨×𝕩.
# IT 𝕩 uses indices ↕≠𝕩 as 𝕨, so for a boolean 𝕩 it gives at each position
# the index of the latest 1 so far (0 if there is none yet).
# I1T is the same with indices counted from 1.
T←⌈`× ⋄ IT←↕∘≠⊸T ⋄ I1T←(1+↕∘≠)⊸T
# Source to ⟨tokens, roles, literals, token starts, token ends⟩
# literals is the list ⟨identifiers, numbers, characters, strings⟩, each
# holding unique values only
# Identifiers then literal tokens are numbered starting at vi
Tokenize←{
# Resolve comments and strings
# c marks comment starts, sm and dm mark single and double quotes;
# s lists single quotes that have another single quote two places later
c←𝕩='#'⋄s←/0‿0⊸«⊸∧sm←𝕩='''⋄d←/dm←𝕩='"'
g←⍋q←∾⟨ s⋄¯1↓d⋄/c⟩ ⋄q↩g⊏q # Open indices
e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`c)⊸//(𝕩=lf)∾1⟩ # Matching close indices
Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟(0=¯1⊑⊢) # Mark reachable openings
St←(≠𝕩)↑·/⁼((≠↑∾⟜≠Se 1∾0¨)q⍋e)⊸/ # All indices → reached mask
a←St q⋄b←St e⋄f←¬≠`ab←a∨b # Open/close masks; filter
# Error at the first quote that is marked in f but not in b
{⟨⊑/𝕩,"Unclosed quote"⟩!0}⍟(∨´)(sm∨dm)∧b<f
# Extract character and string literals
u←f∧𝕩='@'⋄ci←/u∨»a∧sm
chr←(⊏⟜𝕩-('@'-@)×⊏⟜u)ci # Characters (indices ci)
f>↩qe←dm∧«a∧↩dm # Quote Escape ""
str←1↓¨𝕩⊔˜1-˜(+`si←a>»qe)×≠`dm∧ab # Strings (indices /si)
# Extract words: identifiers and numbers
# Fs filters a token-aligned list by mask 𝕨 and filters is the same way
ie←/f⋄is←ie≠⊸↑/1»f⋄Fs←{is/˜↩𝕨⋄𝕨/𝕩} # Token start and end
is-↩is(-×⊏⟜c)ie # Comment → ending newline only
t←CharCode ie⊏𝕩
w←»⊸<l←t M bD(⊣≍-˜)○⊑bW⋄us←t=¯1++´bA # Word chars l, start w
wk←na⌊∘÷˜(⊑bA)-˜w/t # Kind of word from first char
t-↩na×l∧t≥na+⊑bA # Case-insensitive
i←l>n←l∧(+`w)⊏0∾wk<0 # Identifier/Number masks
# ReadNums gets the number tokens on the right and, on the left, the
# entries of is selected by the same filter
num←is ReadNums○(((0∾us)<∨⟜«0∾n)/0⊸∾) t×l # Numbers
ir←(us/˜«⊸<i)(⊢+∧⟜(2⊸=))0⊸≤⊸/wk # Identifier role
id←(ig←1-˜(i>us)×+`w>n)⊔t # Identifiers
{⟨ig⊐/𝕩,"Numbers can't start with underscores"⟩!0}⍟(∨´)(⊑bA)>⊑¨id
# Deduplicate literals and identifiers; other cleanup
ki←((⍋⊏⟜n)⊸⊏/w)∾(ci∾/si)⊏+`»f # Indices in t
k←id‿num‿chr‿str⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k # IDs j into uniques k
wf←¬l∨t M bW⋄ie/˜↩wf∨>⟜«l # Index management for...
t↩(w∨wf)Fs(∾j++`vi»≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace
t-↩t(M×-⟜⊑)bS # Separators are equivalent
p←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing)
sk←sb/˜p>∨⟜«(p+(sb-p)⊏t)∊3‿5+⊑bB # Keep the first of each group that's not just inside a bracket
t{ie/˜↩𝕨⋄𝕨Fs𝕩}˜↩1¨⌾(sk⊸⊏)t≠sep # Remove the rest
r←ir⌾((t M vi≍≠⊑k)⊸/)(vi⌊t)⊏charRole∾0 # Role
t+↩5×t M⟨⊑bI,5⟩ # Case-insensitive special names
# Result: tokens, roles, unique literal lists, token starts, token ends
⟨t,r,k,is,ie⟩
}
# 𝕩 is a list of tokens that contains the numeric literals, each
# preceded by 0. Return the numbers.
# 𝕨 is a list aligned with 𝕩 and is used only for error reports: the
# entries of 𝕨 at the offending positions are put in the error value.
# Tokenize passes token start indices here.
ReadNums←{
# mask 𝔽 _err_ 𝔾: if any entry of the mask 𝕩 is set, raise an error whose
# value pairs the selected entries of 𝔾 with the result of 𝔽 on the mask
_err_←{(0!˜/⟜𝔾≍○<𝔽)⍟(∨´)}
# Message listing the letters given as 𝕩 (token codes looked up in charSet)
EChars←⟨"Letter"⊸Pl," """,⊏⟜charSet,""" not allowed in numbers"⟩_tmpl
e‿p‿i‿n‿d←=⟜𝕩¨((⊑bA)+-´"ea")∾+⟜↕´bN # Masks for eπ∞¯.
EChars∘(/⟜𝕩)_err_𝕨 ¬e∨𝕩<⊑bA
# z marks the 0 that precedes each number; c marks component starts
# (z or e); s also includes the decimal point; m excludes ¯ and c
s←d∨c←e∨z←0=𝕩⋄m←¬n∨c
"Negative sign in the middle of a number"_err_𝕨 n>»c
"Portion of a number is empty"_err_𝕨 (1«s)∧n∨s
"Ill-formed decimal or exponent use"_err_(s/𝕎) ¬(0⊸=∨»⊸<)s/𝕩
"π and ∞ must occur alone"_err_𝕨 (p∨i)>1(»∧(p∧«e)∨«)z∨n
f←(17≥¬(⊢-T)+`)⊸∧(«≤(d<𝕩≠⊑bD)>○I1T¬)⊸∧m # No leading 0s; max 17 digits
l←(¯1∾π‿1∾˜↕10)⊏˜(¬d)/f×𝕩-1-˜⊑bD # Digit lookup, with ∞ as 1 to avoid ∞×0
v←(>⟜«0≤l)/(0⊸≤××⟜10⊸+)`»«l # Numeric values—mantissas and exponents
v×↩1‿¯1⊏˜(r←>⟜»m)/»n # Negate if ¯
vm←c/z # Mask of mantissas in l
dp←vm/(»⊸<×-)⟜(⊏⟜(I1T d∾0))1+/>⟜«f # Decimal position
t←10⋆|ee←dp-˜vm/«v׬vm # Power of 10
# Multiply when the exponent ee is positive and divide when it is negative
t÷˜⌾((0>ee)⊸/)t×⌾((0<ee)⊸/)vm/v×(r/i)⊏1‿∞ # Correct ∞ then ×10⋆ee
}
# Parse: 𝕩 is the list of tokens and 𝕨 is the list r‿vn‿i‿e:
#   r   role of each token
#   vn  identifier list; only its length nv is used here
#   i,e per-token values selected for error reports and for the instruction
#       source positions; Compile passes token start and end indices
# Result: ⟨bytecode, objects to load, block metadata, source positions⟩
# All validation errors are raised through _err_ (or ! directly).
Parse ← {r‿vn‿i‿e←𝕨⋄nv←≠vn
# 𝕨 is the list of opening and 𝕩 the list of closing bracket codes that
# failed to match; choose the message and raise the error
ErrMismatchedBrackets←{
Lcs ← ∧○(0<≠)◶⟨0, ¯1 ⊑ 0¨∘⊢ {𝕩⌈⌈`𝕨+»𝕩}˝ =⌜⟜⌽⟩
_mis_ ← {"Missing "∾𝕗∾" "∾charSet⊏˜𝕘+⊢/˜≠∘⊣=⊒}
Msg ← >○≠◶⟨"opening"_mis_ 0, "closing"_mis_ 1˜⟩
0 !˜ 𝕨 (Lcs<⌊○≠)◶⟨Msg,"Malformed bracket nesting"⟩ 𝕩
}
# mask 𝔽 _err_ 𝔾: if the mask 𝕩 has any set entry, raise an error whose
# value pairs the i and e entries at positions 𝕩/𝔾 with the message from 𝔽
_err_←{(0!˜(∧∘⍉(i≍e)⊏⎉1˜/⟜𝔾)≍○<𝔽)⍟(∨´⍟=)}
# Bracket and ligature validation and handling
# Open brackets have role ¯1 and closed ones have role 0
"Empty program" ! 0<≠𝕩
g←⍋pd←+`p←(¯1-2×r)×𝕩 M bB⋄gb←g⊏r=¯1 # Paren (actually any bracket type) depth and grade
(g⊏p)(>⟜0⊸/ErrMismatchedBrackets⍟≢1-˜<⟜0⊸/)gx←g⊏𝕩
"Swapped open and closed brackets" ! 0≤(⊑g)⊑pd
"Parentheses can't contain separators"_err_(gb/G) ((⊑bB)⊸=»⊸∧sep⊸=)gb/gx
{"Empty statement or expression"_err_(𝕩/0∾G) (4+⊑bB)≠𝕩/0∾gx}1⊸«⊸∧1∾gb
r-↩(𝕩=⊑bG)>ec←«0≤r+p # Role ¯4 for exports: ⊑bG is ⇐
"Invalid assignment or stranding use"_err_(↕∘≠) ((¯4⊸<∧≤⟜¯2)r)>ec∧»0≤r
sr←»⌾(g⊸⊏)sl←«⊸∨r=¯2⋄ns←¬sl∨sr # Strand right and left; not stranded
cp←𝕩=1+⊑bB # Closed paren
nr←(IT¬cp)⊏𝕩=⊑bO # Nothingness role: 1 for · (handle 𝕨 later?)
"Can't strand Nothing (·)"_err_(↕∘≠) ns<nr
"Can't return Nothing (·)"_err_(↕∘≠) nr∧1«𝕩=3+⊑bB
g⊏˜↩⍋g⊏sl # Avoid reordering strands in rev
# Permutation to reverse each expression: *more* complicated than it looks
rev←⍋+`¯1↓(¯1∾g)(⊣⍋⊸⊏⊏˜⟜⍋¬⊏˜)⍋+`⊸+1∾g⊏sl∨r=¯1
gf←⍋fd←+`br←rev⊏p×𝕩M⟨2+⊑bB,2⟩ # Order by brace depth fd to de-nest blocks
rev⊏˜↩gf⋄fd⊏˜↩gf⋄br⊏˜↩gf
# From here on 𝕩, p, i and e are in the reordered token order
𝕩⊏˜↩rev⋄p⊏˜↩rev⋄i⊏˜↩rev⋄e⊏˜↩rev
fi←+`b←br>0⋄c←/br<0 # Block Begin (mask) and Close (index), in matching order
# Block properties
H←¬1(∾/∾˜)b(∨/⊣)= # Which blocks have a property
sn←(0‿3‿4‿5+5+⊑bI)⍋𝕩⋄sp←/𝕩 M bI # Special name index
fx←1 H sn⋄fr←(fx∨0⊸<)⊸+ft←(2⊸H⌈2×3⊸H)sn # Block immediacy ¬fx and type ft
"Special name outside of any block"_err_(/{(0=fi)∧𝕩 M bI}∘𝕩) 0<⊑fr
nf←(⊑bG)H𝕩 # Block has exports (is namespace)
"Can't export from a non-immediate block"_err_(/{(fi⊏0≠fr)∧𝕩=⊑bG}∘𝕩) ∨´nf>fr=0
fsc←(ft⊏0‿2‿3)+3×fx # Special name count
# Propagate roles through parentheses
# ir is the role of the expression ending at each position (truncated to the right)
r↩sl-˜ns×(1↓fr)⌾((c⊏rev)⊸⊏)r # Add block roles; make strand elements ¯1
pt←cp∧ns # Pass-through parentheses: not in strands
pp←pt∧»es←1⊸»⌾(g⊸⊏)r<0 # Parens enclosing one object (maybe with assignment) don't change roles
ir←((»⌾(g⊸⊏)(1+es)×3=⊢)⌈⊢-es<2≤⊢)r+pp×(IT¬pp)⊏r # Propagate modifier roles
ir⌈↩(IT¬pt∧ir=0)((⊏-⊢)⟜(+`¬pp)(⊢⌊1⌈+)⊏)ir # ...and function roles
r+↩pt×»ir # Roles at pt were 0; set them now
ir(×⟜¬-⊢)↩nr # Include nothingness (doesn't handle 𝕎)
# Prep for lexical resolution before reordering 𝕩
idx←𝕩⊏˜id←/idm←(0⊸≤∧<⟜nv)xv←𝕩-vi
# Parsing part 1
p-↩br⋄g↩⍋+`p⋄gr←g⊏rev # Order by non-brace bracket depth
sll←1+2÷˜0(<-○/>)gr⊏sr-sl⋄l←/g⊏𝕩=5+⊑bB # Strand length; list starts
bp←⟨/b,c⟩∾¨0(<≍○(</⟜g)>)g⊏p # Bracket pairs
{i↩(𝕨⊏i)⌾(𝕩⊸⊏)i⋄e↩(𝕩⊏e)⌾(𝕨⊸⊏)e}´bp # Highlight all contents of a pair when error reporting
g⊏˜↩gs←⍋gr⊏sl⋄gr↩g⊏rev⋄gi←⍋g # Send strand prefixes *‿ to the end
𝕩⊏˜↩g⋄r⊏˜↩gr⋄ir⊏˜↩gr⋄l↩(l⊏⍋gs)∾/gr⊏sr>sl
ni←1+(1↓nf)/bk←c⊏gi # Indices of module assignment (import) arrows
"Modules must be immediately assigned"_err_(ni⊏G)∘(∨˝¯3‿0‿¯1≠r∾⟜¯1‿1⊸(⊏˜)(↕3)+⌜⊢)⍟(0<≠)ni
"Modules must be destructured"_err_(ni⊏G) (⊏⟜idm>⊏⟜rev⊏sr˙)(ni+1)⊏g
a←r≤¯3⋄ps←a<r<0⋄tr←1≤er←ir⊏˜IT»ps # er: expression role; tr: train or modifier expression
no←ir<0⋄ne←er<0 # Nothing value; expression
"Nothing (·) cannot be assigned"_err_(G) a∧ne
oa←⌽/op←(er<2)∧r≥2⋄ro←op∨«op∧m2←r=3 # op: active modifiers; ro: mod or right operand
"Missing operand"_err_(G) op>(«∧m2≤»)no<m2<ro∨r∊↕2
"Double subjects (missing ‿?)"_err_(G) ∧⟜«ro»⊸∨⊸<r=0
ma←tr<(𝕩=2+⊑bG)∧«ir≥1 # Modified assignment
os←↕∘≠⊸(⊣-T)⌾⌽¬ro∨ma # Operator skip: distance rightward to derived function start
at←1+⊏⟜os⊸+ai←/a⋄af←¯4≠ai⊏r # Assignment target; af for actual (non-export) assignment
"Role of the two sides in assignment must match"_err_(at⊏G) (0⌈at⊏er)≠ar←at⊏r
aid←(¯10⊸≤∧<⟜nv)𝕩-vi # Assignable identifier
"Function or modifier assignment to a non-name"_err_(at⊏G) ¬(ar=0)∨at⊏aid
ak←1+af+(ai∊ni)+2×(⊑bG)-˜ai⊏𝕩 # Class of assignment: 1⇐ 2⇐? 3⇐{⇐} 4←? 5←{⇐} 6↩? 7↩{⇐}
aa←×g⊏ac←»+`gi⊏«⊸-ak⌾(at⊸⊏)0¨𝕩 # ac broadcasts it to the entire target
"Assignment target must be a name or list of targets"_err_(G) ((𝕩=3+⊑bB)<(aid∧r≤0)∨ps∨𝕩 M bB)<aa∧0≥er
"Can't use result of function/modifier assignment without parentheses"_err_(G) (0<er)∧(0≤r)∧»⊸>aa
# Lexical resolution (independent of parsing part 2 below)
ip←id⊏ac⋄d←ip M 2‿4 # Assignment classes; which are definitions
zic←0¨ic←id/˜icf←2(<∧|)ip⋄II←∾⟜(icf⊸/)∘id # Imports have class 3, 5, or 7
il←ic⊏+`br<0⋄ex←il⊏(/∾≠)b # Block for each import and its end index
# Order every referenced identifier, an extraction for each import, and an undeclaration for each declaration
ixf←(((1=ip)+⊏⟜(¯1∾b/gf))∾(il∾d⊸/)⊏((≠𝕩)∾c⊏gf)˜)id⊏fi # First order by block index, open for real and closed for virtual
d∾↩zic⋄idx∾↩icf/idx # Add in imports
ig←(⍋⊏⟜(ixx←∾⟜(d⊸/)idx))⊸⊏⍋ixf # Then order by name
{"Redefinition"_err_(𝕩⊏·∾⟜(d⊸/)II) ¬ixx∨○(»⊸≠𝕩⊸⊏)ixf} (≠d)⊸≤⊸/ig
ig↩<⟜(≠d)⊸/(⍋+`∘⊏⟜(d∾-/˜d))⊸⊏ig # Last order by declaration depth
d⊏˜↩ig⋄idx⊏˜↩ig⋄II↩ig⊏II
("Undefined identifier"Pl/)_err_(II) d<»⊸≠idx
{"Imports must have been exported"_err_(f/II) 2≤-⟜»(f←d∨×𝕩)/𝕩} ig⊏(ip M 1‿3)∾2¨ic
ui←(IT d)⊸⊏⌾(ig⊸⊏)id∾ex # Index of definition (⊐ for identifiers)
lc←(1«b)/dn←b(⊢-T)+`idm∧ac M 2‿4 # Locals Count
idd←(id-○(⊏⟜fd)id≠⊸↑ui)∾zic∾0¨sp # Identifier frame depth
idi←(ui⊏(fi⊏fsc)+dn-1)∾((sp⊏fi)⊏3×fx)++⟜(0⊸≤)2+sp⊏xv # Slot within frame
# Generate code to load identifiers and list each module's extracted exports
ib←⟨21+0<ip∾zic∾sp⊏ac,idd,idi⟩ # Identifier bytecode: instruction, depth, slot
ed←/¯1⊸»⊸<ex # Select first export in each module
idor←∾¯1⌽1‿3/⟨3/id∾ex∾sp,ed⊏ex⟩ # Identifier bytecode ordering
idbc←¯1⌽⟨⥊⍉>ib,3¨ed,ex≠⊸(«-⊢)ed,14¨ed⟩ # and bytecode
# Parsing part 2
ta←tr∧2(>∨|)ps(⊢-T)+`¬ro # Train argument (first-level)
fa←/(fe←ta∨ro∨«⊸∨ps<aa)<ff←(r=1)∨»op # Active functions: cases fe are excluded
"Second-level parts of a train must be functions"_err_(G) tr>fe∨ff
dy←fa⊏«no<(tr∧r≥0)∨ro<r=0 # Dyadic
ob←pr⊏/¯1(⊢-»)u←⍷∧pr←𝕩⊏˜pi←/𝕩<sep # Objects to be loaded
cn←pi∾lt←/𝕩≥ci←vi+nv⋄ob∾↩(ci-˜≠u)+lt⊏𝕩 # Constants
s←𝕩=sep⋄fo←𝕩=2+⊑bB⋄ls←s∧fo<○IT lo←𝕩=4+⊑bB # List Separators: after ⟨lo, not {fo
"Can't use Nothing (·) in lists"_err_(G) (»lo∨ls)∧ne
ll←sll∾˜(¬lo/1«ps)+-⟜»1↓(lo∾1)/+`ls∾0 # List Length
dr←/ls<s⋄rt←/fo # Drop (block separator) and return
dr∾↩((1+dy)×fn←fa⊏ne)/fa⋄fa/˜↩¬fn⋄dy/˜↩¬fn# Turn function applications on · to drops
# Bytecode generation: numbers bc ordered by source location (after rev) oi
or←⍋oi←idor∾g⊏˜∾⟨cn,cn,bk,bk,dr,2/l,af/at,oa+1⌈oa⊏os,(dy×⊏⟜os)⊸+fa+dy,rt⟩
bc←or⊏∾idbc∾⟨0¨cn,ob,15¨bk,1+↕≠bk,14¨dr,⥊⍉(3+l⊏aa)≍ll
11⌈(11-1+⊑bG)+(af/ai)⊏ma+𝕩,5+oa⊏r,(⊢+9×10⊸=)16+dy-7×fa⊏er,25¨rt⟩
# Instruction source positions
MI←-⊸↓˜⟜≠∾g⊏˜⊢⋄oj←(rt-∘≠⊸⌽oi⌊1-˜≠i)Mi∾⟨1-˜af/at,oa,fa⟩
ind←⟨oj,oj Mi fa⊏IT»¬ro∨ma⟩(¯1∾˜or⊏(≠rt)⊸⌽)⊸⊏¨i‿e
# Function metadata and overall output
⟨bc∾25,u,⟨ft,¬fx,/1∾or≥bc-○≠rt,lc+fsc⟩,ind⟩
}
# Compile source text 𝕩. 𝕨 is a list that is indexed by the object indices
# Parse reports (presumably the runtime primitives; confirm with the caller).
# Result: ⟨bytecode, constants, per-block metadata, source positions, tokenizer output⟩
Compile←{
  # Tokenize, keeping the complete tokenizer result for the output
  lexed←Tokenize 𝕩
  ⟨toks,roles,lits,starts,ends⟩←lexed
  # Parse needs only the identifier list (the first entry of lits)
  ⟨code,primIdx,blocks,srcPos⟩←⟨roles,⊑lits,starts,ends⟩ Parse toks
  # Constants: selected entries of 𝕨, then all non-identifier literals
  consts←∾⟨primIdx⊏𝕨⟩∾1↓lits
  ⟨code, consts, <˘⍉>blocks, srcPos, lexed⟩
}
|