1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# Syntax highlighting utilities for e.bqn; copied from ../md.bqn
# It should be getting this information from the tokenizer instead
# Splice extra strings into 𝕩, optionally dropping some of its characters.
# 𝕨 is ⟨include,add,pos⟩:
#   include  mask over 𝕩; only characters with a 1 are kept
#   add      list of strings to insert
#   pos      one index per string in add, used as its sort key
# Kept characters are keyed by their own index. They are listed ahead of
# the added text and the grade is stable, so an added string lands right
# after index pos, and strings sharing a pos keep their order in add.
Modify ← {
  include‿add‿pos ← 𝕨
  keys  ← (/include) ∾ (≠¨add) / pos   # One key per output character
  items ← (include/𝕩) ∾ ∾add           # Output characters, in key order
  (⍋keys) ⊏ items
}
# Return BQN highlights for a string 𝕩, as an ⟨add,pos⟩ list for Modify
# (include will be all 1s).
lf ← 10+@ # Line feed: the character with code point 10
# For each element of 𝕩, give the index of the group in 𝕨 containing it.
# 𝕨 is a list of lists (the groups). An element found in no group gets ≠𝕨.
FindGroup ← {
  ends ← +`≠¨𝕨     # Running total of group lengths: one past each group's end
  flat ← ∾𝕨        # All group members as a single list
  ends ⍋ flat⊐𝕩    # Count the group ends each element's index has passed
}
# Select the candidate regions that are actually active.
# 𝕨 holds each candidate's start index and 𝕩 the matching end index.
# The earliest-starting candidate is active, and whenever a candidate is
# active so is the first candidate that starts after its end.
# Result: a two-column table of start,end rows for the active candidates,
# ordered by start.
Trace ← {
  # 𝕨 maps every node to a later node that is active whenever it is;
  # 𝕩 is the mask of nodes known to be active so far. The final node is a
  # "stop" placed after all candidates: once it is active we are done.
  # 𝕨 is composed with itself on every round, doubling the distance to
  # each successor, so roughly 2⋆⁼≠𝕩 rounds are enough.
  Follow ← {
    reached ← 1¨⌾((𝕩/𝕨)⊸⊏) 𝕩      # Mark successors of active nodes
    (⊏˜𝕨) Follow reached           # Double the step size and go again
  }⍟{0=¯1⊑𝕩}                       # Only while the stop is still inactive
  order ← ⍋𝕨                       # Sort candidates by start index
  start ← order⊏𝕨
  end   ← order⊏𝕩
  # A successor starts after the candidate ends; the stop node (index
  # ≠start) is appended as its own successor.
  succ ← (start⍋end) ∾ ≠start
  active ← ¯1 ↓ succ Follow (≠succ)↑1   # Seed with node 0; drop the stop
  active / start≍˘end
}
# Characters that can appear in numbers and names, in four groups.
# GetHighlights classifies a word by the group of its first character:
# group 0 becomes "Number", and groups 1 to 3 line up with the classes
# "Value", "Function" and "Modifier".
idChars ← ⟨
  "0123456789¯.π∞"   # Digits and other numeric characters
  ('a'+↕26)∾"𝕣"      # Lowercase letters, plus 𝕣
  'A'+↕26            # Uppercase letters
  "_"                # Underscore
⟩
# Compute syntax-highlighting tags for the BQN source string 𝕩.
# The result is a two-element list ⟨add,pos⟩:
#   add  flat list of tag strings, alternating an opening <span class='…'>
#        and a closing </span> for each coloured region
#   pos  the index in 𝕩 paired with each tag: one less than the region's
#        first character for an opening tag, and the region's last
#        character for a closing tag
# Regions of class 0 (whitespace) produce no tags.
GetHighlights ← {
# Characters used by BQN, and the HTML class they are associated with.
# The list alternates class,characters; it is reshaped to two columns and
# transposed so that classes and chars are parallel lists. A class's index
# in this list is the numeric "colour" used in col below.
classes‿chars ← <˘ ⍉ ∘‿2⥊⟨
0 , " "∾@+9‿10 # Should never be highlighted
"Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤"# Hack around UTF-16
"Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕«»⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊"
"Modifier" , "˙˜˘¨⌜⁼´˝`"
"Modifier2" , "∘○⊸⟜⌾⊘◶⎉⚇⍟⎊"
"Number" , ∾idChars # Will be classified among ↑↑ later
"Gets" , "←⇐↩→"
"Paren" , "()"
"Bracket" , "⟨⟩"
"Brace" , "{}"
"Ligature" , "‿"
"Nothing" , "·"
"Separator" , "⋄,"
"String" , "'""@"
"Comment" , "#"
⟩
# NOTE(review): the 5‿2⥊ reshapes above appear to assume each double-struck
# letter occupies two string elements (UTF-16 code units), keeping only the
# second of each pair — confirm for the implementation running this file.
# Turn non-whitespace classes into ⟨open,close⟩ html tags.
# Row i of classTag is the tag pair for class i; row 0 is two empty strings.
classTag ← ""‿"" ∾ > {⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨ 1↓classes
# Find each character's group, sending unknowns to 1.
# FindGroup returns ≠chars for a character in no group; the train
# subtracts (≠chars)-1 from exactly those entries, leaving 1 ("Value").
col ← (≠chars) (⊢--⟜1×=) chars FindGroup 𝕩
# Locate comments and strings.
c ← 𝕩='#' # Mask of every # (candidate comment start)
le← /(𝕩=lf)∾1 # Indices of line feeds, plus one past the end of 𝕩
# Line endings (le) end every comment (/c) on the line, so take a copy
# for each # before that line but not the previous.
ce← le /˜ -⟜» c/⊸⍋le
# A single quote can only be used if there's another two places down.
s ← /0‿0⊸«⊸∧𝕩='''
# Every double quote but the last is a candidate string start, ending at
# the next double quote.
d ← /𝕩='"'
css ← ⟨ s ⋄ ¯1↓d ⋄ /c ⟩ # Comment or string start
cse ← ⟨ 2+s ⋄ 1↓d ⋄ ce ⟩ # Corresponding end indices
# Now b is a table of (start,end) pairs
# Trace keeps only the candidates that are really active, for instance
# dropping a # that lies inside a string.
b ← css Trace○∾ cse
# Given a list of pairs, get a mask indicating included regions
# /⁼ marks each start and end index, ↑ pads or truncates to the length of
# 𝕩, the ≠` scan fills from each start up to its end, and ∨⊢ adds the
# end index itself.
ToMask ← (≠`∨⊢) (≠𝕩)↑/⁼∘∾
# Split rows and group into text‿comments
# The left argument is 1 for regions starting with # and 0 otherwise.
# Appending 2 to both sides before ⊔ guarantees groups 0 and 1 exist even
# when empty; 2↑ then discards the extra group.
tc ← ((⊏˘b)⊏c) 2{𝕗↑⊔○(∾⟜𝕗)} <˘b
# Color with "String" and "Comment"
# These are the last two classes, so their indices are (≠classes)-2 and
# (≠classes)-1; taking the maximum lets them override whatever colour the
# characters inside the region had.
col ⌈↩ +´ (2‿1-˜≠classes) × ToMask¨ tc
# UTF-16 hack: first half of a special name needs to match the second
# Wherever 𝕩 equals the first element of "𝕩", take the colour of the
# following position.
col↩ («col) ⊣⌾((𝕩=⊑"𝕩")⊸/) col
# Color numeric literals and identifiers
id ← col=5 # ←→ 𝕩∊idChars
w ← »⊸< id # Word (identifier or number) beginning mask
wt ← idChars FindGroup w/𝕩 # Type based on first character
wt+↩ '_' = («⊸<id)/𝕩 # Modifier1 to Modifier2 based on word end
wt+↩ 5×0=wt # Shift 0 to Number
wi ← 1-˜+`id/w # Index of word containing each of /id
# Give every word character the class of the word it belongs to.
col↩(wi⊏wt)⌾(id⊸/) col
# Tags are placed at boundaries between different colors
# Shifting in ¯1 makes the first character always count as a boundary.
boundary ← ¯1⊸»⊸≠ col
bcol ← boundary / col # Colour of each region
# Windows gives us rows of start,end where the end position of one
# color is the start of the next
# Subtract one to place before the starting character
pos ← 2 ↕ 1-˜/boundary∾1
# Remove class 0 regions, as these don't use tags
# Both tables are filtered by the same mask and then flattened, so the
# tags and positions stay aligned.
(⥊ (0<bcol)⊸/)¨ ⟨bcol⊏classTag, pos⟩
}
|