aboutsummaryrefslogtreecommitdiff
path: root/md.bqn
blob: 88b94db7726e46ec2591c732bfeab861538d3c39 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
# The Markdown function is a markdown to html converter for a "good
# enough" subset of Github-flavored markdown, as specified at
# https://github.github.com/gfm/ .
#
# Additionally, it highlights code sections as BQN, and executes
# sections that are doubly indented (eight spaces), placing their
# results below them.

# Not supported:
# - Thematic breaks like *** or ---
# - Setext headings (underlined with ==== or ----)
# - Fenced code blocks (marked off with ``` or ~~~)
# - HTML blocks (only a lazy approximation is handled; see IsHtmlBlock)
# - Link reference definitions (who uses these?)
# - Block quotes (start with >)
# - Task lists

# Here, a markdown file is represented as a list of its lines, which are
# strings (they don't include any line ending character).
# The html file is constructed directly as a string, using Html.

################################
# Utilities

# Shift cells 𝕨 into array 𝕩, maintaining its total length
Shl ←   β‰ βˆ˜βŠ’ ↑ ∾   # From the left
Shr ← -βˆ˜β‰ βˆ˜βŠ’ ↑ ∾˜  # From the right

# Index of first zero, or number of leading 1s in a boolean list
Lead ← βŠ‘ ⊐⟜0

# 𝕨 is a list of lists (groups). For each cell of 𝕩, return the index of
# the first group containing it; cells found in no group get ≠𝕨.
FindGroup ← {
  ends ← +` ≠¨ 𝕨      # Running total of group lengths: one past each group's end
  flat ← ∾ 𝕨          # All group members laid end to end
  where ← flat ⊐ 𝕩    # Position of each cell of 𝕩 in flat (≠flat if absent)
  ends ⍋ where        # Number of group ends at or before that position
}

# For a boolean list 𝕩, give at each position the length of the run of
# consecutive 1s ending there (0 where 𝕩 is 0). Computed as the distance
# from the current one-based index to the one-based index of the most
# recent 0, which is taken to be 0 if there has been none.
CountRuns ← {
  pos ← 1 + ↕≠𝕩              # One-based indices
  lastFalse ← ⌈` pos × ¬𝕩    # Index of the latest 0 seen so far
  pos - lastFalse
}

# 𝕩 is a string. Return a mask of the escaped characters: those directly
# preceded by a run of backslashes of odd length (an even-length run is
# made up of backslashes escaping each other).
IsEscaped ← {
  slashRun ← CountRuns '\' = 𝕩  # Length of the backslash run ending at each position
  oddRun ← 2 | slashRun         # 1 where that run has odd length
  0 Shl oddRun                  # Move over by one to mark the following character
}

# Strip spaces from both ends of the string 𝕩; interior spaces are kept.
Trim ← {
  sp ← ' ' = 𝕩
  leading  ← ∧` sp       # Spaces not preceded by any non-space
  trailing ← ∧`⌾⌽ sp     # Same scan, run from the right
  (¬ leading ∨ trailing) / 𝕩
}

# At each position, report the value 𝕨 had at the most recent index
# (up to and including the current one) where 𝕩 was false. If 𝕩 has not
# been false yet, the result is 0.
PrecedesGroup ← {
  # Slot 0 of the lookup table is the "before start" entry, which is
  # false; the real entries of 𝕨 follow at one-based indices.
  table ← 0 ∾ 𝕨
  # One-based index where 𝕩 is false and 0 elsewhere; a running maximum
  # then yields the latest false index seen so far.
  lastFalse ← ⌈` (¬𝕩) × 1 + ↕≠𝕩
  lastFalse ⊏ table
}

# 𝕨 is a list of possible expression start indices, in any order, and 𝕩
# is the list of corresponding endpoints. The expressions are mutually
# exclusive and do not nest, and are enabled in index order. Return a
# shape ·‿2 array where the rows give the start and end of each enabled
# expression in index order.
Trace ← {
  # 𝕨 is a list with one index for each possible start, giving a later
  # start that is known to be enabled if that one is.
  # 𝕩 is a mask of all starts known to be enabled.
  # A "stop" position that follows all expressions tells when to stop.
  # At each step the distance from a start to its successor in 𝕨 is
  # doubled, so the maximum number of steps is about 2⋆⁼≠𝕩.
  En ← {
    𝕩 ↩ 1¨⌾((𝕩/𝕨)⊸⊏)𝕩  # Starts following from an enabled one are enabled
    𝕨 ↩ ⊏˜ 𝕨           # Double the number of steps in 𝕨
    𝕨 En 𝕩             # Repeat
  }⍟{0=Β―1βŠ‘π•©}           #        until the stop is enabled

  g ← ⍋𝕨               # Order expressions by starting index
  start ← gβŠπ•¨
  end   ← gβŠπ•©
  next ← start ⍋ end   # An expression's successor starts after it ends
  next βˆΎβ†© β‰ next        # The stop node is its own successor
  enabled ← Β―1 ↓ next En (β‰ next)↑1  # Search and remove the stop
  enabled / startβ‰Λ˜end # List of enabled starts and ends
}

# Join lines with newline characters. Include a trailing newline.
JoinLines ← ∾ ∾⟜lfΒ¨

# Wrap the string `contents` in an html element. `open` is the text of
# the opening tag without its angle brackets, so it may carry attributes
# after the tag name; the closing tag uses only the part of `open`
# before its first space.
Html ← {open 𝕊 contents:
  nameLen ← ⊑ open ⊐ " "     # Position of the first space, or ≠open if none
  close ← nameLen ↑ open
  ("<"∾open∾">") ∾ contents ∾ "</"∾close∾">"
}

# Delete elements from the list 𝕩 and splice new ones in. 𝕨 is a triple:
# - include: mask of the elements of 𝕩 to keep
# - add:     list of lists to insert
# - pos:     for each list in add, the position in 𝕩 it attaches to
# Every kept element is keyed by its own index and every inserted element
# by its list's position; a stable sort on the keys then puts insertions
# just after the position given, in the order they appear in ∾add.
Modify ← { ⟨include,add,pos⟩𝕊𝕩:
  keptKeys ← / include          # Original indices of the survivors
  addKeys  ← (≠¨add) / pos      # Each position repeated once per inserted element
  order ← ⍋ keptKeys ∾ addKeys  # Stable: kept elements sort ahead of equal-keyed insertions
  order ⊏ (include/𝕩) ∾ ∾add
}


################################
Markdown ← {filenameπ•Šπ•©:
  extensions ← filename β‰’ 0
  path ← extensionsβ—Ά""β€Ώ(⊒/˜·∨`⌾⌽'/'⊸=) filename

  ######
  # First we classify each line based on the type of block it can start.
  ClassifyLine ← (0<β‰ )β—Ά(0β€Ώ0)β€Ώ{
    ind ← βŠ‘ lineChars FindGroup βŠπ•©
    getLen ← ind βŠ‘ lineClas∾⟨0˜⟩
    l ← GetLen 𝕩
    ⟨ind ∧ l>0 β‹„ l⟩
  }

  # Character entity escaping
  # In order to use this with other modifications such as highlighting,
  # CharEntities returns a mask of characters to be escaped, and their
  # corresponding escapes.
  CharEntities ← {1Β¨βŠΈπ•Šπ•©;  # 𝕨 gives characters to potentially escape
    # The string gives escapes and their names, separated by spaces.
    # First split it on the first character.
    ce ← (1-ΛœΒ¬Γ—+`)∘=βŸœβŠ‘βŠΈβŠ” " ""quot &amp <lt >gt"
    # Characters to escape are given first
    chars ← βŠ‘Β¨ce
    # HTML character entities start with & and end with ;
    entities ← ("&"∾∾⟜";")Β¨ 1↓¨ce

    # Replace a character if its bit in 𝕨 is set and it's on our list.
    ind ← chars ⊐ 𝕩
    useEntity ← 𝕨 ∧ ind < β‰ chars
    ⟨¬ useEntity , entities ⊏˜ useEntity/ind , /useEntity⟩
  }

  # Non-empty lines in code blocks have 4 leading spaces
  ProcCode ← {
    # Strip the leading spaces
    𝕩 ↩ 4 ↓¨ 𝕩
    code ← JoinLines 𝕩

    # Highlight, and escape html-unsafe characters as character entities
    cβ€Ώci ← extensionsβ—Ά(2β₯Š<⟨⟩)β€ΏGetHighlights code
    emβ€Ώeβ€Ώei ← CharEntities code

    # If every line is indented by at least 4 additional spaces, we will
    # execute each one and insert the results.
    addRslt ← ∧´ ' ' = ∾ 4 (βŒŠβŸœβ‰  ↑ ⊒)Β¨ 𝕩
    # Don't show assignment results by default
    ShowRslt ← {
      depth ← +` "(){}⟨⟩" (⊣(β‰ βŠΈ>Γ—Β―1⋆2|⊒)⊐) 𝕩
      𝕩 /Λœβ†© Β¬ ∨`⌾⌽ (0=depth) ∧ π•©βˆŠ"β‹„,"  # Just the last statement
      g ← π•©βˆŠ"←↩"
      (βŠ‘g⊐1) (<⟜(β‰ g))β—ΆβŸ¨1,Β¬(" "∾∾idChars)βˆ§Β΄βˆ˜βˆŠΛœβ†‘βŸ© 𝕩
    }
    rβ€Ώri ← addRsltβ—Ά(2β₯Š<⟨⟩)β€Ώ{
      ⟨ (ShowRslt βŠ£β—ΆβŸ¨"",(β₯ŠβˆΎβŸœlfβŽ‰1)∘Fmt∘⊒⟩ CodeExec)⍟(0<β‰ )Β¨ 𝕩
        1 -˜ +` 1 + β‰ Β¨ 𝕩   ⟩ # Don't forget the trailing newline
    } 𝕩

    mod ← ⟨em,e∾c∾r,ei∾ci∾ri⟩ Modify code
    "pre" Html "code" Html⍟(¬extensions) mod
  }
  CodeExec ← X  # dzaima+reference exec. Should be {⍎}

  # A heading line begins with 1 to 6 # characters which, unless they
  # run to the end of the line, must be followed by a space. Returns the
  # number of hashes for a valid heading and 0 otherwise.
  # Trailing #s are not handled here.
  LenHeading ← {
    hashes ← Lead '#' = 𝕩
    inRange ← (1≤hashes) ∧ (hashes≤6)
    # If any character follows the hashes, it has to be a space
    spaced ← hashes (<⟜≠)◶⟨1,' '=⊑⟩ 𝕩
    hashes × inRange ∧ spaced
  }
  ProcHeading ← {
    tag ← "h" ∾ π•¨βŠβ€’d        # h3 for 3 hashes, etc.
    𝕩 β†“Λœβ†© 𝕨+1
    trsp ← ∧`⌾⌽ 𝕩=' '
    tail ← ∧`⌾⌽ trspβˆ¨π•©='#'  # Mask of trailing hashes
    f ← tail < 0 Shr tail   # Character before trailing hashes
    𝕩 /Λœβ†© Β¬ f (βŠ‘βŸ¨"\"," ",""⟩⊐<f/𝕩)β—ΆβŸ¨βŠ£,⊒,⊒,0¨⊒⟩ tail
    # Add an id, containing only a-z, digits, and hyphens
    Slugify ← {
      𝕩 ↩ '-'¨⌾((𝕩=' ')⊸/) 𝕩  # Replace spaces with dashes
      bounds ← β₯Š +⌜⟜0β€Ώ26βŒΎβ€’UCS "Aa"  # Of the upper and lowercase alphabet
      # Lowercase alphabetic characters and remove special characters
      b ← bounds ⍋ 𝕩
      ((2|b)∨∊⟜("-"βˆΎβ€’d))⊸/ +⟜(32Γ—1=b)βŒΎβ€’UCS 𝕩
    }
    extensions { tag βˆΎβ†© " id="∾""""(∾∾⊣) Slugify 𝕩 }⍟⊣ 𝕩
    tag Html ProcInline Trim 𝕩
  }βŸœβŠ‘

  # List items start with a bullet (unordered) or number (ordered).
  LenBullet ← +βŸœΓ— Β·β‰€βŸœ4βŠΈΓ— Β·Lead ' '=1βŠΈβ†“
  # Render a block of bullet lines as an unordered list. 𝕩 is the list
  # of lines and 𝕨 the number of characters to drop from the front of
  # each line before processing it as inline text.
  ProcBullet ← {
    items ← 𝕨 ↓¨ 𝕩
    listItems ← {"li" Html ProcInline 𝕩}¨ items
    "ul" Html lf ∾ JoinLines listItems
  }
  # Test whether the line 𝕩 starts an ordered list item: 1 to 9 digits,
  # then "." or ")", then a space. Returns a boolean.
  # NOTE(review): the other Len* functions return a length rather than a
  # boolean; this will likely need adjusting before it is put to use.
  LenListNum ← { # Not used yet
    n ← Lead 𝕩∊•d           # Number of leading digits
    l ← (1≤n) ∧ (9≥n)
    t ← n↓(n+2)↑𝕩           # The two characters after the digits
    l ∧ (" " ≡ 1↓t) ∧ ⊑(")." ∊˜ 1↑t)
  }

  # Table detection handled specially because the spec is... special
  CutTableRow ← {
    b ← '|' = 𝕩            # Mask of bars
    o ← (Β¬b) β‰ β—‹Lead ' '=𝕩  # Leading | omitted
    r ← b > 0 Shl '\' = 𝕩  # Non-escaped bars
    1 -˜ (Β¬r∨1⌽b>r) Γ— o + +` r
  }
  ProcTable ← {
    rows ← (TrimΒ¨ CutTableRowβŠΈβŠ”)Β¨ 𝕩
    inc ← Β¬ rule ← ∧´∘∾¨'-'=rows
    rows ↩ ProcInline¨¨⌾(inc⊸/) rows
    rows ↩ (⊏rows) (⊒ ∾ ⟨""⟩ /˜ 0⌈-β—‹β‰ )Β¨ rows
    rowType ← inc / +` rule  # Head or body
    DoRow ← { lf ∾ JoinLines π•¨βŠΈHtmlΒ¨ 𝕩 }
    rows ↩ (rowType ⊏ "th"β€Ώ"td") DoRowΒ¨ inc/rows
    rowGroups ← Β―1 ↓ rowType βŠ”β—‹(∾⟜2) "tr"⊸HtmlΒ¨ rows
    sections ← "thead"β€Ώ"tbody" Html⟜(lf ∾ JoinLines)Β¨ rowGroups
    "table" Html lf ∾ JoinLines (0 < β‰ Β¨rowGroups) / sections
  }

  # Paragraphs
  # 𝕩 is the list of lines in the paragraph. Leading spaces are dropped
  # from every line, and the last line also loses its trailing spaces.
  # The lines are joined with newlines, with none after the last, then
  # processed as inline text and wrapped in a p element.
  ProcParagraph ← {
    lines ← {(Lead ' '=𝕩) ↓ 𝕩}¨ 𝕩
    lines ↩ Trim⌾(¯1⊸⊑) lines
    text ← ¯1 ↓ JoinLines lines   # JoinLines adds a trailing newline; remove it
    "p" Html ProcInline text
  }

  # HTML blocks
  # Lazy rule: if it starts with < and contains >, it's probably HTML
  IsHtmlBlock ← βŠ‘ ">"⊸∊
  ProcHtmlBlock ← {
    codeMask ← "<code>" Β―6⊸⌽⊸(>β—‹(⌈`(1+β†•βˆ˜β‰ )βŠΈΓ—))β—‹(β·βŸœπ•© ∾ 0β₯ŠΛœ1-Λœβ‰ ) "</code>"
    (1Β¨ <⊸∾ codeMask⊸GetMultiHighlights)⊸Modify 𝕩
  }⍟extensions⟜JoinLines

  lineCharsβ€ΏlineClasβ€ΏprocFns ← <Λ˜β‰>⟨
    ""    β€Ώ (!∘0)       β€Ώ ProcParagraph
    "#"   β€Ώ LenHeading  β€Ώ ProcHeading
    ""    β€Ώ 0           β€Ώ ProcCode
    ""    β€Ώ 0           β€Ώ ProcTable
    "-+*" β€Ώ LenBullet   β€Ώ ProcBullet
  # β€’d    β€Ώ LenListNum  β€Ώ ProcListNum
    "<"   β€Ώ IsHtmlBlock β€Ώ ProcHtmlBlock
  ⟩

  ######
  # Inline elements
  ProcInline ← {
    I2M ← (≠𝕩) ↑ /⁼  # Index to mask
    punc ← 𝕩 ∊ "!""#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
    actual ← Β¬ punc ∧ IsEscaped 𝕩  # backtick or *actual* backtick?

    # Code spans
    tick ← 𝕩 = '`'
    tend ← / (⊒ > 0⊸Shr) tick
    tcount ← CountRuns tick
    # 𝕨 are tick lengths and 𝕩 are positions, both sorted by length
    MatchTicks ← {
      # Tick runs other than the last of each length
      notLast ← (⊒=0⊸Shr) 𝕨
      # Ticks preceded by backslashes can't start code blocks, but can
      # end them. This approach is wrong for multiple ticks with a
      # leading backslash in front, which are excluded but should just
      # be treated as one shorter when leading.
      filter ← notLast / (𝕩¬𝕨) ⊏ actual
      # For leading ticks, filter by not-last; for trailing ones, rotate
      # by ¯1 to filter by not-first.
      (filter / ⌽⟜notLast / π•©Λœ)Β¨ 0β€ΏΒ―1
    }
    tlen ← tend ⊏ tcount
    c ← TraceΒ΄ tlen MatchTicksβ—‹((⍋tlen)⊸⊏) tend
    cl ← (⊏˘c) ⊏ tcount
    ctInds ← β₯ŠΛ˜ 1 + c -⌜˘ clΓ—βŒœ1β€Ώ0
    codeMask ← β‰ ` I2M β₯Š codeBounds ← 1β€Ώ2⊸⊏˘ ctInds
    𝕩 ↩ ' '¨⌾((codeMaskβˆ§π•©=lf)⊸/) 𝕩
    # If span has both a leading and a trailing space, they are removed.
    remSpace ← I2M β₯Š ((1<-˜˝˘)∧·∧˝˘' '=βŠβŸœπ•©)⊸/ -⟜0β€Ώ1˘ codeBounds
    codeMask βˆ§β†© Β¬ remSpace
    ⟨code,codePos⟩ ← codeMask extensionsβ—Ά(2β₯Š<⟨⟩)β€ΏGetMultiHighlights 𝕩
    include ← Β¬ remSpace ∨ β‰ ` I2M β₯Š ctInds
    codeBounds ↩ β₯Š -⟜1β€Ώ0˘ codeBounds
    unused ← actual ∧ include ∧ Β¬ codeMask

    # Links
    ghPath ← "https://github.com/mlochbaum/BQN/blob/master/"∾path
    ReplaceMDSub ← { Β―2 ("md"≑↑)β—Ά(ghPath⊸∾)β€Ώ("README"β€’_r_"index"βˆ˜β†“βˆΎ"html"˜)⍟(':'βˆ§Β΄βˆ˜β‰ βŠ’) 𝕩 }
    ReplaceMD ← { ReplaceMDSub⍟(0<β‰ )⌾((βŠ‘π•©βŠ"#")βŠΈβ†‘) 𝕩 }
    ProcLink ← { ∾⟨"<a href=""",(ReplaceMD 𝕩),""">",ProcInline 𝕨,"</a>"⟩ }
    # Find matched-depth [] and () pairs, then join adjacent ones
    brak ← (unused ∧ π•©βŠΈ=)Β¨ 2β€Ώ2β₯Š"[]()"
    depth ← (+`-0⌊⊒)∘(-Β΄)˘ brak
    FindPairs ← βŸ¨βŸ©β€Ώ2 β₯Š 1⊸⊏ /˜ 2⊸⊏ β‰  (¯∞⊸Shl⊸=(∧+⊒)∞⊸Shr⊸=)∘⊏
    pairs ← depth <∘(FindPairs⍟(0<β‰ ))βˆ˜β‰βˆ˜(∧(βŠΛœβ‰Λ˜βŠ’)⟜∾∾˘·/β‰ Β¨βˆ˜βŠ’)˘ /Β¨brak
    JoinPairs ← {
      e←1+1βŠΛ˜π•¨ β‹„ bβ†βŠΛ˜π•© β‹„ m←(β‰ b)>i←b⊐e
      (m/𝕨) ∾˘ (m/i)βŠπ•©
    }
    lInds ← βˆ§β—‹(0<β‰ )β—Ά(0β€Ώ4β₯Š0)β€ΏJoinPairsΒ΄ pairs
    linkPos ← ⊏˘ lInds
    lInds +βŽ‰1↩ 1β€Ώ0β€Ώ1β€Ώ0
    unused βˆ§β†© include βˆ§β†© Β¬ β‰ ` I2M β₯Š (Β―1β€Ώ1+0β€Ώ3⊸⊏)˘ lInds
    linkGroup ← 1 -˜ (1β€Ώ0β₯ŠΛœβ‰’)⊸(/ (βŠ£Γ—>)β—‹(+`I2M) ¬⊸/) β₯ŠlInds
    links ← <∘ProcLink´˘ (lIndsβ‰ βŠΈβˆΎ2) (⊣β₯ŠΓ—Β΄βŠΈβ†‘) linkGroup βŠ” 𝕩

    # Emphasis (still rudimentary)
    eMasks ← (unused ∧ π•©βŠΈ=)Β¨ "*_"
    eMasks ↩ 0⊸Shr⊸∧¨⊸(⊣∾˜0⊸Shl⊸∨⊸<Β¨) eMasks
    eInds ← (⊒-2|⊒)βˆ˜β‰ βŠΈβ†‘βˆ˜/Β¨ eMasks
    include βˆ§β†© Β¬ I2M ∧ ∾ eInds∾1+2↓eInds
    eInds βˆΎβ†© ⟨codeBounds⟩
    eTags ← ∾ eInds β‰ βŠΈβ₯ŠΒ¨ 2β€Ώ2β€Ώ1 / ("<"β€Ώ"</"∾¨·<∾⟜">")Β¨ "em"β€Ώ"strong"β€Ώ"code"
    eInds ↩ ∾ eInds

    # Remove backslashes used for escaping
    include βˆ§β†© codeMask ∨ 1 ⌽ actual

    emβ€Ώentβ€Ώei ← include CharEntities 𝕩
    include βˆ§β†© em

    add ← ∾⟨eTags,ent,code,links⟩         # Text to be added
    pos ← ∾⟨eInds,ei,codePos,linkPos⟩     # Where to add it
    ⟨include,add,pos⟩ Modify 𝕩
  }

  ######
  # Create the block structure using line classifications.

  # First remove the html link line: the output *is* the html file.
  𝕩 ↩ 2βŠΈβ†“βŸ("*View this file" (⊣ ≑ βŒŠβ—‹β‰ β†‘βŠ’) βŠ‘) 𝕩

  lengths ← β‰ Β¨ 𝕩                   # Length of each line
  blanks ← (Lead ' '⊸=)Β¨ 𝕩         # Number of leading blanks
  nonEmptyMask ← blanks < lengths  # Empty ←→ all leading blanks

  # Get line classifications: type of line, and data to be passed into
  # the line processor. Note that leading blanks aren't passed in.
  lineTypeβ€ΏlineDat ← <Λ˜β‰ > ClassifyLineΒ¨ blanks ↓¨ 𝕩
  # Empty lines have type ¯1.
  lineType ↩ Β―1¨⌾((Β¬nonEmptyMask)⊸/) lineType

  # Lines that could be included in code blocks (will be refined)
  codeMask ← nonEmptyMask ∧ blanks β‰₯ 4
  paragraphMask ← 0 = lineType
  # A header can't have 4 spaces of indentation. If it doesn't become
  # part of a code block, it will be included in a paragraph.
  lineType -↩ codeMask ∧ 1 = lineType

  # Tables are made up of rows that would otherwise be paragraph rows.
  # They are indicated by the delimiter row, consisting of only a few
  # allowed characters, preceded (!) by a header row with the same
  # number of cells.
  IsTD ← (∧´ ∊ ∾ ⊣ ∊˜ 2β†‘βŠ’)⟜"-|: "
  tableMask ← (0βŒΎβŠ‘ nonEmptyMask) ∧ paragraphMask ∧¬ codeMask
  tableDelimMask ← { 𝕩 IsTD¨∘⊣⌾(π•¨βŠΈ/) 𝕨 }βŸœπ•© tableMask
  delimValid ← (⊒ =β—‹(β‰ βˆ˜βŠ”βˆ˜CutTableRowΒ¨ βŠβŸœπ•©) -⟜1) / tableDelimMask
  headerMask ← 1 ⌽ delimValid⌾(tableDelimMask⊸/) 0¨𝕩
  tableMask ↩ headerMask (⊒ ∧ ⊣ ∨ ⊣ PrecedesGroup <) tableMask
  lineType ↩ 3¨⌾(tableMask⊸/) lineType

  # Code blocks consist of indented lines, possibly with blank lines
  # in between. They must be separated from paragraphs by blank lines.
  codeMask βˆ§β†© Β¬ paragraphMask PrecedesGroup codeMask
  codeMask βˆ¨β†© codeMask (⊒ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0
  lineType ↩ 2¨⌾(codeMask⊸/) lineType

  # Lines continue blocks if they are part of the same multi-line
  # type as the previous line, and otherwise start new ones.
  # Headers (type 1) always start new blocks.
  blockStart ← nonEmptyMask ∧ (1 = lineType) ∨ Β―1⊸ShlβŠΈβ‰  lineType
  # Headers and paragraphs ignore leading blanks.
  drop ← blanks Γ— lineType < 2
  # Group blocks based on blockStart, with type ¯1 lines excluded.
  blocks ← (1 -˜ (lineType β‰₯ 0) Γ— +`blockStart) βŠ” drop ↓¨ 𝕩

  # To process a block, pick the appropriate function from procFns.
  ProcBlock ← {tβ€Ώl G b: f←tβŠ‘procFns β‹„ l F βŠ‘b }
  JoinLines (blockStart / lineTypeβ‰Λ˜lineDat) <∘ProcBlock˘ blocks
}


################################
# Testing
# Uses the test cases at https://spec.commonmark.org/0.29/spec.json
# since Github doesn't seem to have published theirs
TestSections ← {
  tests ← Β―2 β†“Λ˜ 8⊸(Γ·ΛœβŸœβ‰ βˆΎβŠ£)⊸β₯Š2↓‒LNS β€’path∾"spec.json"
  tests ↩ ((βŠ‘2+⊐⟜':')¨∘⊏ ((-','=Β―1βŠ‘βŠ’)↓↓)Β¨βŽ‰1 ⊒) tests
  testSection ← (1↓¯1β†“βŠ’)Β¨ 5⊏˘tests
  UnEscape ← {
    EscapeChar ← { ("\""tn"βŠπ•©) ⊏ "\"""βˆΎβ€’UCS 9β€Ώ10 }
    esc ← IsEscaped 𝕩
    (Β¬1⌽esc) / EscapeChar⌾(esc⊸/) 𝕩
  }
  RunTest ← {
    inβ€Ώexp ← UnEscape∘(1↓¯1β†“βŠ’)Β¨2↑𝕩
    out ← 0 Markdown (β€’UCS 10) ((⊒-ΛœΒ¬Γ—+`)∘=βŠ”βŠ’) in
    ⟨exp≑out,in,exp,out,2βŠ‘π•©βŸ©
  }

  ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩
  res ← 1 β†“Λ˜ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩
  res
}


################################
# Syntax highlighting
idChars ← ⟨
  β€’d∾"Β―.Ο€βˆž"
  ' '+βŒΎβ€’UCSβ€’a
  β€’a
  "_"
⟩
GetHighlights ← {
  classesβ€Ώchars ← <˘ ⍉ 2⊸(Γ·ΛœβŸœβ‰ βˆΎβŠ£)⊸β₯ŠβŸ¨
    0             , " "βˆΎβ€’UCS 9β€Ώ10
    "Value"       , Β―1⊏˘5β€Ώ2β₯Š"π•¨π•©π•—π•˜π•€"
    "Function"    , "+-Γ—Γ·β‹†βˆšβŒŠβŒˆ|¬∧∨<>β‰ =≀β‰₯β‰‘β‰’βŠ£βŠ’β₯ŠβˆΎβ‰β†‘β†“β†•βŒ½β‰/β‹β’βŠβŠ‘βŠβŠ’βˆŠβ·βŠ”!"∾¯1⊏˘5β€Ώ2β₯Š"π•Žπ•π”½π”Ύπ•Š"
    "Modifier"    , "˜˘¨⌜⁼´˝`"
    "Modifier2"   , "βˆ˜β—‹βŠΈβŸœβŒΎβŠ˜β—ΆβŽ‰βš‡βŸ"
    "Number"      , ∾idChars
    "Gets"        , "←↩→"
    "Paren"       , "()"
    "Bracket"     , "⟨⟩"
    "Brace"       , "{}"
    "Ligature"    , "β€Ώ"
    "Nothing"     , "Β·"
    "Separator"   , "β‹„,"
    "Comment"     , "#"
    "String"      , "'"""
  ⟩
  classTag ← ""β€Ώ""∾>{⟨"<span class='"βˆΎπ•©βˆΎ"'>","</span>"⟩}Β¨1↓classes

  r←𝕩='#'β‹„s←/(≠↑2βŠΈβ†“)βŠΈβˆ§π•©='''β‹„d←/𝕩='"'
  bβ†βŸ¨sβ‹„Β―1↓dβ‹„/r⟩ Traceβ—‹βˆΎ ⟨2+sβ‹„1↓dβ‹„(⊒-Β―1↓0∾⊒)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩
  sc←+Β΄(1β€Ώ2-Λœβ‰ classes)Γ—(β‰ `∨⊒)∘((≠𝕩)↑/⁼∘∾)Β¨2↑((⊏˘b)⊏r)βŠ”β—‹(∾⟜2)<˘b
  col←sc⌈14|chars FindGroup 𝕩

  w←(≠↑0∾⊒)⊸<id←col=5
  idc←1+5|1-˜(idChars FindGroup w/𝕩)+'_'=((1β†“βˆΎβŸœ0)⊸<id)/𝕩
  col↩((id/+`w)⊏0∾idc)⌾(id⊸/)col

  col↩(1⌽col)⊣⌾((𝕩=βŠ‘"𝕩")⊸/)col

  bd←(≠↑¯1∾⊒)βŠΈβ‰ col
  bc←bd/col
  (β₯Š(0<bc)⊸/)¨⟨bc⊏classTag,2↕1-˜/bd∾1⟩
}
# Return highlights for areas in 𝕩 where 𝕨 is true.
GetMultiHighlights ← {
  start ← 0⊸Shl⊸< 𝕨
  groups ← (1 -˜ 𝕨 Γ— +` start) βŠ” 𝕩
  <∘∾˘ ⍉ ((β‰ βˆΎ2˜) β₯Š Β·> (/start) {π•¨βŠΈ+⌾(1βŠΈβŠ‘)𝕩}⟜GetHighlightsΒ¨ ⊒) groups
}


################################
# Format an array to a character matrix
# Won't work on functions until we can catch errors
Fmt ← {
  # Vertical padding for arrays of rank greater than 2
  PadV ← {
    # Leading shape
    ls ← Β―1↓≒𝕩
    # Empty lines after each row: 1 if it's at the end of a 2-cell, plus
    # 1 if it's at the end of a 2-cell and a 3-cell, and so on
    p ← β₯Š +βŽ‰Β―1β€ΏβˆžΒ΄ Γ—βŒœΛœ`⌾⌽ (-1⌈ls)↑¨1
    # But none at the very end
    p ↩ 0⌾(Β―1βŠΈβŠ‘) p
    Pad ← {i←/1+𝕨 β‹„ (Β―1¨⌾((¬∊i)⊸/)i) ⊏ π•©βˆΎ(Β―1βŠ‘β‰’π•©)β₯Š" "}
    p (βŠ‘0∊ls)β—ΆβŸ¨Pad,+Β΄βŠΈβ†‘βŸ© ((Γ—Β΄ls)∾¯1βŠ‘β‰’π•©) β₯Š 𝕩
  }⍟(2 < =)
  # Horizontal padding: just some spaces on either side
  PadH ← {
    (𝕨/" ") (βˆΎβŽ‰1βˆΎβŽ‰1⊣) 𝕩
  }
  Pad ← PadH⟜PadV
  Enframe ← {(1≠𝕨)∨(1≠≠𝕩)βˆ¨βŠ‘2∊+`-˝"⟨⟩"=βŒœβŠπ•©}β—Ά{
    ≍"⟨"∾(Β―1↓1β†“βŠπ•©)∾"⟩"
  }β€Ώ{
    l ← Β―1 βŠ‘ ≒𝕩
    ∾ ⟨ # "β”Όβ•’β•›β•ͺ"
      1β€Ώlβ†‘βˆΎβŸ¨"β”Œ",(5⊸<)β—ΆβŸ¨β₯Š"·─"⊏˜1⌊⊒,β•βŸ©π•¨βŸ©
      ((4⌊0βŒˆπ•¨-1)βŠ‘"Β·β•΅β•Žβ”†β”Š")βŒΎβŠ‘ 𝕩
      (1∾-l)↑"β”˜"
    ⟩
  }
  FmtEmpty ← (0β€Ώ0β‰’β‰’)β—Ά("β”Œβ”"≍"β””β”˜")β€Ώ(((2β‰ =)∨0=β‰ )β—Ά{
    '┐'⌾(0β€ΏΒ―1βŠΈβŠ‘) 2 Enframe 1 PadH " "¨𝕩
  }β€Ώ{
    ≍"⟨⟩"∾˜(1<β‰ )β—ΆβŸ¨"",'β₯Š'⌾(Β―1βŠΈβŠ‘)·∾·∾⟜"β€Ώ"Β¨β•Β¨βŸ©β‰’π•©
  })
  PaddingJoin ← {1π•Šπ•©;
    s ← β‰’Β¨ 𝕩
    w ← ⌈˝⍟(=-1˜)1βŠ‘Β¨s
    h ← βŒˆΛβŽ‰1 βŠ‘Β¨s
    βˆΎβŽ‰2 β‰βŸ(0⌈2-=) (h ∾⌜ 𝕨×wΒ¬(-𝕨×≠w)↑1) ↑¨ 𝕩
  }
  FmtMixed ← {
    (=𝕩) Enframe 2 Pad 𝕨 PaddingJoin F¨𝕩
  }
  F ← (2βŒŠβ‰‘)β—Ά(β‰β‰€βŸœβˆžβ—ΆβŸ¨"'"⊸(∾∾⊣),β•βŸ©)β€Ώ{
    num ← π•©β‰€βˆž β‹„ r ← =𝕩
    ((β‰ (0⊸<+≀)+Β΄)β₯Šnum)β—Ά{
      # All characters
      k ← -β‰  c ← Β―1↓≒𝕩
      (r Enframe 1 PadH PadV)⍟(1β‰ r) ≍ (c↑'"') βˆΎβŽ‰k 𝕩 βˆΎβŽ‰k ⌽c↑'"'
    }β€Ώ{
      # Not homogeneous, or empty
      (∨´0=β‰’)β—ΆFmtMixedβ€ΏFmtEmpty 𝕩
    }β€Ώ{
      # All numbers
      Β―1 FmtMixed 𝕩
    } 𝕩
  }β€ΏFmtMixed
  F 𝕩
}


################################
# Creating HTML files
Head ← "<head><link href="""∾("../"/˜"/"βŠ‘βˆ˜βˆŠβŠ’)∾"style.css"" rel=""stylesheet""/></head>"∾lf˜
nav  ← "<div class=""nav""><a href=""https://github.com/mlochbaum/BQN"">BQN</a></div>"∾lf
ConvertFile ← Head ∾ nav ∾ Markdown⟜(β€’LNS β€’path∾⊒)