aboutsummaryrefslogtreecommitdiff
path: root/docsrc/md.bqn
diff options
context:
space:
mode:
Diffstat (limited to 'docsrc/md.bqn')
-rw-r--r--docsrc/md.bqn324
1 files changed, 324 insertions, 0 deletions
diff --git a/docsrc/md.bqn b/docsrc/md.bqn
new file mode 100644
index 00000000..acb264ba
--- /dev/null
+++ b/docsrc/md.bqn
@@ -0,0 +1,324 @@
+# The Markdown function is a markdown to html converter for a "good
+# enough" subset of Github-flavored markdown, as specified at
+# https://github.github.com/gfm/ .
+#
+# Additionally, it highlights code sections as BQN, and executes
+# sections that are doubly indented (eight spaces), placing their
+# results below them.
+
+# Not supported:
+# - Thematic breaks like *** or ---
+# - Setext headings (underlined with ==== or ----)
+# - Fenced code blocks (marked off with ``` or ~~~)
+# - HTML blocks
+# - Link reference definitions (who uses these?)
+# - Block quotes (start with >)
+# - Task lists
+
+# Here, a markdown file is represented as a list of its lines, which are
+# strings (they don't include any line ending character).
+# The html file is constructed directly as a string, using Html.
+
+################################
+# Utilities
+
+# 𝕨 is a list of lists. Find the first of these lists each cell of 𝕩
+# belongs to.
+FindGroup ← {
+ i ← (∾𝕨) ⊐ 𝕩 # Index in all cells of 𝕨
+ e ← +`≠¨𝕨 # Index past the end of each group of 𝕨
+ e ⍋ i # How many end-indices does each element pass?
+}
+
+# 𝕨 is a list of possible expression start indices in any order and 𝕩 is
+# the corresponding endpoints. The expressions are mutually exclusive
+# and do not nest, and are enabled in index order. Return a shape ·‿2
+# array where the rows give the start and end of each enabled expression
+# in index order.
+Trace ← {
+ Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟{0=⊑⌽𝕩}
+ g←⍋𝕨 ⋄ s←g⊏𝕨 ⋄ e←g⊏𝕩
+ st←¯1↓Se⟜(1↑˜≠)∾⟜≠s⍋e
+ st/s≍˘e
+}
+
+# Join lines with newline characters. Include the trailing newline.
+JoinLines ← ∾ ∾⟜lf¨
+
+# Create an html node from a tag name and interior text
+Html ← {
+ ∾ ⟨"<",𝕨,">" , 𝕩 , "</",(⊑⊐⟜" ")⊸↑𝕨,">"⟩
+}
+
+################################
+Markdown ← {
+ ######
+ # Utilities
+
+ # Index of first zero, or number of leading 1s
+ Lead ← ⊑ ⊐⟜0
+
+ # Shift cells 𝕨 into array 𝕩, maintaining its total length
+ Shl ← ≠∘⊢ ↑ ∾ # From the left
+ Shr ← -∘≠∘⊢ ↑ ∾˜ # From the right
+
+ # Find whether 𝕨 was true at the last index where 𝕩 was true, in each
+ # position.
+ PrecedesGroup ← {
+ # We prepend a 0 to 𝕨, so that 0 is the "before start" index, with a
+ # false value, and normal indices are increased by 1.
+ 𝕨 ∾˜↩ 0
+ inds ← 1 + ↕≠𝕩
+ # Zero out indices where x was false, and find the greatest index so
+ # far at each position.
+ last ← ⌈` inds × ¬𝕩
+ last ⊏ 𝕨
+ }
+
+ # Remove leading and trailing spaces
+ Trim ← { 𝕩 /˜ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩 }
+
+ ######
+ # First we classify each line based on the type of block it can start.
+ ClassifyLine ← (0<≠)◶(0‿0)‿{
+ ind ← ⊑ lineChars FindGroup ⊏𝕩
+ getLen ← ind ⊑ lineClas∾⟨0˜⟩
+ l ← GetLen 𝕩
+ ⟨ind ∧ l>0 ⋄ l⟩
+ }
+
+ # Non-empty lines in code blocks have 4 leading spaces
+ IsCode ← 4 (≤⟜≠)◶⟨0,∧´' '=↑⟩ ⊢
+ ProcCode ← {
+ lines ← JoinLines 4 ↓¨ 𝕩
+ Esc ← (∾⥊¨) ("<>"⊸⊐ ⊑⟜⟨"&lt;","&gt;"⟩⍟(2>⊣)¨ ⊢)
+ "pre" Html doHighlight◶⟨"code"Html Esc,Highlight⟩ lines
+ }
+
+ # Headings start with #, and require 1-6 #s followed by a space.
+ # Any trailing #s are ignored.
+ LenHeading ← {
+ n ← Lead 𝕩='#'
+ l ← (0<n) ∧ (6≥n)
+ s ← n (<⟜≠)◶⟨1,' '=⊑⟩ 𝕩 # Character after hashes must be a space, if any
+ n × l ∧ s
+ }
+ ProcHeading ← {
+ tag ← "h" ∾ 𝕨⊏•d # h3 for 3 hashes, etc.
+ 𝕩 ↓˜↩ 𝕨+1
+ trsp ← ∧`⌾⌽ 𝕩=' '
+ tail ← ∧`⌾⌽ trsp∨𝕩='#' # Mask of trailing hashes
+ f ← tail < 0 Shr tail # Character before trailing hashes
+ 𝕩 /˜↩ ¬ f (⊑⟨"\"," ",""⟩⊐<f/𝕩)◶⟨⊣,⊢,⊢,0¨⊢⟩ tail
+ # Add an id: lowercase the header, replacing non-•a with hyphens
+ Slugify ← {
+ ch ← •UCS "-Aa"
+ bounds ← ⥊ (1↓ch) +⌜ 0‿26 # Of the upper and lowercase alphabet
+ (bounds⊸⍋ {(⊑ch)¨⌾((¬2|𝕨)⊸/)𝕩+32×1=𝕨} ⊢)⌾•UCS 𝕩
+ }
+ tag ∾↩ " id="∾""""(∾∾⊣) Slugify 𝕩
+ tag Html ProcInline Trim 𝕩
+ }⟜⊑
+
+ # List items start with a bullet (unordered) or number (ordered).
+ LenBullet ← 2 × 1 (<⟜≠)◶⟨0,' '=⊑⟩ ⊢
+ LenListNum ← {
+ n ← Lead 𝕩∊•d
+ l ← (1≤n) ∧ (9≥n)
+ ' ' = n ↓ 𝕩
+ t ← n↓(n+2)↑𝕩
+ l ∧ (" " ≡ 1↓t) ∧ ⊑(")." ∊˜ 1↑t)
+ }
+
+ # Any line that starts with a | is a table, at least in my lazy version
+ IsTable ← 1˜
+ ProcTable ← {
+ rows ← (Trim¨ ((1-˜¬×+`)'|'⊸=)⊸⊔)¨ 𝕩
+ inc ← ¬ rule ← ∧´∘∾¨'-'=rows
+ rows ↩ ProcInline¨¨⌾(inc⊸/) rows
+ rowType ← inc / +` rule # Head or body
+ DoRow ← { lf ∾ JoinLines 𝕨⊸Html¨ 𝕩 }
+ rows ↩ (rowType ⊏ "th"‿"td") DoRow¨ inc/rows
+ rowGroups ← ¯1 ↓ rowType ⊔○(∾⟜2) "tr"⊸Html¨ rows
+ sections ← "thead"‿"tbody" Html⟜(lf ∾ JoinLines)¨ rowGroups
+ "table" Html lf ∾ JoinLines (0 < ≠¨rowGroups) / sections
+ }
+
+ # Paragraphs
+ ProcParagraph ← {
+ Trsp ← { m←∧`⌾⌽𝕩=' ' ⋄ (m¬⊸/𝕩)∾(𝕨<∨´m)/"<br />" }
+ 𝕩 ↩ (/(≠𝕩)(-∾⊢)1) Trsp¨ 𝕩
+ "p" Html ProcInline ¯1 ↓ JoinLines ((Lead ' '⊸=)+"\#"≡2⊸↑)⊸↓¨ 𝕩
+ }
+
+ lineChars‿lineClas‿procFns ← <˘⍉>⟨
+ "" ‿ (!∘0) ‿ ProcParagraph
+ "#" ‿ LenHeading ‿ ProcHeading
+ " " ‿ IsCode ‿ ProcCode
+ # "-+*" ‿ LenBullet ‿ ProcBullet
+ # •d ‿ LenListNum ‿ ProcListNum
+ "|" ‿ IsTable ‿ ProcTable
+ ⟩
+
+ ######
+ # Inline elements
+ ProcInline ← {
+ puncChars ← "!""#$%&'()*+,-./:;<=>?@[\]^_`{|}~"
+ I2M ← (≠𝕩)↑/⁼ # Index to mask
+
+ # Code spans
+ ProcCodeSpan ← {
+ 𝕩 ↩ ' '¨⌾((𝕩=lf)⊸/) 𝕩
+ 𝕩 ↩ (1↓¯1↓⊢)⍟((⊢<○(∧´)⊑∾⊑∘⌽) ' '⊸=) 𝕩
+ "code" Html Highlight⍟doHighlight 𝕩
+ }
+ tick ← 𝕩='`'
+ tend ← / (⊢ > 0⊸Shr) tick
+ tcount ← (1+↕∘≠)⊸(⊣-⌈`∘×) ¬ tick
+ tlen ← tend ⊏ tcount
+ c ← Trace´ tlen {m←(⊢=0⊸Shl)𝕨⋄(⌽⟜m/𝕩˜)¨1‿0}○((⍋tlen)⊸⊏) tend
+ cl ← (⊏˘c) ⊏ tcount
+ ctInds ← ⥊˘ 1 + c -⌜˘ cl×⌜1‿0
+ include ← ¬ ≠` I2M ⥊ 0‿3⊸⊏˘ ctInds
+ codeStart ← I2M 1 ⊏˘ ctInds
+ codeGroup ← 1 -˜ codeStart (⊣×>)○(+`) I2M 2 ⊏˘ ctInds
+ code ← ProcCodeSpan¨ codeGroup ⊔ 𝕩
+
+ # Links
+ ReplaceMDSub ← { ¯2 (↓∾"html"˜)⍟(("md"≡↑)∧'/'∧´∘≠⊢) 𝕩 }
+ ReplaceMD ← { ReplaceMDSub⌾((⊑𝕩⊐"#")⊸↑) 𝕩 }
+ ProcLink ← { ∾⟨"<a href=""",(ReplaceMD 𝕩),""">",𝕨,"</a>"⟩ }
+ brak ← /∘(include ∧ 𝕩⊸=)¨ "]()["
+ link ← (∊/⊣)´ 0‿¯1 + 2 ↑ brak
+ chains ← (⍋˜ ⊏ ⊢∾(≠𝕩)˜)` ¯1 ⌽ (<link) ∾ 2 ↓ brak
+ chains ↩ > (∧´ (∊ ∧ <⟜(≠𝕩))¨ 1 ↓ chains)⊸/¨ chains
+ linkStart ← I2M 0 ⊏ chains
+ lInds ← 1‿0‿2‿0⊸+˘ (⥊2⊸↕)˘ ⍉ chains
+ include ∧↩ ¬ ≠` I2M ⥊ (¯1‿1+0‿3⊸⊏)˘ lInds
+ linkGroup ← 1 -˜ (1‿0⥊˜≢)⊸(/ (⊣×>)○(+`I2M) ¬⊸/) ⥊lInds
+ links ← <∘ProcLink´˘ 2⊸(÷˜⟜≠∾⊣)⊸⥊ linkGroup ⊔ 𝕩
+
+ # Emphasis (still rudimentary)
+ eMasks ← (include ∧ 𝕩⊸=)¨ "*_"
+ eInds ← (⊢-2|⊢)∘≠⊸↑∘/¨ eMasks
+ include ∧↩ ¬∨´eMasks
+ eTags ← ∾ eInds ≠⊸⥊¨ <"<em>"‿"</em>"
+
+ new ← ∾⟨eTags,code,links⟩ # Text to be added
+ inds← ∾eInds∾/¨codeStart‿linkStart # Where to add it
+ ((/include)∾(≠¨new)/inds) ⍋⊸⊏ (include/𝕩)∾∾new
+ }
+
+ ######
+ # Create the block structure using line classifications.
+ lengths ← ≠¨ 𝕩 # Length of each line
+ blanks ← (Lead ' '⊸=)¨ 𝕩 # Number of leading blanks
+ nonEmptyMask ← blanks < lengths # Empty ←→ all leading blanks
+
+ # Get line classifications: type of line, and data to be passed into
+ # the line processor. Note that leading blanks aren't passed in.
+ lineType‿lineDat ← <˘⍉ > ClassifyLine¨ blanks ↓¨ 𝕩
+ # Empty lines have type ¯1.
+ lineType ↩ ¯1¨⌾((¬nonEmptyMask)⊸/) lineType
+
+ # Lines that could be included in code blocks (will be refined)
+ codeMask ← nonEmptyMask ∧ blanks ≥ 4
+ paragraphMask ← 0 = lineType
+ # A header can't have 4 spaces of indentation. If it doesn't become
+ # part of a code block, it will be included in a paragraph.
+ lineType -↩ codeMask ∧ 1 = lineType
+
+ # Code blocks consist of indented lines, possibly with blank lines
+ # in between. They must be separated from paragraphs by blank lines.
+ codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask
+ codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0
+ lineType ↩ 2¨⌾(codeMask⊸/) lineType
+
+ # Lines continue blocks if they are part of the same multi-line
+ # type as the previous line, and otherwise start new ones.
+ # Headers (type 1) always start new blocks.
+ blockStart ← nonEmptyMask ∧ (1 = lineType) ∨ ¯1⊸Shl⊸≠ lineType
+ # Headers and paragraphs ignore leading blanks.
+ drop ← blanks × lineType < 2
+ # Group blocks based on blockStart, with type ¯1 lines excluded.
+ blocks ← (1 -˜ (lineType ≥ 0) × +`blockStart) ⊔ drop ↓¨ 𝕩
+
+ # To process a block, pick the appropriate function from procFns.
+ ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b }
+ JoinLines (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks
+}
+
+################################
+# Testing
+# Uses the test cases at https://spec.commonmark.org/0.29/spec.json
+# since Github doesn't seem to have published theirs
+TestSections ← {
+ doHighlight ↩ 0
+ tests ← ¯2 ↓˘ 8⊸(÷˜⟜≠∾⊣)⊸⥊2↓•LNS •path∾"../spec.json"
+ tests ↩ ((⊑2+⊐⟜':')¨∘⊏ ((-','=¯1⊑⊢)↓↓)¨⎉1 ⊢) tests
+ testSection ← (1↓¯1↓⊢)¨ 5⊏˘tests
+ UnEsc ← {
+ esc ← (2 | (1+↕∘≠) (⊣-⌈`∘×) '\'≠⊢) 𝕩
+ esc ¬⊸/ (("\"""∾•UCS 9‿10)⊏˜"\""tn"⊐⊢)⌾((¯1⌽esc)⊸/) 𝕩
+ }
+ RunTest ← {
+ in‿exp ← UnEsc∘(1↓¯1↓⊢)¨2↑𝕩
+ out ← Markdown (•UCS 10) ((⊢-˜¬×+`)∘=⊔⊢) in
+ ⟨exp≡out,in,exp,out,2⊑𝕩⟩
+ }
+
+ ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩
+ res ← 1 ↓˘ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩
+ doHighlight ↩ 1
+ res
+}
+
+################################
+# Syntax highlighting
+doHighlight ← 1
+Highlight ← {
+ idChars ← ⟨
+ •d∾"¯.π∞"
+ ' '+⌾•UCS•a
+ •a
+ "_"
+ ⟩
+ classes‿chars ← <˘ ⍉ 2⊸(÷˜⟜≠∾⊣)⊸⥊⟨
+ 0 , " "∾•UCS 9‿10
+ "Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤"
+ "Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊"
+ "Modifier" , "˜˘¨⌜⁼´`"
+ "Composition" , "∘○⊸⟜⌾⊘◶⎉⚇⍟"
+ "Number" , ∾idChars
+ "Gets" , "←↩→"
+ "Paren" , "()"
+ "Bracket" , "⟨⟩"
+ "Brace" , "{}"
+ "Ligature" , "‿"
+ "Nothing" , "·"
+ "Separator" , "⋄,"
+ "Comment" , "#"
+ "String" , "'"""
+ ⟩
+ classTag ← ""‿""∾>{⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨1↓classes
+
+ r←𝕩='#'⋄s←/(≠↑2⊸↓)⊸∧𝕩='''⋄d←/𝕩='"'
+ b←⟨s⋄¯1↓d⋄/r⟩ Trace○∾ ⟨2+s⋄1↓d⋄(⊢-¯1↓0∾⊢)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩
+ sc←+´(1‿2-˜≠classes)×(≠`∨⊢)∘((≠𝕩)↑/⁼∘∾)¨2↑((⊏˘b)⊏r)⊔○(∾⟜2)<˘b
+ col←sc⌈14|chars FindGroup 𝕩
+
+ w←(≠↑0∾⊢)⊸<id←col=5
+ idc←1+5|1-˜(idChars FindGroup w/𝕩)+'_'=((1↓∾⟜0)⊸<id)/𝕩
+ col↩((id/+`w)⊏0∾idc)⌾(id⊸/)col
+
+ col↩(1⌽col)⊣⌾((𝕩=⊑"𝕩")⊸/)col
+
+ bd←(≠↑¯1∾⊢)⊸≠col
+ f←0<bd/col
+ tags←⥊f/(bd/col)⊏classTag
+ pos←⥊f/2↕/bd∾1
+ ((↕≠𝕩)∾˜(≠¨tags)/pos) ⍋⊸⊏ 𝕩∾˜∾tags
+}
+
+head ← "<head><link href=""style.css"" rel=""stylesheet""/></head>"∾lf
+ConvertFile ← head ∾ Markdown∘•LNS