diff options
| author | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-15 22:21:51 -0400 |
|---|---|---|
| committer | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-15 22:21:51 -0400 |
| commit | 8b98b5429be5afdcc63094001259cdf27d53ad35 (patch) | |
| tree | f20e34f1adfac7cda99e05d600645315780651bb /doc | |
| parent | 765e8065c40be192fad6d571684ebb7d23f1fb25 (diff) | |
Draft of BQN-based markdown converter
Diffstat (limited to 'doc')
| -rw-r--r-- | doc/md.bqn | 238 |
1 files changed, 238 insertions, 0 deletions
diff --git a/doc/md.bqn b/doc/md.bqn
new file mode 100644
index 00000000..9b04405b
--- /dev/null
+++ b/doc/md.bqn
@@ -0,0 +1,238 @@
+# The Markdown function is a markdown to html converter for a "good
+# enough" subset of GitHub-flavored markdown, as specified at
+# https://github.github.com/gfm/ .
+#
+# Additionally, it highlights code sections as BQN, and executes
+# sections that are doubly indented (eight spaces), placing their
+# results below them. (NOTE(review): no execution step is visible in this draft.)
+
+# Not supported:
+# - Thematic breaks like *** or ---
+# - Setext headings (underlined with ==== or ----)
+# - Fenced code blocks (marked off with ``` or ~~~)
+# - HTML blocks
+# - Link reference definitions (who uses these?)
+# - Block quotes (start with >)
+# - Task lists
+
+# Here, a markdown file is represented as a list of its lines, which are
+# strings (they don't include any line ending character).
+# The html file is constructed directly as a string, using Html.
+
+JoinLines ← ∾ ∾⟜lf¨  # Append lf to every line, then join into one string; lf is not defined in this file
+
+# Create an html node: 𝕨 is the tag name, 𝕩 the already-rendered contents
+Html ← {
+  tag ← "<"‿"</" ∾¨ <𝕨∾">"  # Opening and closing tag strings
+  ∾ ⟨⊑tag , 𝕩 , ¯1⊑tag⟩
+}
+
+Markdown ← {  # 𝕩: list of lines; result: a single html string
+  # ⌜
+  # Utilities
+
+  # Index of first zero, or number of leading 1s
+  Lead ← ⊑ ⊐⟜0
+
+  # Shift cells 𝕨 into array 𝕩, maintaining its total length
+  Shl ← ≠∘⊢ ↑ ∾ # From the left
+  Shr ← -∘≠∘⊢ ↑ ∾˜ # From the right
+
+  # Find whether 𝕨 was true at the last index where 𝕩 was false, in each
+  # position.
+  PrecedesGroup ← {
+    (0 ∾ 𝕨) ⊏˜ ⌈` (1 + ↕≠𝕩) × ¬𝕩  # Running max of 1+index over false positions; 0 (no such position yet) selects the prepended 0
+  }
+
+  # ⌜
+  # First we classify each line based on the type of block it can start.
+  ClassifyLine ← (0<≠)◶(0‿0)‿{  # An empty line gives 0‿0; otherwise the block computes ⟨type, data⟩
+    FindGroup ← { ⊑ (+`≠¨𝕨) ⍋ 𝕨 ∾⊸⊐ 𝕩 }  # Index of the group in 𝕨 that contains character 𝕩, or ≠𝕨 if none does
+    ind ← lineChars FindGroup ⊑𝕩  # Group of the line's first character
+    getLen ← ind ⊑ lineClas∾⟨0˜⟩  # Test function for that group; the appended constant 0 covers "no group"
+    l ← GetLen 𝕩  # Same name as getLen, spelled with a capital to call it as a function
+    ⟨ind ∧ l>0 ⋄ l⟩  # The type falls back to 0 when the test returned 0
+  }
+
+  # Non-empty lines in code blocks have 4 leading spaces
+  IsCode ← 4 (≤⟜≠)◶⟨0,∧´' '=↑⟩ ⊢
+  ProcCode ← {
+    lines ← JoinLines 4 ↓¨ 𝕩  # Remove the 4-character indent from every line
+    #lines ↩ ∾⥊¨ ("<>"⊸⊐ ⊑⟜⟨"&lt;","&gt;"⟩⍟(2>⊣)¨ ⊢) lines
+    "pre" Html doHighlight◶⟨"code"⊸Html,Highlight⟩ lines  # Plain <code> node when doHighlight is 0, Highlight otherwise
+  }
+
+  # Headings start with #, and require 1-6 #s followed by a space.
+  # Any trailing #s are ignored.
+  LenHeading ← {
+    n ← Lead 𝕩='#'
+    l ← (0<n) ∧ (6≥n)
+    s ← n (<⟜≠)◶⟨1,' '=⊑⟩ 𝕩 # Character after hashes must be a space, if any
+    n × l ∧ s  # Number of hashes, or 0 when the line is not a valid heading
+  }
+  ProcHeading ← {
+    tag ← "h" ∾ 𝕨⊏•d # h3 for 3 hashes, etc.
+    𝕩 ↓˜↩ 𝕨+1  # Drop the hashes and the character after them
+    trsp ← ∧`⌾⌽ 𝕩=' '
+    tail ← ∧`⌾⌽ trsp∨𝕩='#' # Mask of trailing hashes
+    f ← tail < 0 Shr tail # Character before trailing hashes
+    𝕩 /˜↩ ¬ f (⊑⟨"\"," ",""⟩⊐<f/𝕩)◶⟨⊣,⊢,⊢,0¨⊢⟩ tail  # After \ drop only the \; after a space or nothing drop the tail; otherwise drop nothing
+    𝕩 /˜↩ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩  # Trim leading and trailing spaces
+    tag Html ProcInline 𝕩
+  }⟜⊑  # 𝕨: heading level; only the first line of the block is used
+
+  # List items start with a bullet (unordered) or number (ordered).
+  LenBullet ← 2 × 1 (<⟜≠)◶⟨0,' '=⊑⟩ ⊢  # 2 if the second character is a space, else 0
+  LenListNum ← {
+    n ← Lead 𝕩∊•d
+    l ← (1≤n) ∧ (9≥n)
+    ' ' = n ↓ 𝕩  # NOTE(review): this result is discarded; the statement appears to have no effect
+    t ← n↓(n+2)↑𝕩  # Up to two characters following the leading run of •d characters
+    l ∧ (" " ≡ 1↓t) ∧ ⊑(")." ∊˜ 1↑t)  # 1 if a run of 1-9 such characters is followed by ) or . and then a space
+  }
+
+  # Tables are not yet supported
+  IsTable ← 0˜
+
+  # Paragraphs
+  ProcParagraph ← {
+    Trsp ← { m←∧`⌾⌽𝕩=' ' ⋄ (m¬⊸/𝕩)∾(𝕨<∨´m)/"<br />" }  # Strip trailing spaces; NOTE(review): any trailing space adds a break, GFM asks for two or more
+    𝕩 ↩ (/(≠𝕩)(-∾⊢)1) Trsp¨ 𝕩  # Left argument is 1 for the last line only, so it never receives a break
+    "p" Html ProcInline ¯1 ↓ JoinLines ((Lead ' '⊸=)+"\#"≡2⊸↑)⊸↓¨ 𝕩  # Drop leading spaces (plus the \ of a leading \#), join, drop the final lf
+  }
+
+  # Inline elements: each ` alternately opens/closes <code>, each * alternately opens/closes <em>
+  ProcInline ← {
+    s←"`*"=⌜𝕩
+    d←<∘/˘s
+    c←⊏s⋄r←¯1⌽l←≠`c⋄cs←l∧c
+    code←Highlight⍟doHighlight¨(1-˜(l∧r)×+`cs)⊔𝕩
+    inc←¬l∨∨´<˘s
+    tags←∾d≠⊸⥊¨⟨"<code>"‿"</code>","<em>"‿"</em>"⟩
+    ((/inc)∾(≠¨tags∾code)/(∾d)∾/cs) ⍋⊸⊏ (inc/𝕩)∾∾tags∾code
+  }⍟doHighlight  # Applied only when doHighlight is 1; with 0 the text passes through unchanged
+
+  lineChars‿lineClas‿procFns ← <˘⍉>⟨  # Columns: starting characters, length/validity test, block processor; row index is the line type
+    "" ‿ (!∘0) ‿ ProcParagraph  # Type 0; FindGroup never yields 0 here, so (!∘0) is not reached from ClassifyLine
+    "#" ‿ LenHeading ‿ ProcHeading
+    " " ‿ IsCode ‿ ProcCode
+    "-+*" ‿ LenBullet ‿ (∾⊢) # ProcBullet
+    •d ‿ LenListNum ‿ (∾⊢) # ProcListNum
+    "|" ‿ IsTable ‿ (∾⊢) # ProcTable
+  ⟩
+
+  # ⌜
+  # We will also use the length and number of leading blanks.
+  lengths ← ≠¨ 𝕩
+  blanks ← (Lead ' '⊸=)¨ 𝕩
+  nonEmptyMask ← blanks < lengths
+  # Now let's use the line classifications to get the block structure.
+  lineType‿lineDat ← <˘⍉ > ClassifyLine¨ blanks ↓¨ 𝕩
+
+  # We will construct a mask of lines that start new blocks, blockStart.
+
+  codeMask ← nonEmptyMask ∧ blanks ≥ 4
+  lineType -↩ codeMask ∧ 1 = lineType  # A line indented by 4 or more is not a heading: type 1 becomes 0
+  paragraphMask ← nonEmptyMask ∧ 0 = lineType
+
+  # Code blocks consist of indented lines, possibly with blank lines
+  # in between. They must be separated from paragraphs by blank lines.
+  codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask
+  codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) ¬ nonEmptyMask
+  lineType ↩ 2¨⌾(codeMask⊸/) lineType  # Type 2 is the code row of the table above
+  paragraphMask ∧↩ ¬ codeMask
+
+  # Lists group together for now
+  bulletListMask‿orderedListMask ← <˘ 3‿4 =⌜ lineType
+
+  drop ← blanks × lineType < 2  # Leading blanks are removed only from lines of type 0 or 1
+
+  # Lines continue blocks if they are part of the same multi-line
+  # type as the previous line, and otherwise start new ones.
+  blockMasks ← codeMask‿bulletListMask‿orderedListMask‿paragraphMask
+  blockStart ← nonEmptyMask ∧ ¬ ∨´ (⊢ ∧ 0⊸Shl)¨ blockMasks
+
+  ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b }  # Look up the processor for type t and apply it, with data l, to the lines of block b
+  blocks ← (1 -˜ (nonEmptyMask ∨ codeMask) × +`blockStart) ⊔ drop ↓¨ 𝕩  # Group lines by block number; blank lines outside code get index ¯1
+  JoinLines (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks
+}
+
+# ⌜
+# Testing
+# Uses the test cases at https://spec.commonmark.org/0.29/spec.json
+# since GitHub doesn't seem to have published theirs
+TestSections ← {  # 𝕩: list of section names to run; result: one row ⟨in,exp,out,column 2 of the test⟩ per failing test
+  doHighlight ↩ 0  # Turn highlighting off while the tests run; set back to 1 below
+  tests ← ¯2 ↓˘ 8⊸(÷˜⟜≠∾⊣)⊸⥊2↓•LNS •path∾"../spec.json"  # Assumes each test occupies 8 lines of the file after 2 header lines; TODO confirm
+  tests ↩ ((⊑2+⊐⟜':')¨∘⊏ ((-','=¯1⊑⊢)↓↓)¨⎉1 ⊢) tests
+  testSection ← (1↓¯1↓⊢)¨ 5⊏˘tests
+  UnEsc ← {
+    esc ← (2 | (1+↕∘≠) (⊣-⌈`∘×) '\'≠⊢) 𝕩
+    esc ¬⊸/ (("\"""∾•UCS 9‿10)⊏˜"\""tn"⊐⊢)⌾((¯1⌽esc)⊸/) 𝕩
+  }
+  RunTest ← {
+    in‿exp ← UnEsc∘(1↓¯1↓⊢)¨2↑𝕩
+    out ← Markdown (•UCS 10) ((⊢-˜¬×+`)∘=⊔⊢) in  # Split the input at character code 10 and convert it
+    ⟨exp≡out,in,exp,out,2⊑𝕩⟩
+  }
+
+  ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩  # Tests whose column 2 is "47" or "85" are skipped (presumably example numbers)
+  res ← 1 ↓˘ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩  # Keep rows whose first entry (exp≡out) is 0, then drop that entry
+  doHighlight ↩ 1
+  res
+}
+
+# ⌜
+# Syntax highlighting
+doHighlight ← 1  # Switch read by ProcCode and ProcInline; TestSections sets it to 0 temporarily
+Highlight ← {  # 𝕩: code as a string; result: 𝕩 with <span class='…'> and </span> tags inserted
+  idChars ← ⟨
+    •d∾"¯.π∞"
+    ' '+⌾•UCS•a
+    •a
+    "_"
+  ⟩
+  classes‿chars ← <˘ ⍉ 2⊸(÷˜⟜≠∾⊣)⊸⥊⟨
+    "Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤"
+    "Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊"
+    "Modifier" , "˜˘¨⌜⁼´`"
+    "Composition" , "∘○⊸⟜⌾⊘◶⎉⚇⍟"
+    "Number" , •d∾"¯.π∞"
+    "Alphabetic" , "_"∾˜' '(+∾⊢)⌾•UCS•a
+    "Separator" , "⋄,"
+    "Gets" , "←↩→"
+    "Bracket" , "()⟨⟩"
+    "Brace" , "{}"
+    "Ligature" , "‿"
+    "Nothing" , "·"
+    "Comment" , "#"
+    "String" , "'"""
+    0 , " "∾•UCS 9‿10
+  ⟩
+  classTag ← ""‿""∾˜>{⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨¯1↓classes  # One tag pair per named class, plus an empty pair for the final (0) class
+  FindGroup ← { (+`≠¨𝕨) ⍋ (∾𝕨) ⊐ 𝕩 }  # For each character of 𝕩, the index of the group in 𝕨 containing it, or ≠𝕨 if none
+
+  r←𝕩='#'⋄s←/(≠↑2⊸↓)⊸∧𝕩='''⋄d←/𝕩='"'  # r: mask of #; s: indices of a ' with another ' two places later; d: indices of "
+  g←⍋q←∾⟨ s⋄¯1↓d⋄/r⟩ ⋄q↩g⊏q
+  e← g⊏∾⟨2+s⋄ 1↓d⋄(⊢-¯1↓0∾⊢)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩
+  Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟{0=⊑⌽𝕩}
+  st←¯1↓Se⟜(1↑˜≠)∾⟜≠q⍋e⋄b←st/q∾˘e
+  ToI←¯1↓·/⁼(≠𝕩)∾˜⥊
+  str‿com←(≠`∨⊢)∘ToI∘>¨¯1↓((st/q)⊏r)⊔○(∾⟜2)<˘b
+  col←14⌊((12×com)+(13×str))⌈chars FindGroup 𝕩  # Class index per character: 12 is Comment, 13 is String, 14 has empty tags
+
+  w←(≠↑0∾⊢)⊸<id←col∊4‿5  # id: Number or Alphabetic characters; w: first character of each run of them
+  idc←5|1-˜(idChars FindGroup w/𝕩)+'_'=((1↓∾⟜0)⊸<id)/𝕩  # Class of each run from the idChars group of its first character, plus 1 if its last character is _
+  col↩((id/+`w)⊏0∾idc)⌾(id⊸/)col
+
+  col↩(1⌽col)⊣⌾((𝕩=⊑"𝕩")⊸/)col  # Characters equal to ⊑"𝕩" take the class of the character after them; presumably a shared first code unit, TODO confirm
+
+  bd←(≠↑¯1∾⊢)⊸≠col  # Positions where the class differs from the previous character's
+  f←14≠bd/col  # Runs of class 14 get no tags
+  tags←⥊f/(bd/col)⊏classTag
+  pos←⥊f/2↕/bd∾1
+  ((↕≠𝕩)∾˜(≠¨tags)/pos) ⍋⊸⊏ 𝕩∾˜∾tags  # Merge the tag text into 𝕩 by sorting on position
+}
+
+head ← "<head><link href=""style.css"" rel=""stylesheet""/></head>"∾lf  # Prefix that ConvertFile (next line) puts before Markdown's output; lf is not defined in this file
+ConvertFile ← head ∾ Markdown∘•LNS |
