From cda7eb24f91daaa3b1f9e5999c3948b4f3cd3133 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Thu, 16 Jul 2020 22:05:49 -0400 Subject: Move markdown docs to docsrc/ and generate html in doc/ --- docsrc/md.bqn | 324 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 docsrc/md.bqn (limited to 'docsrc/md.bqn') diff --git a/docsrc/md.bqn b/docsrc/md.bqn new file mode 100644 index 00000000..acb264ba --- /dev/null +++ b/docsrc/md.bqn @@ -0,0 +1,324 @@ +# The Markdown function is a markdown to html converter for a "good +# enough" subset of Github-flavored markdown, as specified at +# https://github.github.com/gfm/ . +# +# Additionally, it highlights code sections as BQN, and executes +# sections that are doubly indented (eight spaces), placing their +# results below them. + +# Not supported: +# - Thematic breaks like *** or --- +# - Setext headings (underlined with ==== or ----) +# - Fenced code blocks (marked off with ``` or ~~~) +# - HTML blocks +# - Link reference definitions (who uses these?) +# - Block quotes (start with >) +# - Task lists + +# Here, a markdown file is represented as a list of its lines, which are +# strings (they don't include any line ending character). +# The html file is constructed directly as a string, using Html. + +################################ +# Utilities + +# 𝕨 is a list of lists (groups). For each cell of 𝕩, return the index of +# the first group containing it; cells found in no group give ≠𝕨. +FindGroup ← { + pos ← (∾𝕨) ⊐ 𝕩 # Position of each cell in the joined groups + ends ← +`≠¨𝕨 # Running total of group lengths: one past each group's end + ends ⍋ pos # Number of group ends at or before each position +} + +# 𝕨 is a list of possible expression start indices in any order and 𝕩 is +# the corresponding endpoints. The expressions are mutually exclusive +# and do not nest, and are enabled in index order. Return a shape ·‿2 +# array where the rows give the start and end of each enabled expression +# in index order. 
+Trace ← { + Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟{0=⊑⌽𝕩} + g←⍋𝕨 ⋄ s←g⊏𝕨 ⋄ e←g⊏𝕩 + st←¯1↓Se⟜(1↑˜≠)∾⟜≠s⍋e + st/s≍˘e +} + +# Join lines with newline characters. Include the trailing newline. +JoinLines ← ∾ ∾⟜lf¨ + +# Create an html node from tag name 𝕨 and interior text 𝕩: <𝕨>𝕩</𝕨> +Html ← { + ∾ ⟨"<",𝕨,">" , 𝕩 , "</",𝕨,">"⟩ +} + +################################ +Markdown ← { + ###### + # Utilities + + # Index of first zero, or number of leading 1s + Lead ← ⊑ ⊐⟜0 + + # Shift cells 𝕨 into array 𝕩, maintaining its total length + Shl ← ≠∘⊢ ↑ ∾ # From the left + Shr ← -∘≠∘⊢ ↑ ∾˜ # From the right + + # For each position, find whether 𝕨 was true at the last index so far + # where 𝕩 was false (inside a run of 1s in 𝕩: just before that run). + PrecedesGroup ← { + # We prepend a 0 to 𝕨, so that 0 is the "before start" index, with a + # false value, and normal indices are increased by 1. + 𝕨 ∾˜↩ 0 + inds ← 1 + ↕≠𝕩 + # Zero out indices where 𝕩 was true, and find the greatest index so + # far at each position. + last ← ⌈` inds × ¬𝕩 + last ⊏ 𝕨 + } + + # Remove leading and trailing spaces + Trim ← { 𝕩 /˜ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩 } + + ###### + # First we classify each line based on the type of block it can start. + ClassifyLine ← (0<≠)◶(0‿0)‿{ + ind ← ⊑ lineChars FindGroup ⊏𝕩 + getLen ← ind ⊑ lineClas∾⟨0˜⟩ + l ← GetLen 𝕩 + ⟨ind ∧ l>0 ⋄ l⟩ + } + + # Non-empty lines in code blocks have 4 leading spaces + IsCode ← 4 (≤⟜≠)◶⟨0,∧´' '=↑⟩ ⊢ + ProcCode ← { + lines ← JoinLines 4 ↓¨ 𝕩 + Esc ← (∾⥊¨) ("<>"⊸⊐ ⊑⟜⟨"&lt;","&gt;"⟩⍟(2>⊣)¨ ⊢) # Replace < and > with html entities + "pre" Html doHighlight◶⟨"code"Html Esc,Highlight⟩ lines + } + + # Headings start with #, and require 1-6 #s followed by a space. + # Any trailing #s are ignored. 
+ LenHeading ← { + n ← Lead 𝕩='#' + l ← (0" } # NOTE(review): truncated — the text between "(0" and this closing quote was lost when the file was captured. The rest of LenHeading, and the definitions of ProcHeading, IsTable, ProcTable, Trsp and the head of ProcParagraph (all used below but not defined in the visible text), appear to be missing; restore from the original source. + 𝕩 ↩ (/(≠𝕩)(-∾⊢)1) Trsp¨ 𝕩 + "p" Html ProcInline ¯1 ↓ JoinLines ((Lead ' '⊸=)+"\#"≡2⊸↑)⊸↓¨ 𝕩 + } + + lineChars‿lineClas‿procFns ← <˘⍉>⟨ + "" ‿ (!∘0) ‿ ProcParagraph + "#" ‿ LenHeading ‿ ProcHeading + " " ‿ IsCode ‿ ProcCode + # "-+*" ‿ LenBullet ‿ ProcBullet + # •d ‿ LenListNum ‿ ProcListNum + "|" ‿ IsTable ‿ ProcTable + ⟩ + + ###### + # Inline elements + ProcInline ← { + puncChars ← "!""#$%&'()*+,-./:;<=>?@[\]^_`{|}~" + I2M ← (≠𝕩)↑/⁼ # Index to mask + + # Code spans + ProcCodeSpan ← { + 𝕩 ↩ ' '¨⌾((𝕩=lf)⊸/) 𝕩 + 𝕩 ↩ (1↓¯1↓⊢)⍟((⊢<○(∧´)⊑∾⊑∘⌽) ' '⊸=) 𝕩 + "code" Html Highlight⍟doHighlight 𝕩 + } + tick ← 𝕩='`' + tend ← / (⊢ > 0⊸Shr) tick + tcount ← (1+↕∘≠)⊸(⊣-⌈`∘×) ¬ tick + tlen ← tend ⊏ tcount + c ← Trace´ tlen {m←(⊢=0⊸Shl)𝕨⋄(⌽⟜m/𝕩˜)¨1‿0}○((⍋tlen)⊸⊏) tend + cl ← (⊏˘c) ⊏ tcount + ctInds ← ⥊˘ 1 + c -⌜˘ cl×⌜1‿0 + include ← ¬ ≠` I2M ⥊ 0‿3⊸⊏˘ ctInds + codeStart ← I2M 1 ⊏˘ ctInds + codeGroup ← 1 -˜ codeStart (⊣×>)○(+`) I2M 2 ⊏˘ ctInds + code ← ProcCodeSpan¨ codeGroup ⊔ 𝕩 + + # Links + # A target ending in "md" and containing no '/' gets "html" instead; + # only the part before the first '#' is considered. + ReplaceMDSub ← { ¯2 (↓∾"html"˜)⍟(("md"≡↑)∧'/'∧´∘≠⊢) 𝕩 } + ReplaceMD ← { ReplaceMDSub⌾((⊑𝕩⊐"#")⊸↑) 𝕩 } + # 𝕨 is the link text and 𝕩 is the link target + ProcLink ← { ∾⟨"<a href=""",ReplaceMD 𝕩,""">",𝕨,"</a>"⟩ } + brak ← /∘(include ∧ 𝕩⊸=)¨ "]()[" + link ← (∊/⊣)´ 0‿¯1 + 2 ↑ brak + chains ← (⍋˜ ⊏ ⊢∾(≠𝕩)˜)` ¯1 ⌽ ( (∧´ (∊ ∧ <⟜(≠𝕩))¨ 1 ↓ chains)⊸/¨ chains # NOTE(review): truncated — one parenthesis is never closed, chains is used inside its own definition, and link is never used; part of this line (and probably a following reassignment of chains) appears to be lost. Restore from the original source. + linkStart ← I2M 0 ⊏ chains + lInds ← 1‿0‿2‿0⊸+˘ (⥊2⊸↕)˘ ⍉ chains + include ∧↩ ¬ ≠` I2M ⥊ (¯1‿1+0‿3⊸⊏)˘ lInds + linkGroup ← 1 -˜ (1‿0⥊˜≢)⊸(/ (⊣×>)○(+`I2M) ¬⊸/) ⥊lInds + links ← <∘ProcLink´˘ 2⊸(÷˜⟜≠∾⊣)⊸⥊ linkGroup ⊔ 𝕩 + + # Emphasis (still rudimentary) + eMasks ← (include ∧ 𝕩⊸=)¨ "*_" + eInds ← (⊢-2|⊢)∘≠⊸↑∘/¨ eMasks # Keep an even number of markers of each kind + include ∧↩ ¬∨´eMasks + eTags ← ∾ eInds ≠⊸⥊¨ <"<em>"‿"</em>" # Alternate opening and closing tags + + new ← ∾⟨eTags,code,links⟩ # Text to be added + inds← ∾eInds∾/¨codeStart‿linkStart # Where to add it + ((/include)∾(≠¨new)/inds) ⍋⊸⊏ (include/𝕩)∾∾new + } + + ###### + # Create the block structure using line classifications. 
+ lengths ← ≠¨ 𝕩 # Length of each line + blanks ← (Lead ' '⊸=)¨ 𝕩 # Number of leading blanks + nonEmptyMask ← blanks < lengths # Empty ←→ all leading blanks + + # Get line classifications: type of line, and data to be passed into + # the line processor. Note that leading blanks aren't passed in. + lineType‿lineDat ← <˘⍉ > ClassifyLine¨ blanks ↓¨ 𝕩 + # Empty lines have type ¯1. Other types are indices into procFns, in the order of the lineChars table: 0 paragraph, 1 heading, 2 code, 3 table. + lineType ↩ ¯1¨⌾((¬nonEmptyMask)⊸/) lineType + + # Lines that could be included in code blocks (will be refined) + codeMask ← nonEmptyMask ∧ blanks ≥ 4 + paragraphMask ← 0 = lineType # Taken before indented headings are demoted just below + # A header can't have 4 spaces of indentation. If it doesn't become + # part of a code block, it will be included in a paragraph. + lineType -↩ codeMask ∧ 1 = lineType + + # Code blocks consist of indented lines, possibly with blank lines + # in between. They must be separated from paragraphs by blank lines. + codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask # A run of indented lines directly after a paragraph line isn't code + codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0 # Blank lines with code on both sides join the code block + lineType ↩ 2¨⌾(codeMask⊸/) lineType # Everything left in codeMask gets the code type + + # Lines continue blocks if they are part of the same multi-line + # type as the previous line, and otherwise start new ones. + # Headers (type 1) always start new blocks. + blockStart ← nonEmptyMask ∧ (1 = lineType) ∨ ¯1⊸Shl⊸≠ lineType + # Headers and paragraphs ignore leading blanks. + drop ← blanks × lineType < 2 + # Group blocks based on blockStart, with type ¯1 lines excluded. + blocks ← (1 -˜ (lineType ≥ 0) × +`blockStart) ⊔ drop ↓¨ 𝕩 + + # To process a block, pick the appropriate function from procFns. 
+ ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b } + JoinLines (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks +} + +################################ +# Testing +# Uses the test cases at https://spec.commonmark.org/0.29/spec.json +# since Github doesn't seem to have published theirs +TestSections ← { + doHighlight ↩ 0 + tests ← ¯2 ↓˘ 8⊸(÷˜⟜≠∾⊣)⊸⥊2↓•LNS •path∾"../spec.json" + tests ↩ ((⊑2+⊐⟜':')¨∘⊏ ((-','=¯1⊑⊢)↓↓)¨⎉1 ⊢) tests + testSection ← (1↓¯1↓⊢)¨ 5⊏˘tests + UnEsc ← { + esc ← (2 | (1+↕∘≠) (⊣-⌈`∘×) '\'≠⊢) 𝕩 + esc ¬⊸/ (("\"""∾•UCS 9‿10)⊏˜"\""tn"⊐⊢)⌾((¯1⌽esc)⊸/) 𝕩 + } + RunTest ← { + in‿exp ← UnEsc∘(1↓¯1↓⊢)¨2↑𝕩 + out ← Markdown (•UCS 10) ((⊢-˜¬×+`)∘=⊔⊢) in + ⟨exp≡out,in,exp,out,2⊑𝕩⟩ + } + + ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩ + res ← 1 ↓˘ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩 + doHighlight ↩ 1 + res +} + +################################ +# Syntax highlighting +doHighlight ← 1 +Highlight ← { + idChars ← ⟨ + •d∾"¯.π∞" + ' '+⌾•UCS•a + •a + "_" + ⟩ + classes‿chars ← <˘ ⍉ 2⊸(÷˜⟜≠∾⊣)⊸⥊⟨ + 0 , " "∾•UCS 9‿10 + "Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤" + "Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊" + "Modifier" , "˜˘¨⌜⁼´`" + "Composition" , "∘○⊸⟜⌾⊘◶⎉⚇⍟" + "Number" , ∾idChars + "Gets" , "←↩→" + "Paren" , "()" + "Bracket" , "⟨⟩" + "Brace" , "{}" + "Ligature" , "‿" + "Nothing" , "·" + "Separator" , "⋄," + "Comment" , "#" + "String" , "'""" + ⟩ + classTag ← ""‿""∾>{⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨1↓classes # Opening/closing tag per class; class 0 (whitespace) gets none + + r←𝕩='#'⋄s←/(≠↑2⊸↓)⊸∧𝕩='''⋄d←/𝕩='"' + b←⟨s⋄¯1↓d⋄/r⟩ Trace○∾ ⟨2+s⋄1↓d⋄(⊢-¯1↓0∾⊢)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩ + sc←+´(1‿2-˜≠classes)×(≠`∨⊢)∘((≠𝕩)↑/⁼∘∾)¨2↑((⊏˘b)⊏r)⊔○(∾⟜2)<˘b + col←sc⌈14|chars FindGroup 𝕩 + + w←(≠↑0∾⊢)⊸"∾lf # NOTE(review): truncated — this line ends in an unterminated string and Highlight is never closed. The rest of Highlight and the definitions of lf and head (both used in this file but not defined in the visible text) appear to have been lost here; restore from the original source. +ConvertFile ← head ∾ Markdown∘•LNS -- cgit v1.2.3