diff options
| author | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-17 07:18:22 -0400 |
|---|---|---|
| committer | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-17 07:18:22 -0400 |
| commit | 5c4a7106dc47e2e1849eddf4d87fb0e107461d74 (patch) | |
| tree | 89c21dffbaeee21ab1701b9c58bdb405b832dc14 /docsrc/md.bqn | |
| parent | e8b8e378d3fb3da0c492759ac74803b2fa417651 (diff) | |
Now I remember why I named it doc/ instead of docs/
Diffstat (limited to 'docsrc/md.bqn')
| -rw-r--r-- | docsrc/md.bqn | 324 |
1 files changed, 0 insertions, 324 deletions
diff --git a/docsrc/md.bqn b/docsrc/md.bqn deleted file mode 100644 index acb264ba..00000000 --- a/docsrc/md.bqn +++ /dev/null @@ -1,324 +0,0 @@ -# The Markdown function is a markdown to html converter for a "good -# enough" subset of Github-flavored markdown, as specified at -# https://github.github.com/gfm/ . -# -# Additionally, it highlights code sections as BQN, and executes -# sections that are doubly indented (eight spaces), placing their -# results below them. - -# Not supported: -# - Thematic breaks like *** or --- -# - Setext headings (underlined with ==== or ----) -# - Fenced code blocks (marked off with ``` or ~~~) -# - HTML blocks -# - Link reference definitions (who uses these?) -# - Block quotes (start with >) -# - Task lists - -# Here, a markdown file is represented as a list of its lines, which are -# strings (they don't include any line ending character). -# The html file is constructed directly as a string, using Html. - -################################ -# Utilities - -# 𝕨 is a list of lists. Find the first of these lists each cell of 𝕩 -# belongs to. -FindGroup ← { - i ← (∾𝕨) ⊐ 𝕩 # Index in all cells of 𝕨 - e ← +`≠¨𝕨 # Index past the end of each group of 𝕨 - e ⍋ i # How many end-indices does each element pass? -} - -# 𝕨 is a list of possible expression start indices in any order and 𝕩 is -# the corresponding endpoints. The expressions are mutually exclusive -# and do not nest, and are enabled in index order. Return a shape ·‿2 -# array where the rows give the start and end of each enabled expression -# in index order. -Trace ← { - Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟{0=⊑⌽𝕩} - g←⍋𝕨 ⋄ s←g⊏𝕨 ⋄ e←g⊏𝕩 - st←¯1↓Se⟜(1↑˜≠)∾⟜≠s⍋e - st/s≍˘e -} - -# Join lines with newline characters. Include the trailing newline. -JoinLines ← ∾ ∾⟜lf¨ - -# Create an html node from a tag name and interior text -Html ← { - ∾ ⟨"<",𝕨,">" , 𝕩 , "</",(⊑⊐⟜" ")⊸↑𝕨,">"⟩ -} - -################################ -Markdown ← { - ###### - # Utilities - - # Index of first zero, or number of leading 1s - Lead ← ⊑ ⊐⟜0 - - # Shift cells 𝕨 into array 𝕩, maintaining its total length - Shl ← ≠∘⊢ ↑ ∾ # From the left - Shr ← -∘≠∘⊢ ↑ ∾˜ # From the right - - # Find whether 𝕨 was true at the last index where 𝕩 was true, in each - # position. - PrecedesGroup ← { - # We prepend a 0 to 𝕨, so that 0 is the "before start" index, with a - # false value, and normal indices are increased by 1. - 𝕨 ∾˜↩ 0 - inds ← 1 + ↕≠𝕩 - # Zero out indices where x was false, and find the greatest index so - # far at each position. - last ← ⌈` inds × ¬𝕩 - last ⊏ 𝕨 - } - - # Remove leading and trailing spaces - Trim ← { 𝕩 /˜ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩 } - - ###### - # First we classify each line based on the type of block it can start. - ClassifyLine ← (0<≠)◶(0‿0)‿{ - ind ← ⊑ lineChars FindGroup ⊏𝕩 - getLen ← ind ⊑ lineClas∾⟨0˜⟩ - l ← GetLen 𝕩 - ⟨ind ∧ l>0 ⋄ l⟩ - } - - # Non-empty lines in code blocks have 4 leading spaces - IsCode ← 4 (≤⟜≠)◶⟨0,∧´' '=↑⟩ ⊢ - ProcCode ← { - lines ← JoinLines 4 ↓¨ 𝕩 - Esc ← (∾⥊¨) ("<>"⊸⊐ ⊑⟜⟨"<",">"⟩⍟(2>⊣)¨ ⊢) - "pre" Html doHighlight◶⟨"code"Html Esc,Highlight⟩ lines - } - - # Headings start with #, and require 1-6 #s followed by a space. - # Any trailing #s are ignored. - LenHeading ← { - n ← Lead 𝕩='#' - l ← (0<n) ∧ (6≥n) - s ← n (<⟜≠)◶⟨1,' '=⊑⟩ 𝕩 # Character after hashes must be a space, if any - n × l ∧ s - } - ProcHeading ← { - tag ← "h" ∾ 𝕨⊏•d # h3 for 3 hashes, etc. - 𝕩 ↓˜↩ 𝕨+1 - trsp ← ∧`⌾⌽ 𝕩=' ' - tail ← ∧`⌾⌽ trsp∨𝕩='#' # Mask of trailing hashes - f ← tail < 0 Shr tail # Character before trailing hashes - 𝕩 /˜↩ ¬ f (⊑⟨"\"," ",""⟩⊐<f/𝕩)◶⟨⊣,⊢,⊢,0¨⊢⟩ tail - # Add an id: lowercase the header, replacing non-•a with hyphens - Slugify ← { - ch ← •UCS "-Aa" - bounds ← ⥊ (1↓ch) +⌜ 0‿26 # Of the upper and lowercase alphabet - (bounds⊸⍋ {(⊑ch)¨⌾((¬2|𝕨)⊸/)𝕩+32×1=𝕨} ⊢)⌾•UCS 𝕩 - } - tag ∾↩ " id="∾""""(∾∾⊣) Slugify 𝕩 - tag Html ProcInline Trim 𝕩 - }⟜⊑ - - # List items start with a bullet (unordered) or number (ordered). - LenBullet ← 2 × 1 (<⟜≠)◶⟨0,' '=⊑⟩ ⊢ - LenListNum ← { - n ← Lead 𝕩∊•d - l ← (1≤n) ∧ (9≥n) - ' ' = n ↓ 𝕩 - t ← n↓(n+2)↑𝕩 - l ∧ (" " ≡ 1↓t) ∧ ⊑(")." ∊˜ 1↑t) - } - - # Any line that starts with a | is a table, at least in my lazy version - IsTable ← 1˜ - ProcTable ← { - rows ← (Trim¨ ((1-˜¬×+`)'|'⊸=)⊸⊔)¨ 𝕩 - inc ← ¬ rule ← ∧´∘∾¨'-'=rows - rows ↩ ProcInline¨¨⌾(inc⊸/) rows - rowType ← inc / +` rule # Head or body - DoRow ← { lf ∾ JoinLines 𝕨⊸Html¨ 𝕩 } - rows ↩ (rowType ⊏ "th"‿"td") DoRow¨ inc/rows - rowGroups ← ¯1 ↓ rowType ⊔○(∾⟜2) "tr"⊸Html¨ rows - sections ← "thead"‿"tbody" Html⟜(lf ∾ JoinLines)¨ rowGroups - "table" Html lf ∾ JoinLines (0 < ≠¨rowGroups) / sections - } - - # Paragraphs - ProcParagraph ← { - Trsp ← { m←∧`⌾⌽𝕩=' ' ⋄ (m¬⊸/𝕩)∾(𝕨<∨´m)/"<br />" } - 𝕩 ↩ (/(≠𝕩)(-∾⊢)1) Trsp¨ 𝕩 - "p" Html ProcInline ¯1 ↓ JoinLines ((Lead ' '⊸=)+"\#"≡2⊸↑)⊸↓¨ 𝕩 - } - - lineChars‿lineClas‿procFns ← <˘⍉>⟨ - "" ‿ (!∘0) ‿ ProcParagraph - "#" ‿ LenHeading ‿ ProcHeading - " " ‿ IsCode ‿ ProcCode - # "-+*" ‿ LenBullet ‿ ProcBullet - # •d ‿ LenListNum ‿ ProcListNum - "|" ‿ IsTable ‿ ProcTable - ⟩ - - ###### - # Inline elements - ProcInline ← { - puncChars ← "!""#$%&'()*+,-./:;<=>?@[\]^_`{|}~" - I2M ← (≠𝕩)↑/⁼ # Index to mask - - # Code spans - ProcCodeSpan ← { - 𝕩 ↩ ' '¨⌾((𝕩=lf)⊸/) 𝕩 - 𝕩 ↩ (1↓¯1↓⊢)⍟((⊢<○(∧´)⊑∾⊑∘⌽) ' '⊸=) 𝕩 - "code" Html Highlight⍟doHighlight 𝕩 - } - tick ← 𝕩='`' - tend ← / (⊢ > 0⊸Shr) tick - tcount ← (1+↕∘≠)⊸(⊣-⌈`∘×) ¬ tick - tlen ← tend ⊏ tcount - c ← Trace´ tlen {m←(⊢=0⊸Shl)𝕨⋄(⌽⟜m/𝕩˜)¨1‿0}○((⍋tlen)⊸⊏) tend - cl ← (⊏˘c) ⊏ tcount - ctInds ← ⥊˘ 1 + c -⌜˘ cl×⌜1‿0 - include ← ¬ ≠` I2M ⥊ 0‿3⊸⊏˘ ctInds - codeStart ← I2M 1 ⊏˘ ctInds - codeGroup ← 1 -˜ codeStart (⊣×>)○(+`) I2M 2 ⊏˘ ctInds - code ← ProcCodeSpan¨ codeGroup ⊔ 𝕩 - - # Links - ReplaceMDSub ← { ¯2 (↓∾"html"˜)⍟(("md"≡↑)∧'/'∧´∘≠⊢) 𝕩 } - ReplaceMD ← { ReplaceMDSub⌾((⊑𝕩⊐"#")⊸↑) 𝕩 } - ProcLink ← { ∾⟨"<a href=""",(ReplaceMD 𝕩),""">",𝕨,"</a>"⟩ } - brak ← /∘(include ∧ 𝕩⊸=)¨ "]()[" - link ← (∊/⊣)´ 0‿¯1 + 2 ↑ brak - chains ← (⍋˜ ⊏ ⊢∾(≠𝕩)˜)` ¯1 ⌽ (<link) ∾ 2 ↓ brak - chains ↩ > (∧´ (∊ ∧ <⟜(≠𝕩))¨ 1 ↓ chains)⊸/¨ chains - linkStart ← I2M 0 ⊏ chains - lInds ← 1‿0‿2‿0⊸+˘ (⥊2⊸↕)˘ ⍉ chains - include ∧↩ ¬ ≠` I2M ⥊ (¯1‿1+0‿3⊸⊏)˘ lInds - linkGroup ← 1 -˜ (1‿0⥊˜≢)⊸(/ (⊣×>)○(+`I2M) ¬⊸/) ⥊lInds - links ← <∘ProcLink´˘ 2⊸(÷˜⟜≠∾⊣)⊸⥊ linkGroup ⊔ 𝕩 - - # Emphasis (still rudimentary) - eMasks ← (include ∧ 𝕩⊸=)¨ "*_" - eInds ← (⊢-2|⊢)∘≠⊸↑∘/¨ eMasks - include ∧↩ ¬∨´eMasks - eTags ← ∾ eInds ≠⊸⥊¨ <"<em>"‿"</em>" - - new ← ∾⟨eTags,code,links⟩ # Text to be added - inds← ∾eInds∾/¨codeStart‿linkStart # Where to add it - ((/include)∾(≠¨new)/inds) ⍋⊸⊏ (include/𝕩)∾∾new - } - - ###### - # Create the block structure using line classifications. - lengths ← ≠¨ 𝕩 # Length of each line - blanks ← (Lead ' '⊸=)¨ 𝕩 # Number of leading blanks - nonEmptyMask ← blanks < lengths # Empty ←→ all leading blanks - - # Get line classifications: type of line, and data to be passed into - # the line processor. Note that leading blanks aren't passed in. - lineType‿lineDat ← <˘⍉ > ClassifyLine¨ blanks ↓¨ 𝕩 - # Empty lines have type ¯1. - lineType ↩ ¯1¨⌾((¬nonEmptyMask)⊸/) lineType - - # Lines that could be included in code blocks (will be refined) - codeMask ← nonEmptyMask ∧ blanks ≥ 4 - paragraphMask ← 0 = lineType - # A header can't have 4 spaces of indentation. If it doesn't become - # part of a code block, it will be included in a paragraph. - lineType -↩ codeMask ∧ 1 = lineType - - # Code blocks consist of indented lines, possibly with blank lines - # in between. They must be separated from paragraphs by blank lines. - codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask - codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0 - lineType ↩ 2¨⌾(codeMask⊸/) lineType - - # Lines continue blocks if they are part of the same multi-line - # type as the previous line, and otherwise start new ones. - # Headers (type 1) always start new blocks. - blockStart ← nonEmptyMask ∧ (1 = lineType) ∨ ¯1⊸Shl⊸≠ lineType - # Headers and paragraphs ignore leading blanks. - drop ← blanks × lineType < 2 - # Group blocks based on blockStart, with type ¯1 lines excluded. - blocks ← (1 -˜ (lineType ≥ 0) × +`blockStart) ⊔ drop ↓¨ 𝕩 - - # To process a block, pick the appropriate function from procFns. - ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b } - JoinLines (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks -} - -################################ -# Testing -# Uses the test cases at https://spec.commonmark.org/0.29/spec.json -# since Github doesn't seem to have published theirs -TestSections ← { - doHighlight ↩ 0 - tests ← ¯2 ↓˘ 8⊸(÷˜⟜≠∾⊣)⊸⥊2↓•LNS •path∾"../spec.json" - tests ↩ ((⊑2+⊐⟜':')¨∘⊏ ((-','=¯1⊑⊢)↓↓)¨⎉1 ⊢) tests - testSection ← (1↓¯1↓⊢)¨ 5⊏˘tests - UnEsc ← { - esc ← (2 | (1+↕∘≠) (⊣-⌈`∘×) '\'≠⊢) 𝕩 - esc ¬⊸/ (("\"""∾•UCS 9‿10)⊏˜"\""tn"⊐⊢)⌾((¯1⌽esc)⊸/) 𝕩 - } - RunTest ← { - in‿exp ← UnEsc∘(1↓¯1↓⊢)¨2↑𝕩 - out ← Markdown (•UCS 10) ((⊢-˜¬×+`)∘=⊔⊢) in - ⟨exp≡out,in,exp,out,2⊑𝕩⟩ - } - - ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩ - res ← 1 ↓˘ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩 - doHighlight ↩ 1 - res -} - -################################ -# Syntax highlighting -doHighlight ← 1 -Highlight ← { - idChars ← ⟨ - •d∾"¯.π∞" - ' '+⌾•UCS•a - •a - "_" - ⟩ - classes‿chars ← <˘ ⍉ 2⊸(÷˜⟜≠∾⊣)⊸⥊⟨ - 0 , " "∾•UCS 9‿10 - "Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤" - "Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊" - "Modifier" , "˜˘¨⌜⁼´`" - "Composition" , "∘○⊸⟜⌾⊘◶⎉⚇⍟" - "Number" , ∾idChars - "Gets" , "←↩→" - "Paren" , "()" - "Bracket" , "⟨⟩" - "Brace" , "{}" - "Ligature" , "‿" - "Nothing" , "·" - "Separator" , "⋄," - "Comment" , "#" - "String" , "'""" - ⟩ - classTag ← ""‿""∾>{⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨1↓classes - - r←𝕩='#'⋄s←/(≠↑2⊸↓)⊸∧𝕩='''⋄d←/𝕩='"' - b←⟨s⋄¯1↓d⋄/r⟩ Trace○∾ ⟨2+s⋄1↓d⋄(⊢-¯1↓0∾⊢)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩ - sc←+´(1‿2-˜≠classes)×(≠`∨⊢)∘((≠𝕩)↑/⁼∘∾)¨2↑((⊏˘b)⊏r)⊔○(∾⟜2)<˘b - col←sc⌈14|chars FindGroup 𝕩 - - w←(≠↑0∾⊢)⊸<id←col=5 - idc←1+5|1-˜(idChars FindGroup w/𝕩)+'_'=((1↓∾⟜0)⊸<id)/𝕩 - col↩((id/+`w)⊏0∾idc)⌾(id⊸/)col - - col↩(1⌽col)⊣⌾((𝕩=⊑"𝕩")⊸/)col - - bd←(≠↑¯1∾⊢)⊸≠col - f←0<bd/col - tags←⥊f/(bd/col)⊏classTag - pos←⥊f/2↕/bd∾1 - ((↕≠𝕩)∾˜(≠¨tags)/pos) ⍋⊸⊏ 𝕩∾˜∾tags -} - -head ← "<head><link href=""style.css"" rel=""stylesheet""/></head>"∾lf -ConvertFile ← head ∾ Markdown∘•LNS |
