diff options
| author | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-17 21:25:22 -0400 |
|---|---|---|
| committer | Marshall Lochbaum <mwlochbaum@gmail.com> | 2020-07-17 21:25:22 -0400 |
| commit | 15908ba604c2a27b84a30d7ce91ceb7a8c1064aa (patch) | |
| tree | d7a456e917af68385310043041fdb303259476d7 /doc | |
| parent | 7d3af92a20237eed83b943fa74aa2a086e657658 (diff) | |
Mirror repository tree in docs/ and add html spec documents
Diffstat (limited to 'doc')
| -rwxr-xr-x | doc/gen | 3 |
| -rw-r--r-- | doc/md.bqn | 394 |
2 files changed, 0 insertions, 397 deletions
diff --git a/doc/gen b/doc/gen deleted file mode 100755 index 8a70dfdd..00000000 --- a/doc/gen +++ /dev/null @@ -1,3 +0,0 @@ -#! /usr/bin/env bash - -for f in *.md; do ../dzref md.bqn "•←ConvertFile \"$PWD/$f\"" > ../docs/${f%md}html; done diff --git a/doc/md.bqn b/doc/md.bqn deleted file mode 100644 index 33261326..00000000 --- a/doc/md.bqn +++ /dev/null @@ -1,394 +0,0 @@ -# The Markdown function is a markdown to html converter for a "good -# enough" subset of Github-flavored markdown, as specified at -# https://github.github.com/gfm/ . -# -# Additionally, it highlights code sections as BQN, and executes -# sections that are doubly indented (eight spaces), placing their -# results below them. - -# Not supported: -# - Thematic breaks like *** or --- -# - Setext headings (underlined with ==== or ----) -# - Fenced code blocks (marked off with ``` or ~~~) -# - HTML blocks -# - Link reference definitions (who uses these?) -# - Block quotes (start with >) -# - Task lists - -# Here, a markdown file is represented as a list of its lines, which are -# strings (they don't include any line ending character). -# The html file is constructed directly as a string, using Html. - -################################ -# Utilities - -# Shift cells 𝕨 into array 𝕩, maintaining its total length -Shl ← ≠∘⊢ ↑ ∾ # From the left -Shr ← -∘≠∘⊢ ↑ ∾˜ # From the right - -# 𝕨 is a list of lists. Find the first of these lists each cell of 𝕩 -# belongs to. -FindGroup ← { - i ← (∾𝕨) ⊐ 𝕩 # Index in all cells of 𝕨 - e ← +`≠¨𝕨 # Index past the end of each group of 𝕨 - e ⍋ i # How many end-indices does each element pass? -} - -# 𝕨 is a list of possible expression start indices in any order and 𝕩 is -# the corresponding endpoints. The expressions are mutually exclusive -# and do not nest, and are enabled in index order. Return a shape ·‿2 -# array where the rows give the start and end of each enabled expression -# in index order. 
-Trace ← { - Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟{0=⊑⌽𝕩} - g←⍋𝕨 ⋄ s←g⊏𝕨 ⋄ e←g⊏𝕩 - st←¯1↓Se⟜(1↑˜≠)∾⟜≠s⍋e - st/s≍˘e -} - -# Count the number of consecutive true values up to the current element. -# To do this, subtract the index of the last false character from the -# current index. -CountRuns ← { (1+↕≠𝕩) (⊣ - ⌈`∘×) ¬𝕩 } - -# 𝕩 is a string; return a mask of the characters that are escaped, that -# is, preceded by an odd number of backslashes (since a backslash can -# escape another backslash). -IsEscaped ← { - 0 Shl 2 | CountRuns 𝕩 = '\' -} - -# Join lines with newline characters. Include the trailing newline. -JoinLines ← ∾ ∾⟜lf¨ - -# Create an html node from a tag name and interior text -Html ← { - ∾ ⟨"<",𝕨,">" , 𝕩 , "</",(⊑⊐⟜" ")⊸↑𝕨,">"⟩ -} - -Modify ← { ⟨include,add,pos⟩𝕊𝕩: - ((/include)∾(≠¨add)/pos) ⍋⊸⊏ (include/𝕩)∾∾add -} - -# Character entity escaping -# In order to use this with other modifications such as highlighting, -# CharEntities returns a mask of characters to be escaped, and their -# corresponding escapes. -CharEntities ← {1¨⊸𝕊𝕩; # 𝕨 gives characters to potentially escape - ce ← (1-˜¬×+`)∘=⟜⊑⊸⊔ " ""quot & <lt >gt" - chars ← ⊑¨ce ⋄ entities ← ("&"∾∾⟜";")¨ 1↓¨ce - ind ← chars ⊐ 𝕩 - useEntity ← 𝕨 ∧ ind < ≠chars - ⟨¬ useEntity , entities ⊏˜ useEntity/ind , /useEntity⟩ -} - -################################ -Markdown ← {𝕊𝕩:1𝕊𝕩; extensions𝕊𝕩: - ###### - # Utilities - - # Index of first zero, or number of leading 1s - Lead ← ⊑ ⊐⟜0 - - # Find whether 𝕨 was true at the last index where 𝕩 was true, in each - # position. - PrecedesGroup ← { - # We prepend a 0 to 𝕨, so that 0 is the "before start" index, with a - # false value, and normal indices are increased by 1. - 𝕨 ∾˜↩ 0 - inds ← 1 + ↕≠𝕩 - # Zero out indices where x was false, and find the greatest index so - # far at each position. 
- last ← ⌈` inds × ¬𝕩 - last ⊏ 𝕨 - } - - # Remove leading and trailing spaces - Trim ← { 𝕩 /˜ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩 } - - ###### - # First we classify each line based on the type of block it can start. - ClassifyLine ← (0<≠)◶(0‿0)‿{ - ind ← ⊑ lineChars FindGroup ⊏𝕩 - getLen ← ind ⊑ lineClas∾⟨0˜⟩ - l ← GetLen 𝕩 - ⟨ind ∧ l>0 ⋄ l⟩ - } - - # Non-empty lines in code blocks have 4 leading spaces - IsCode ← 4 (≤⟜≠)◶⟨0,∧´' '=↑⟩ ⊢ - ProcCode ← { - lines ← JoinLines 4 ↓¨ 𝕩 - c‿ci ← extensions◶(2⥊<⟨⟩)‿GetHighlights lines - em‿e‿ei ← CharEntities lines - mod ← ⟨em,e∾c,ei∾ci⟩ Modify lines - "pre" Html "code" Html⍟(¬extensions) mod - } - - # Headings start with #, and require 1-6 #s followed by a space. - # Any trailing #s are ignored. - LenHeading ← { - n ← Lead 𝕩='#' - l ← (0<n) ∧ (6≥n) - s ← n (<⟜≠)◶⟨1,' '=⊑⟩ 𝕩 # Character after hashes must be a space, if any - n × l ∧ s - } - ProcHeading ← { - tag ← "h" ∾ 𝕨⊏•d # h3 for 3 hashes, etc. - 𝕩 ↓˜↩ 𝕨+1 - trsp ← ∧`⌾⌽ 𝕩=' ' - tail ← ∧`⌾⌽ trsp∨𝕩='#' # Mask of trailing hashes - f ← tail < 0 Shr tail # Character before trailing hashes - 𝕩 /˜↩ ¬ f (⊑⟨"\"," ",""⟩⊐<f/𝕩)◶⟨⊣,⊢,⊢,0¨⊢⟩ tail - # Add an id: lowercase the header, replacing non-•a with hyphens - Slugify ← { - ch ← •UCS "-Aa" - bounds ← ⥊ (1↓ch) +⌜ 0‿26 # Of the upper and lowercase alphabet - (bounds⊸⍋ {(⊑ch)¨⌾((¬2|𝕨)⊸/)𝕩+32×1=𝕨} ⊢)⌾•UCS 𝕩 - } - extensions { tag ∾↩ " id="∾""""(∾∾⊣) Slugify 𝕩 }⍟⊣ 𝕩 - tag Html ProcInline Trim 𝕩 - }⟜⊑ - - # List items start with a bullet (unordered) or number (ordered). - LenBullet ← +⟜× ·≤⟜4⊸× ·Lead ' '=1⊸↓ - ProcBullet ← { - "ul" Html lf ∾ JoinLines ("li" Html ProcInline)¨ 𝕨 ↓¨ 𝕩 - } - LenListNum ← { # Not used yet - n ← Lead 𝕩∊•d - l ← (1≤n) ∧ (9≥n) - ' ' = n ↓ 𝕩 - t ← n↓(n+2)↑𝕩 - l ∧ (" " ≡ 1↓t) ∧ ⊑(")." 
∊˜ 1↑t) - } - - # Any line that starts with a | is a table, at least in my lazy version - IsTable ← 1˜ - ProcTable ← { - CutRow ← { - b ← '|' = 𝕩 - r ← b > 0 Shl '\' = 𝕩 - 1 -˜ (¬r∨1⌽b>r) × +`r - } - rows ← (Trim¨ CutRow⊸⊔)¨ 𝕩 - inc ← ¬ rule ← ∧´∘∾¨'-'=rows - rows ↩ ProcInline¨¨⌾(inc⊸/) rows - rows ↩ (⊏rows) (⊢ ∾ ⟨""⟩ /˜ 0⌈-○≠)¨ rows - rowType ← inc / +` rule # Head or body - DoRow ← { lf ∾ JoinLines 𝕨⊸Html¨ 𝕩 } - rows ↩ (rowType ⊏ "th"‿"td") DoRow¨ inc/rows - rowGroups ← ¯1 ↓ rowType ⊔○(∾⟜2) "tr"⊸Html¨ rows - sections ← "thead"‿"tbody" Html⟜(lf ∾ JoinLines)¨ rowGroups - "table" Html lf ∾ JoinLines (0 < ≠¨rowGroups) / sections - } - - # Paragraphs - ProcParagraph ← { - "p" Html ProcInline ¯1 ↓ JoinLines Trim⌾(¯1⊸⊑) (Lead ' '⊸=)⊸↓¨ 𝕩 - } - - # HTML blocks - # Lazy rule: if it starts with < and contains >, it's probably HTML - IsHtmlBlock ← ⊑ ">"⊸∊ - ProcHtmlBlock ← { - codeMask ← "<code>" ¯6⊸⌽⊸(>○(⌈`(1+↕∘≠)⊸×))○(⍷⟜𝕩 ∾ 0⥊˜1-˜≠) "</code>" - (1¨ <⊸∾ codeMask⊸GetMultiHighlights)⊸Modify 𝕩 - }⍟extensions⟜JoinLines - - lineChars‿lineClas‿procFns ← <˘⍉>⟨ - "" ‿ (!∘0) ‿ ProcParagraph - "#" ‿ LenHeading ‿ ProcHeading - " " ‿ IsCode ‿ ProcCode - "-+*" ‿ LenBullet ‿ ProcBullet - # •d ‿ LenListNum ‿ ProcListNum - "|" ‿ IsTable ‿ ProcTable - "<" ‿ IsHtmlBlock ‿ ProcHtmlBlock - ⟩ - - ###### - # Inline elements - ProcInline ← { - I2M ← (≠𝕩) ↑ /⁼ # Index to mask - punc ← 𝕩 ∊ "!""#$%&'()*+,-./:;<=>?@[\]^_`{|}~" - actual ← ¬ punc ∧ IsEscaped 𝕩 # backtick or *actual* backtick? - - # Code spans - tick ← 𝕩 = '`' - tend ← / (⊢ > 0⊸Shr) tick - tcount ← CountRuns tick - # 𝕨 are tick lengths and 𝕩 are positions, both sorted by length - MatchTicks ← { - # Tick runs other than the last of each length - notLast ← (⊢=0⊸Shr) 𝕨 - # Ticks preceded by backslashes can't start code blocks, but can - # end them. This approach is wrong for multiple ticks with a - # leading backslash in front, which are excluded but should just - # be treated as one shorter when leading. 
- filter ← notLast / (𝕩¬𝕨) ⊏ actual - # For leading ticks, filter by not-last; for trailing ones, rotate - # by ¯1 to filter by not-first. - (filter / ⌽⟜notLast / 𝕩˜)¨ 0‿¯1 - } - tlen ← tend ⊏ tcount - c ← Trace´ tlen MatchTicks○((⍋tlen)⊸⊏) tend - cl ← (⊏˘c) ⊏ tcount - ctInds ← ⥊˘ 1 + c -⌜˘ cl×⌜1‿0 - codeMask ← ≠` I2M ⥊ codeBounds ← 1‿2⊸⊏˘ ctInds - 𝕩 ↩ ' '¨⌾((codeMask∧𝕩=lf)⊸/) 𝕩 - # If span has both a leading and a trailing space, they are removed. - remSpace ← I2M ⥊ ((1<-˜´˘)∧·∧´˘' '=⊏⟜𝕩)⊸/ -⟜0‿1˘ codeBounds - codeMask ∧↩ ¬ remSpace - ⟨code,codePos⟩ ← codeMask extensions◶(2⥊<⟨⟩)‿GetMultiHighlights 𝕩 - include ← ¬ remSpace ∨ ≠` I2M ⥊ ctInds - codeBounds ↩ ⥊ -⟜1‿0˘ codeBounds - unused ← actual ∧ include ∧ ¬ codeMask - - # Links - ReplaceMDSub ← { ¯2 (↓∾"html"˜)⍟(("md"≡↑)∧'/'∧´∘≠⊢) 𝕩 } - ReplaceMD ← { ReplaceMDSub⌾((⊑𝕩⊐"#")⊸↑) 𝕩 } - ProcLink ← { ∾⟨"<a href=""",(ReplaceMD 𝕩),""">",𝕨,"</a>"⟩ } - brak ← /∘(unused ∧ 𝕩⊸=)¨ "]()[" - link ← (∊/⊣)´ 0‿¯1 + 2 ↑ brak - chains ← (⍋˜ ⊏ ⊢∾(≠𝕩)˜)` ¯1 ⌽ (<link) ∾ 2 ↓ brak - chains ↩ > (∧´ (∊ ∧ <⟜(≠𝕩))¨ 1 ↓ chains)⊸/¨ chains - linkPos ← 0 ⊏ chains - lInds ← 1‿0‿2‿0⊸+˘ (⥊2⊸↕)˘ ⍉ chains - unused ∧↩ include ∧↩ ¬ ≠` I2M ⥊ (¯1‿1+0‿3⊸⊏)˘ lInds - linkGroup ← 1 -˜ (1‿0⥊˜≢)⊸(/ (⊣×>)○(+`I2M) ¬⊸/) ⥊lInds - links ← <∘ProcLink´˘ 2⊸(÷˜⟜≠∾⊣)⊸⥊ linkGroup ⊔ 𝕩 - - # Emphasis (still rudimentary) - eMasks ← (unused ∧ 𝕩⊸=)¨ "*_" - eInds ← ∾ (⊢-2|⊢)∘≠⊸↑∘/¨ eMasks - include ∧↩ ¬ I2M ∧ eInds - eTags ← eInds ≠⊸⥊ "<em>"‿"</em>" - eInds ∾↩ codeBounds - eTags ∾↩ codeBounds ≠⊸⥊ "<code>"‿"</code>" - - # Remove backslashes used for escaping - include ∧↩ codeMask ∨ 1 ⌽ actual - - em‿ent‿ei ← include CharEntities 𝕩 - include ∧↩ em - - add ← ∾⟨eTags,ent,code,links⟩ # Text to be added - pos ← ∾⟨eInds,ei,codePos,linkPos⟩ # Where to add it - ⟨include,add,pos⟩ Modify 𝕩 - } - - ###### - # Create the block structure using line classifications. 
- lengths ← ≠¨ 𝕩 # Length of each line - blanks ← (Lead ' '⊸=)¨ 𝕩 # Number of leading blanks - nonEmptyMask ← blanks < lengths # Empty ←→ all leading blanks - - # Get line classifications: type of line, and data to be passed into - # the line processor. Note that leading blanks aren't passed in. - lineType‿lineDat ← <˘⍉ > ClassifyLine¨ blanks ↓¨ 𝕩 - # Empty lines have type ¯1. - lineType ↩ ¯1¨⌾((¬nonEmptyMask)⊸/) lineType - - # Lines that could be included in code blocks (will be refined) - codeMask ← nonEmptyMask ∧ blanks ≥ 4 - paragraphMask ← 0 = lineType - # A header can't have 4 spaces of indentation. If it doesn't become - # part of a code block, it will be included in a paragraph. - lineType -↩ codeMask ∧ 1 = lineType - - # Code blocks consist of indented lines, possibly with blank lines - # in between. They must be separated from paragraphs by blank lines. - codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask - codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0 - lineType ↩ 2¨⌾(codeMask⊸/) lineType - - # Lines continue blocks if they are part of the same multi-line - # type as the previous line, and otherwise start new ones. - # Headers (type 1) always start new blocks. - blockStart ← nonEmptyMask ∧ (1 = lineType) ∨ ¯1⊸Shl⊸≠ lineType - # Headers and paragraphs ignore leading blanks. - drop ← blanks × lineType < 2 - # Group blocks based on blockStart, with type ¯1 lines excluded. - blocks ← (1 -˜ (lineType ≥ 0) × +`blockStart) ⊔ drop ↓¨ 𝕩 - - # To process a block, pick the appropriate function from procFns. 
- ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b } - JoinLines (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks -} - -################################ -# Testing -# Uses the test cases at https://spec.commonmark.org/0.29/spec.json -# since Github doesn't seem to have published theirs -TestSections ← { - tests ← ¯2 ↓˘ 8⊸(÷˜⟜≠∾⊣)⊸⥊2↓•LNS •path∾"spec.json" - tests ↩ ((⊑2+⊐⟜':')¨∘⊏ ((-','=¯1⊑⊢)↓↓)¨⎉1 ⊢) tests - testSection ← (1↓¯1↓⊢)¨ 5⊏˘tests - UnEscape ← { - EscapeChar ← { ("\""tn"⊐𝕩) ⊏ "\"""∾•UCS 9‿10 } - esc ← IsEscaped 𝕩 - (¬1⌽esc) / EscapeChar⌾(esc⊸/) 𝕩 - } - RunTest ← { - in‿exp ← UnEscape∘(1↓¯1↓⊢)¨2↑𝕩 - out ← 0 Markdown (•UCS 10) ((⊢-˜¬×+`)∘=⊔⊢) in - ⟨exp≡out,in,exp,out,2⊑𝕩⟩ - } - - ignore ← (2 ⊏˘ tests) ∊ ⟨"47","85"⟩ - res ← 1 ↓˘ (¬⊏˘)⊸/ RunTest˘ tests /˜ ignore < testSection ∊ 𝕩 - res -} - -################################ -# Syntax highlighting -GetHighlights ← { - idChars ← ⟨ - •d∾"¯.π∞" - ' '+⌾•UCS•a - •a - "_" - ⟩ - classes‿chars ← <˘ ⍉ 2⊸(÷˜⟜≠∾⊣)⊸⥊⟨ - 0 , " "∾•UCS 9‿10 - "Value" , ¯1⊏˘5‿2⥊"𝕨𝕩𝕗𝕘𝕤" - "Function" , "+-×÷⋆√⌊⌈|¬∧∨<>≠=≤≥≡≢⊣⊢⥊∾≍↑↓↕⌽⍉/⍋⍒⊏⊑⊐⊒∊⍷⊔!"∾¯1⊏˘5‿2⥊"𝕎𝕏𝔽𝔾𝕊" - "Modifier" , "˜˘¨⌜⁼´`" - "Composition" , "∘○⊸⟜⌾⊘◶⎉⚇⍟" - "Number" , ∾idChars - "Gets" , "←↩→" - "Paren" , "()" - "Bracket" , "⟨⟩" - "Brace" , "{}" - "Ligature" , "‿" - "Nothing" , "·" - "Separator" , "⋄," - "Comment" , "#" - "String" , "'""" - ⟩ - classTag ← ""‿""∾>{⟨"<span class='"∾𝕩∾"'>","</span>"⟩}¨1↓classes - - r←𝕩='#'⋄s←/(≠↑2⊸↓)⊸∧𝕩='''⋄d←/𝕩='"' - b←⟨s⋄¯1↓d⋄/r⟩ Trace○∾ ⟨2+s⋄1↓d⋄(⊢-¯1↓0∾⊢)∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩ - sc←+´(1‿2-˜≠classes)×(≠`∨⊢)∘((≠𝕩)↑/⁼∘∾)¨2↑((⊏˘b)⊏r)⊔○(∾⟜2)<˘b - col←sc⌈14|chars FindGroup 𝕩 - - w←(≠↑0∾⊢)⊸<id←col=5 - idc←1+5|1-˜(idChars FindGroup w/𝕩)+'_'=((1↓∾⟜0)⊸<id)/𝕩 - col↩((id/+`w)⊏0∾idc)⌾(id⊸/)col - - col↩(1⌽col)⊣⌾((𝕩=⊑"𝕩")⊸/)col - - bd←(≠↑¯1∾⊢)⊸≠col - bc←bd/col - (⥊(0<bc)⊸/)¨⟨bc⊏classTag,2↕1-˜/bd∾1⟩ -} -# Return highlights for areas in 𝕩 where 𝕨 is true. 
-GetMultiHighlights ← { - start ← 0⊸Shl⊸< 𝕨 - groups ← (1 -˜ 𝕨 × +` start) ⊔ 𝕩 - <∘∾˘ ⍉ ((≠∾2˜) ⥊ ·> (/start) {𝕨⊸+⌾(1⊸⊑)𝕩}⟜GetHighlights¨ ⊢) groups -} - -head ← "<head><link href=""style.css"" rel=""stylesheet""/></head>"∾lf -ConvertFile ← head ∾ Markdown∘•LNS |
