# The Markdown function is a markdown to html converter for a "good
# enough" subset of Github-flavored markdown, as specified at
# https://github.github.com/gfm/ .

# Extensions are used whenever a source filename is given (mainly just
# so testing won't use them). They:
# - Add id= slugs to headers that match Github's, for linking
# - Adjust relative links to account for filename changes
# - Highlight inline and block code as BQN
# - Place code blocks in <pre> tags only, not <pre><code>
# - Insert results into doubly-indented (8 spaces) code blocks
# - Add links to open and execute code in the REPL

# Supports:
# - ATX headings (start with hashes #)
# - Paragraphs
# - Indented code blocks
# - Inline and raw HTML in a way that doesn't match the spec at all
# - Tables
# - Lists, unordered with single-line items only; can be nested
# - Inlines: code fully, links partially, and emphasis somewhat

# Important missing features:
# - Thematic breaks like *** or ---
# - Setext headings (underlined with ==== or ----)
# - Fenced code blocks (marked off with ``` or ~~~)
# - Block quotes (start with >)
# - Strikethrough (~~text~~)
# - Images (like links)
# - Hard line breaks (trailing spaces or backslash)

# Here, a markdown file is represented as a list of its lines, which are
# strings (they don't include any line ending character).
# The html file is constructed directly as a string, using Html.

################################
# Utilities

# Linefeed
lf ← @+10

# Index of first zero, or number of leading 1s in a boolean list.
# If there is no zero, the result is the length of the list.
Lead ← ⊑ ⊐⟜0

# 𝕨 is a list of lists. Find the first of these lists each cell of 𝕩
# belongs to. A cell found in none of them gets index ≠𝕨.
FindGroup ← {
  i ← (∾𝕨) ⊐ 𝕩  # Index in all cells of 𝕨
  e ← +`≠¨𝕨     # Index past the end of each group of 𝕨
  e ⍋ i         # How many end-indices does each element pass?
}

# Count the number of consecutive true values up to the current element.
# To do this, subtract the index of the last false character from the
# current index.
CountRuns ← { (1+↕≠𝕩) (⊣ - ⌈`∘×) ¬𝕩 }

# 𝕩 is a string; return a mask of the characters that are escaped, that
# is, preceded by an odd number of backslashes (since a backslash can
# escape another backslash).
# Another implementation is {»<`𝕩='\'}.
IsEscaped ← { » 2 | CountRuns 𝕩 = '\' }

# Remove leading (∧`) and trailing (∧`⌾⌽) spaces
Trim ← { 𝕩 /˜ ¬ (∧` ∨ ∧`⌾⌽) ' '=𝕩 }

# Find whether 𝕨 was true at the last index where 𝕩 was false, in each
# position.
PrecedesGroup ← {
  # We prepend a 0 to 𝕨, so that 0 is the "before start" index, with a
  # false value, and normal indices are increased by 1.
  𝕨 ∾˜↩ 0
  inds ← 1 + ↕≠𝕩
  # Zero out indices where 𝕩 was true, and find the greatest index so
  # far at each position.
  last ← ⌈` inds × ¬𝕩
  last ⊏ 𝕨
}

# 𝕨 is a list of possible expression start indices in any order and 𝕩 is
# the corresponding endpoints. The expressions are mutually exclusive
# and do not nest, and are enabled in index order. Return a shape ·‿2
# array where the rows give the start and end of each enabled expression
# in index order.
Trace ← {
  # 𝕨 is a list with one index for each possible start, giving a later
  # start that is known to be enabled if that one is.
  # 𝕩 is a list of all starts known to be enabled.
  # A "stop" position that follows all expressions tells when to stop.
  # At each step the distance from a start to its successor in 𝕨 is
  # doubled, so the maximum number of steps is about 2⋆⁼≠𝕩.
  En ← {
    𝕩 ∾↩ 𝕩⊏𝕨      # Add starts following from an enabled one
    𝕨 ↩ ⊏˜ 𝕨      # Double the number of steps in 𝕨
    𝕨 En 𝕩        # Repeat
  }⍟{stop≠¯1⊑𝕩}   # until the stop is reached

  g ← ⍋𝕨                  # Order expressions by starting index
  start ← g⊏𝕨
  end   ← g⊏𝕩
  next ← start ⍋ end      # An expression's successor starts after it ends
  next ∾↩ stop←≠next      # The stop node is its own successor
  enabled ← stop⊸>⊸/ next En ⋈0  # Search, then remove stops
  enabled ⊏ start≍˘end           # List of enabled starts and ends
}

# Join lines with newline characters. Include a trailing newline.
JoinLines ← ∾ ∾⟜lf¨

# Given a list of begin-end pairs run together, return a list of all the
# values in those ranges joined in order. Both endpoints of each pair
# are included.
Ranges ← {
  R ← {𝕨+↕𝕩¬𝕨}   # Single range
  𝕩 ↩ ∘‿2 ⥊ 𝕩    # Reshape into pairs
  ∾ R¨˝˘ 𝕩
}

# ∊⟜Ranges assuming 𝕨 is sorted
InRanges ← {
  𝕩 +↩ 2|↕≠𝕩  # Since ⍋ works with half-open intervals
  2 | 𝕩 ⍋ 𝕨
}

# Create an html node from a tag name and interior text.
# 𝕨 (open) is the text of the opening tag: the tag name, optionally
# followed by a space and attributes. 𝕩 (contents) is the interior text.
# The closing tag uses only the tag name, that is, 𝕨 up to its first
# space.
Html ← {open 𝕊 contents:
  close ← (⊑open⊐" ") ↑ open
  ∾ ⟨"<",open,">" , contents , "</",close,">"⟩
}

# Insert and remove things from the list 𝕩:
# - include is the mask of elements to keep in 𝕩
# - add is a list of lists to be inserted
# - pos is the list of positions where they should start
# Elements are added just after the given position in 𝕩, in the order
# they appear in ∾add.
Modify ← { ⟨include,add,pos⟩𝕊𝕩:
  ((/include)∾(≠¨add)/pos) ⍋⊸⊏ (include/𝕩)∾∾add
}

# URL encoding for links to the REPL
UTF8 ← ∾ (2⋆7) (⊣+(2⋆6){𝕨 ≤◶⟨⥊⊢-2×-⟜𝕗 ⋄ 𝕗(|∾˜(2÷˜⌊⟜𝕨)𝕊⌊∘÷˜)⊢⟩ 𝕩}¨) -⟜@
Base64 ← {
  b64 ← Ranges "AZaz09++//"
  b←3|↕l←≠u←UTF8 𝕩
  # M makes room for four output values per three input bytes: it
  # duplicates the entry at each position with b=0, then zeroes the
  # first of every four results.
  M←((0<↕4)⥊˜≠)⊸× (1+0=b)⊸/
  v←(4⋆1+b) ((⌊∘÷˜) «⊸+○M (64÷⊣)×|) u
  (v⊏b64)∾(3|-l)⥊'='  # Pad with = to a multiple of 4 characters
}

# Various URLs
siteURL ← "https://mlochbaum.github.io/BQN/"
tryURL ← siteURL∾"try.html#code="
repoURL ← "https://github.com/mlochbaum/BQN"
blobURL ← repoURL∾"/blob/master/"

# Environments
NewREPL ← •ReBQN∘{repl⇐"strict"}
_getCodeExec ← {𝕗⋄NewREPL@}
_getSvgExec ← {𝕗
  e←NewREPL@
  ⟨"","",GetHighlights‿Modify‿E⟩ E "GetHighlights‿Modify‿Eval←•args"
  E •file.Chars "svg.bqn"
  JoinLines⍟(1<≡)∘E
}

################################
Markdown ← {filename𝕊𝕩:
  extensions ← filename ≢ 0
  path ← extensions◶""‿(⊢/˜·∨`⌾⌽'/'⊸=) filename
  CodeExec ← @_getCodeExec
  GenHtml ← @_getSvgExec

  ######
  # First we classify each line based on the type of block it can start.
  ClassifyLine ← (0<≠)◶(0‿0)‿{
    ind ← ⊑ lineChars FindGroup ⊏𝕩
    getLen ← ind ⊑ lineClas∾⟨0⟩
    l ← GetLen 𝕩
    ⟨ind ∧ l>0 ⋄ l⟩
  }

  # Character entity escaping
  # In order to use this with other modifications such as highlighting,
  # CharEntities returns a mask of characters to be escaped, and their
  # corresponding escapes.
  CharEntities ← {1¨⊸𝕊𝕩;  # 𝕨 gives characters to potentially escape
    # The string gives escapes and their names, separated by spaces.
    # First split it on the first character.
ce ← (1-˜¬×+`)∘=⟜⊑⊸⊔ " ""quot &gt" # Characters to escape are given first chars ← ⊑¨ce # HTML character entities start with & and end with ; entities ← ("&"∾∾⟜";")¨ 1↓¨ce # Replace a character if 𝕨 is not set and it's on our list. ind ← chars ⊐ 𝕩 useEntity ← 𝕨 ∧ ind < ≠chars ⟨¬ useEntity , entities ⊏˜ useEntity/ind , /useEntity⟩ } # Function to build REPL link # May include previous statements to define variables makeLink ← { lines ← names ← deps ← ⟨⟩ { ns‿assigned‿line 𝕊 𝕩: M ← ⍷∘∧∘∾ # Merge n ← ≠ ls ← 𝕩 ⊏˜ ⍷line # Lines with variables lc ← (≠lines) + ↕n # Indices they'll have lines ∾↩ ls JoinLines 𝕩 ∾˜ lines ⊏˜ (-n) ↓ M lc { l𝕊[n,a]: e ← (≠names) > i ← names ⊐ n # Look up the names d ← l ∾˜ M (e/i) ⊏ deps # Dependencies for this line deps M⟜d¨⌾(((a∧e)/i)⊸⊏)↩ # Add these to reassigned names names ∾↩ new ← (a∧¬e) / n # Add new names deps ∾↩ d¨ new # With this line's dependencies d }⟜⍉¨ (⊐line) ⊔ ns≍˘assigned } } # Non-empty lines in code blocks have 4 leading spaces ProcCode ← { # Strip the leading spaces lines ← 4 ↓¨ 𝕩 code ← JoinLines lines # Highlight and unescape html-unsafe characters c‿ci ← extensions◶(⋈˜⟨⟩)‿GetHighlights code em‿e‿ei ← CharEntities code # If every line is indented by at least 4 additional spaces, we will # execute each one and insert the results. r‿ri‿link ← { extensions ? ∧´ ' ' = ∾ 4 (⌊⟜≠ ↑ ⊢)¨ lines ? 
# Top-level separators are those not inside brackets m ← NotCommentOrString code depth ← +` m × "(){}⟨⟩[]" (⊣(≠⊸>×·¬⊸-2|⊢)⊐) code top ← m ∧ 0=depth sep ← top ∧ code∊"⋄,"∾lf # Top-level separators break ← sep∧lf=code # Mask of line breaks # Don't show assignment result PG ← PrecedesGroup a ← m∧code∊"←↩" ⋄ sid ← m∧code∊" "∾∾idChars sa ← a ∧ ¬(¬sep) PG sid∨a # Silent assignment show ← ¬ break / sa PG ¬sa∨»sep # If last expression began with one # Remove indentation and split lines notIndent ← (0⊸=∨4⊸<) CountRuns ¬m∧lf=code parts ← code ⊔˜ 1 -˜ (notIndent∧¬break) × 1+`break # Evaluate E ← ⊣◶⟨"",(⥊∾⟜lf⎉1)∘•Fmt⊢⟩⟜CodeExec ShowErr ← lf∾˜"span class='Error'"Html"Error: "∾(∧`lf⊸≠)⊸/⎊•Repr r ← show ('#'≠⊑∘⊢)◶⟨"",E⎊(ShowErr∘•CurrentError⊢)⟩⍟(0<≠∘⊢)¨ parts # Link that runs the code # Parse assignments and variables to add previous lines if needed # First locate the names In ← 1=+⟜(↕2)⊸⍋ ma‿sp‿st ← {m∧code=𝕩}¨"↩ ‿" ⋄ id←sp » '\' = 𝕩 # Non-escaped bars 1 -˜ (¬r∨«b>r) × o + +` r } alignments ← (" align="""∾∾⟜"""")⍟(0<≠)¨ ""‿"right"‿"left"‿"center" ProcTable ← { rows ← (Trim¨ CutTableRow⊸⊔)¨ 𝕩 incl ← ¬ rule ← (∧´∾∊"-:"˙)¨ rows align ← alignments ⊏˜ (+˜⊸+´0‿¯1⊏⊢)¨ ':' = ⊑ rule / rows rows ↩ (((≠align)⌊≠)⊸↑ ProcInline¨)¨⌾(incl⊸/) rows rows ↩ (⊏rows) (⊢ ∾ ⟨""⟩ /˜ 0⌈-○≠)¨ rows rowType ← incl / +` rule # Head or body tags ← rowType ⊏ "th"‿"td" DoRow ← { lf ∾ JoinLines 𝕨 Html¨ 𝕩 } rows ↩ (<˘ tags ∾⌜ align) DoRow¨ incl/rows rowGroups ← (rowType∾2) ⊔ "tr"⊸Html¨ rows sections ← "thead"‿"tbody" Html⟜(lf ∾ JoinLines)¨ rowGroups "table" Html lf ∾ JoinLines (0 < ≠¨rowGroups) / sections } # Paragraphs ProcParagraph ← { "p" Html ProcInline ¯1 ↓ JoinLines Trim⌾(¯1⊸⊑) (Lead ' '⊸=)⊸↓¨ 𝕩 } # HTML blocks # Lazy rule: if it starts with < and contains >, it's probably HTML IsHtmlBlock ← (""⍷⊑⟜𝕩 lastCommentEnd ← ¯1 comInd ← ∾ comments ← { lastCommentEnd ↩ end ← {𝕊⍟(¬EndsComment)1+𝕩}⍟(lastCommentEnd⊸<) 𝕩-1 𝕩 + ↕end¬𝕩 # A list of indices }¨ commentStart newBlock ← (≠𝕩)↑/⁼ ⊑¨ (0<≠¨)⊸/ comments lineType 
5¨⌾(comInd⊸⊏)↩ lineDat 2¨⌾(comInd⊸⊏)↩ # Lines that could be included in code blocks (will be refined) codeMask ← nonEmptyMask ∧ (lineType ≠ 5) ∧ blanks ≥ 4 paragraphMask ← 0 = lineType # A header can't have 4 spaces of indentation. If it doesn't become # part of a code block, it will be included in a paragraph. lineType -↩ codeMask ∧ 1 = lineType # Tables are made up of rows that would otherwise be paragraph rows. # They are indicated by the delimiter row, consisting of only a few # allowed characters, preceded (!) by a header row with the same # number of cells. IsTD ← (∧´ ∊ ∾ ⊣ ∊˜ 2↑⊢)⟜"-|: " tableMask ← (0⌾⊑ nonEmptyMask) ∧ paragraphMask ∧¬ codeMask tableDelimMask ← { 𝕩 IsTD¨∘⊣⌾(𝕨⊸/) 𝕨 }⟜𝕩 tableMask delimValid ← (⊢ =○(≠∘⊔∘CutTableRow¨ ⊏⟜𝕩) -⟜1) / tableDelimMask headerMask ← « delimValid⌾(tableDelimMask⊸/) 0¨𝕩 tableMask ↩ headerMask (⊢ ∧ ⊣ ∨ ⊣ PrecedesGroup <) tableMask lineType 3¨⌾(tableMask⊸/)↩ # Code blocks consist of indented lines, possibly with blank lines # in between. They must be separated from paragraphs by blank lines. codeMask ∧↩ ¬ paragraphMask PrecedesGroup codeMask codeMask ∨↩ codeMask (⊢ ∧ PrecedesGroup ∧ PrecedesGroup⌾⌽) lineType < 0 lineType 2¨⌾(codeMask⊸/)↩ # List items continue over following indented lines listMask ← (0=blanks) ∧ 4 = lineType listIndent ← blanks ≥ »blanks + listMask×lineDat listMask ↩ codeMask < listMask (⊣ PrecedesGroup <) listIndent lineType 4¨⌾(listMask⊸/)↩ # Lines continue blocks if they are part of the same multi-line # type as the previous line, and otherwise start new ones. # Headers (type 1) always start new blocks. newBlock ∨↩ 1 = lineType blockStart ← nonEmptyMask ∧ newBlock ∨ ¯1⊸»⊸≠ lineType # Headers and paragraphs ignore leading blanks. drop ← blanks × lineType < 2 # Group blocks based on blockStart, with type ¯1 lines excluded. blocks ← (1 -˜ (lineType ≥ 0) × +`blockStart) ⊔ drop ↓¨ 𝕩 # To process a block, pick the appropriate function from procFns. 
ProcBlock ← {t‿l G b: f←t⊑procFns ⋄ l F ⊑b } b ← (blockStart / lineType≍˘lineDat) <∘ProcBlock˘ blocks JoinLines b } ################################ # Syntax highlighting # Characters in identifiers. These are also used in ProcCode to detect # if a statement is an assignment. idChars ← ⟨ ('0'+↕10)∾"¯.π∞" "𝕣"∾˜'a'+↕26 'A'+↕26 "_" ⟩ # Return BQN highlights for an string 𝕩, as an ⟨add,pos⟩ list for Modify # (include will be all 1s). # 𝕨 indicates a character from 𝕩 is a divider, which ends comments and # can't be contained in string literals. hlchars‿classTag ← { func‿mod1‿mod2 ← •Import "src/glyphs.bqn" # Characters used by BQN, and the HTML class they are associated with. classes‿chars ← <˘ ⍉ ∘‿2⥊⟨ 0 , " "∾@+9‿10 # Should never be highlighted "Value" , "𝕨𝕩𝕗𝕘𝕤" "Function" , func∾"𝕎𝕏𝔽𝔾𝕊" "Modifier" , mod1 "Modifier2" , mod2 "Number" , ∾idChars # Will be classified among ↑↑ later "Gets" , "←⇐↩→" "Paren" , "()" "Bracket" , "⟨⟩[]" "Brace" , "{}" "Head" , ":;?" "Ligature" , "‿" "Nothing" , "·" "Separator" , "⋄," "String" , "'""@" "Comment" , "#" ⟩ # Turn non-whitespace classes into ⟨open,close⟩ html tags. classTag ← ""‿"" ∾ > {⟨"",""⟩}¨ 1↓classes chars‿classTag } CommentStringLocations ← { c ← 𝕩='#' le← / (𝕨 ⊢⊘∨ 𝕩=lf) ∾ 1 # Line endings (le) end every comment (/c) on the line, so take a copy # for each # before that line but not the previous. ce← le /˜ -⟜» c/⊸⍋le # A single quote can only be used if there's another two places down. s ← /0‿0⊸«⊸∧𝕩=''' d ← /𝕩='"' css ← ∾ ⟨ s ⋄ ¯1↓d ⋄ /c ⟩ # Comment or string start cse ← ∾ ⟨ 2+s ⋄ 1↓d ⋄ ce ⟩ # Corresponding end indices # If 𝕨 is given, filter out strings with ends in different divisions. 
{css‿cse <∘=○(⊏⟜(+`0∾𝕩))´⊸(/¨)↩} 𝕨 # Table of (start,end) pairs b ← css Trace cse # Return the table, and a mask of which rows are comments ⟨b, (⊏˘b)⊏c⟩ } NotCommentOrString ← { i‿c ← 𝕨 CommentStringLocations 𝕩 i +↩ (¬c) ×⌜ 0‿1 # Strings include ending character; comments don't 1 ≠` (≠𝕩) ↑ 2|/⁼⥊i } GetHighlights ← { # Find each character's group, sending unknowns to 1 and # to 0. col ← (1-˜≠hlchars) (⊢-⊣×≤) hlchars FindGroup 𝕩 col-↩ 4×(𝕩='.')>«𝕩∊'0'+↕10 # Namespace dot: 5→1 # Table of start/end pairs, and which are comments b‿c ← 𝕨 CommentStringLocations 𝕩 # Given a list of pairs, get a mask indicating included regions ToMask ← (≠`∨⊢) (≠𝕩)↑/⁼∘∾ # Split rows and group into text‿comments tc ← c ∾⟜2⊸⊔ <˘b # Color with "String" and "Comment" col ⌈↩ +´ (2‿1-˜≠classTag) × ToMask¨ tc # Color numeric literals and identifiers id ← col=5 # ←→ 𝕩∊idChars w ← »⊸< id # Word (identifier or number) beginning mask wt ← idChars FindGroup w/𝕩 # Type based on first character wt+↩ '_' = («⊸ 𝕨 ⟨c, pos + adj⟩ } ################################ # Creating HTML files ConvertFile ← { MatchStart‿MatchEnd ← { ≤○≠◶0‿(⊣ ≡ (𝕩×≠)⊸↑) }¨ 1‿¯1 ⟨"Input file ",𝕩," is not markdown (*.md)"⟩ ∾⊸! ".md" MatchEnd 𝕩 fileout ← ".html" ∾˜ (¯6⊸↓∾"index"˙)⍟("README"⊸MatchEnd) ¯3↓𝕩 # Contents of file to convert md ← •file.Lines 𝕩 # Verify and remove the html link line: the output *is* the html file. IsView ← "*View this file"⊸MatchStart ∧ (siteURL∾fileout∾").*")⊸MatchEnd ⟨"File ",𝕩," has missing or incorrect view link"⟩ ∾⊸! IsView ⊑md out ← 𝕩 Markdown 2↓md parts ← (1-˜·(¬×1++`)'/'⊸=)⊸⊔ (⊑⊐⟜".")⊸↑ 𝕩 root ← ⊑ up ← ⥊∘/⟜≍⟜"../"¨ ⌽↕≠parts isInd ← "README" ≡ ¯1⊑parts RQ ← {'"'¨⌾(('''=𝕩)⊸/)𝕩} Link ← RQ {∾⟨""⟩} Clean ← "`` ` ``"⊸⍷ ∨´∘⊣◶⟨'`'⊸≠⊸/⊢,≠⊸(1≠`-⊸↑≠↑)˜/⊢⟩ ⊢ # Help pages have backticks in titles h1 ← (2≤≠)◶0‿("# "≡2⊸↑)¨⊸/md "Wrong number of titles in "‿𝕩 ∾⊸! 
1=≠h1 head ← "head" Html lf∾JoinLines " "⊸∾¨⟨ "shortcut icon' type='image/x-icon" Link "favicon.ico" "stylesheet" Link "style.css" "title" Html ("BQN"∾":"⊸(¬∘∊/⊣)∾" "∾⊢)⍟(¬·∨´"BQN"⍷⊢) Clean 2↓⊑h1 ⟩ repo ← "("∾")"∾˜ "a href='"‿repoURL‿"'" ∾⊸Html "github" crumbs ← up ("a href='"∾∾⟜"index.html'")⊸Html¨○((-isInd)⊸↓) (<"BQN")»parts nav ← RQ "div class='nav'" Html 3↓∾ " / "⊸∾¨ repo <⊸∾ crumbs front ← head ∾○(∾⟜lf) nav ("docs/"∾fileout) •file.Chars front ∾ out } ConvertFile¨ •args Markdown # Used by tester test/commonmark.bqn