From 2a178f6811e81d575e4aead3ee0eb15e8867d611 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Sat, 12 Sep 2020 22:56:03 -0400 Subject: Process character and string literals separately --- src/c.bqn | 33 ++++++++++++++++----------------- src/cjs.bqn | 2 +- 2 files changed, 17 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/c.bqn b/src/c.bqn index 4df922db..85bd532d 100644 --- a/src/c.bqn +++ b/src/c.bqn @@ -10,12 +10,11 @@ charSet‿cgl←(∾ ≍○< ≠¨)⟨ "·" # nOthing # Use last character in case of UTF-16 (like dzaima/BQN) ¯1⊏˘10‿∘⥊"𝕊𝕏𝕎𝔽𝔾𝕤𝕩𝕨𝕗𝕘" # Input (𝕣 pending; ℝ not allowed) - "@" # nUll character '0'+↕10 # Digit "¯.π∞" # Numeric "_"∾˜⥊"aA"+⌜↕na←26 # Alphabetic (¯1↓"𝕨")∾" "∾@+9 # Whitespace (or special name prefix in UTF-16) -# #'" eliminated during tokenization + "#'""@" # Preprocessed characters ⟩ ErrUnknownChars←{ ⟨"Unknown character","s"/˜1<≠𝕩,": ",𝕩⟩∾⊸!0 @@ -24,9 +23,9 @@ CharCode←charSet{ Chk ← ⊢⊣ErrUnknownChars∘(≠/⊣)⍟≢⟜(⊏⟜𝕗) g←⍋𝕗 ⋄ ⊢ Chk g⊏˜1-˜1⌈(g⊏𝕗)⍋⊢ } -bF‿bM‿bC‿bS‿bG‿bB‿bL‿bO‿bI‿bU‿bD‿bN‿bA‿bW←≍¨˜⟜(+`≠↑0∾⊢)cgl +bF‿bM‿bC‿bS‿bG‿bB‿bL‿bO‿bI‿bD‿bN‿bA‿bW‿bP←≍¨˜⟜(+`≠↑0∾⊢)cgl M←1⊸⊑(0⊸≤∧>)-⟜⊑ -vi←⊑bU +vi←⊑bD charRole←((⊑bI)↑/0∾3↑cgl)∾(5/⌽↕2)∾0 spc←⥊3‿5‿6-⌜3‿0 @@ -38,27 +37,29 @@ Tokenize←{ st←¯1↓Se⟜(1↑˜≠)∾⟜≠q⍋e⋄q/˜↩st⋄e/˜↩st n←≠𝕩⋄a←n↑/⁼q⋄b←n↑/⁼e f←¬(≠`a∨b)∨1⌽qe←a∧(≠↑0∾⊢)⊸∧dm - qm←sm∨dm⋄a∧↩qm⋄b∧↩qm - "Unclosed quote"!¬∨´qm∧bqe)×≠`a∨b + "Unclosed quote"!¬∨´(sm∨dm)∧bqe)×≠`dm∧a∨b + lv←chr(⊣∾(1+¯1⌈´⊣)+⊢)○⊐str UM←(≠↑¯1∾⌈`)⊸< - str/˜↩UM ls←⊐str - cl←f/b + lit←lv UM⊸/chr∾str + li←(ci∾/si)⊏+`(≠↑0∾⊢)f - c←cl-˜CharCode(⊑charSet)¨⌾(cl⊸/)f/𝕩 + c←CharCode f/𝕩 w←(≠↑0∾⊢)⊸iu⊏ti ti↩(us/˜(1↓0∾˜⊢)⊸