From e69bbebc7c2057b0c31bee42308c75ed8a588f55 Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Sun, 25 Oct 2020 15:40:08 -0400 Subject: Rename some variables in the tokenizer --- src/c.bqn | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/c.bqn b/src/c.bqn index 19ebd508..96e243b8 100644 --- a/src/c.bqn +++ b/src/c.bqn @@ -36,9 +36,9 @@ T←⌈`× ⋄ IT←↕∘≠⊸T ⋄ I1T←(1+↕∘≠)⊸T # Identifiers then literal tokens are numbered starting at vi Tokenize←{ # Resolve comments and strings - r←𝕩='#'⋄s←/0‿0⊸«⊸∧sm←𝕩='''⋄d←/dm←𝕩='"' - g←⍋q←∾⟨ s⋄¯1↓d⋄/r⟩ ⋄q↩g⊏q # Open indices - e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`r)⊸//(𝕩=lf)∾1⟩ # Matching close indices + c←𝕩='#'⋄s←/0‿0⊸«⊸∧sm←𝕩='''⋄d←/dm←𝕩='"' + g←⍋q←∾⟨ s⋄¯1↓d⋄/c⟩ ⋄q↩g⊏q # Open indices + e← g⊏∾⟨2+s⋄ 1↓d⋄-⟜»∘⊏⟜(0∾+`c)⊸//(𝕩=lf)∾1⟩ # Matching close indices Se←{(⊏˜𝕨)Se 1¨⌾((𝕩/𝕨)⊸⊏)𝕩}⍟(0=¯1⊑⊢) # Mark reachable openings St←(≠𝕩)↑·/⁼((≠↑∾⟜≠Se 1∾0¨)q⍋e)⊸/ # All indices → reached mask a←St q⋄b←St e⋄f←¬≠`ab←a∨b # Open/close masks; filter @@ -51,25 +51,25 @@ Tokenize←{ str←1↓¨𝕩⊔˜1-˜(+`si←a>»qe)×≠`dm∧ab # Strings (indices /si) # Extract words: identifiers and numbers - c←CharCode f/𝕩 - w←»⊸n←l∧(+`w)⊏0∾tw<0 # Identifier/Number masks - num←ReadNums n∨⟜«⊸/○(0⊸∾)c×l # Numbers - ir←(us/˜«⊸us)×+`w>n)⊔c # Identifiers + t←CharCode f/𝕩 + w←»⊸n←l∧(+`w)⊏0∾wk<0 # Identifier/Number masks + num←ReadNums n∨⟜«⊸/○(0⊸∾)t×l # Numbers + ir←(us/˜«⊸us)×+`w>n)⊔t # Identifiers # Deduplicate literals and identifiers; other cleanup - ki←((⍋⊏⟜n)⊸⊏/w)∾(ci∾/si)⊏+`»f # Indices in c + ki←((⍋⊏⟜n)⊸⊏/w)∾(ci∾/si)⊏+`»f # Indices in t k←id‿num‿chr‿str⋄k(⊢>¯1»⌈`)⊸/¨˜↩j←⊐¨k # IDs j into uniques k - c↩(w∨¬l∨c M bW)/(∾j++`vi»≠¨k)⌾(ki⊸⊏)c # Add IDs; remove words/whitespace - c-↩c(M×-⟜⊑)bS # Separators are equivalent - c/˜↩¬(1»(c∊2‿4+⊑bB)∨⊢)⊸∧c=sep # Remove repeated and leading separators - c/˜↩¬(1«c∊3‿5+⊑bB)∧c=sep # ...and trailing ones. In sequence for repeated trailing. - cr←ir⌾((c M vi∾≠⊑k)⊸/)(vi⌊c)⊏charRole∾0 # Role - c+↩5×c M⟨⊑bI,5⟩ # Case-insensitive special names - ⟨c,cr,k⟩ + t↩(w∨¬l∨t M bW)/(∾j++`vi»≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace + t-↩t(M×-⟜⊑)bS # Separators are equivalent + t/˜↩¬(1»(t∊2‿4+⊑bB)∨⊢)⊸∧t=sep # Remove repeated and leading separators + t/˜↩¬(1«t∊3‿5+⊑bB)∧t=sep # ...and trailing ones. In sequence for repeated trailing. + r←ir⌾((t M vi∾≠⊑k)⊸/)(vi⌊t)⊏charRole∾0 # Role + t+↩5×t M⟨⊑bI,5⟩ # Case-insensitive special names + ⟨t,r,k⟩ } # 𝕩 is a list of tokens that contains the numeric literals, each -- cgit v1.2.3