From f1673cf6b2736c84a320ed50da8d16109d4ea8eb Mon Sep 17 00:00:00 2001 From: Marshall Lochbaum Date: Sun, 25 Oct 2020 21:53:52 -0400 Subject: Return source code positions from Tokenize --- src/c.bqn | 10 +++++----- wc.bqn | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/c.bqn b/src/c.bqn index 1c51773e..4a496a23 100644 --- a/src/c.bqn +++ b/src/c.bqn @@ -51,7 +51,7 @@ Tokenize←{ str←1↓¨𝕩⊔˜1-˜(+`si←a>»qe)×≠`dm∧ab # Strings (indices /si) # Extract words: identifiers and numbers - t←CharCode f/𝕩 + t←CharCode (ind←/f)⊏𝕩⋄Fi←{ind/˜↩𝕨⋄𝕨/𝕩} # Track source code indices w←»⊸¯1»⌈`)⊸/¨˜↩j←⊐¨k # IDs j into uniques k - t↩(w∨¬l∨t M bW)/(∾j++`vi»≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace + t↩(w∨¬l∨t M bW)Fi(∾j++`vi»≠¨k)⌾(ki⊸⊏)t # Add IDs; remove words/whitespace t-↩t(M×-⟜⊑)bS # Separators are equivalent p←≠`1¨sb←¯1↓1↓/1(∾≠∾˜)t=sep # Separator group boundaries (excludes leading and trailing) sk←sb/˜p>∨⟜«(p+(sb-p)⊏t)∊3‿5+⊑bB # Keep the first of each group that's not just inside a bracket - t/˜↩1¨⌾(sk⊸⊏)t≠sep # Remove the rest + t Fi˜↩1¨⌾(sk⊸⊏)t≠sep # Remove the rest r←ir⌾((t M vi∾≠⊑k)⊸/)(vi⌊t)⊏charRole∾0 # Role t+↩5×t M⟨⊑bI,5⟩ # Case-insensitive special names - ⟨t,r,k⟩ + ⟨t,r,k,ind⟩ } # 𝕩 is a list of tokens that contains the numeric literals, each @@ -195,7 +195,7 @@ LEB←{ } Compile←{ - ⟨tok,role,val⟩←Tokenize 𝕩 + ⟨tok,role,val,ind⟩←Tokenize 𝕩 ⟨bc,prim,blk⟩←⟨role,≠⊑val⟩ Parse tok ⟨bc, ∾⟨prim⊏𝕨⟩∾1↓val, <˘⍉>blk⟩ } diff --git a/wc.bqn b/wc.bqn index 95dd7720..5a8fc398 100644 --- a/wc.bqn +++ b/wc.bqn @@ -47,7 +47,7 @@ fntab←⍉(0¨tab1)∾tab1≍tab2 f64←127-3 GenFn←{ - ⟨t,r,k⟩←Tokenize𝕩 + ⟨t,r,k,i⟩←Tokenize𝕩 nVar←≠⊑k⋄lits←∾1↓k nLoc←nVar-𝕨⊢0 t⊏˜↩⍋+`-˝(2‿3+⊑bB)=⌜t -- cgit v1.2.3