Created
November 20, 2024 00:52
-
-
Save paulsmith/cbff2376b56b77feef2348f602f5b2fa to your computer and use it in GitHub Desktop.
buffered Go scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bufscan | |
// GoToken bundles the three values produced by one call to
// scanner.Scanner.Scan so that a token can be passed around, compared,
// and pushed back as a single value.
type GoToken struct {
	Pos token.Pos   // position of the token
	Tok token.Token // token class (token.IDENT, token.LPAREN, token.EOF, ...)
	Lit string      // literal text reported by the scanner; may be empty
}
type BufGoScanner struct { | |
scan *scanner.Scanner | |
buf *GoToken | |
} | |
func NewBufGoScanner(filename string, baseOffset int, source []byte) *BufGoScanner { | |
fset := token.NewFileSet() | |
file := fset.AddFile(filename, baseOffset, len(source)) | |
var scan scanner.Scanner | |
scan.Init(file, source, nil, 0) | |
bs := &BufGoScanner{scan: &scan} | |
return bs | |
} | |
func (bs *BufGoScanner) empty() bool { | |
return bs.buf == nil | |
} | |
func (bs *BufGoScanner) Get() (tok GoToken) { | |
if bs.empty() { | |
tok.Pos, tok.Tok, tok.Lit = bs.scan.Scan() | |
} else { | |
tok = *bs.buf | |
bs.buf = nil | |
} | |
return | |
} | |
func (bs *BufGoScanner) Unget(tok GoToken) { | |
if bs.empty() { | |
bs.buf = &tok | |
} else { | |
panic("Unget() before call to Get()") | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A contrived example unget()ing a token after finish matching an expression | |
func ExampleBufGoScanner() { | |
src := []byte(`x := (1 + 2) * 3`) | |
bs := NewBufGoScanner("", 1, src) | |
scanExpr := func(bs *BufGoScanner) string { | |
// enters one token past opening '(' | |
expr := "" | |
tok := bs.Get() | |
for { | |
if tok.Tok == token.RPAREN || tok.Tok == token.EOF { | |
break | |
} | |
if tok.Lit == "" { | |
expr += tok.Tok.String() | |
} else { | |
expr += tok.Lit | |
} | |
tok = bs.Get() | |
} | |
bs.Unget(tok) | |
return expr | |
} | |
loop: | |
for { | |
tok := bs.Get() | |
switch tok.Tok { | |
case token.EOF: | |
break loop | |
case token.LPAREN: | |
expr := scanExpr(bs) | |
fmt.Println("expr:", expr) | |
break loop | |
} | |
} | |
nextTok := bs.Get() | |
fmt.Println("next token:", nextTok.Tok.String()) | |
// Output: | |
// expr: 1+2 | |
// next token: ) | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"testing" | |
) | |
func TestGoScanner(t *testing.T) { | |
tests := []struct { | |
name string | |
in string | |
sequence func(*BufGoScanner) error | |
}{ | |
{ | |
"get consumes token", | |
"a = b", | |
func(s *BufGoScanner) error { | |
t1 := s.Get() | |
t2 := s.Get() | |
if t1 == t2 { | |
return fmt.Errorf("successive Get() return same token: %#v", t1) | |
} | |
return nil | |
}, | |
}, | |
{ | |
"unget after get", | |
"a = b", | |
func(s *BufGoScanner) error { | |
t1 := s.Get() | |
s.Unget(t1) | |
t2 := s.Get() | |
if t1 != t2 { | |
return fmt.Errorf("token after Unget() differs: %#v vs. %#v", t1, t2) | |
} | |
return nil | |
}, | |
}, | |
{ | |
"double unget panics", | |
"a = b", | |
func(s *BufGoScanner) (err error) { | |
defer func() { | |
if r := recover(); r == nil { | |
err = fmt.Errorf("double Unget() did not panic") | |
} | |
}() | |
t := s.Get() | |
s.Unget(t) | |
// should panic | |
s.Unget(t) | |
return | |
}, | |
}, | |
} | |
for _, test := range tests { | |
t.Run(test.name, func(t *testing.T) { | |
t.Parallel() | |
bs := NewBufGoScanner("", 1, []byte(test.in)) | |
if err := test.sequence(bs); err != nil { | |
t.Error(err) | |
} | |
}) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment