Skip to content

Instantly share code, notes, and snippets.

@paulsmith
Created November 20, 2024 00:52
Show Gist options
  • Save paulsmith/cbff2376b56b77feef2348f602f5b2fa to your computer and use it in GitHub Desktop.
Save paulsmith/cbff2376b56b77feef2348f602f5b2fa to your computer and use it in GitHub Desktop.
buffered Go scanner
package bufscan
// GoToken bundles the three values produced by one call to
// scanner.Scanner.Scan so they can be passed around and buffered as a unit.
type GoToken struct {
// Pos is the token position reported by the scanner.
Pos token.Pos
// Tok is the token kind reported by the scanner.
Tok token.Token
// Lit is the literal string reported by the scanner; it may be empty.
Lit string
}
// BufGoScanner wraps a scanner.Scanner with a one-token push-back slot,
// letting a caller return a single token with Unget and read it again
// with Get.
type BufGoScanner struct {
// scan is the underlying scanner that tokens are read from.
scan *scanner.Scanner
// buf holds the pushed-back token, or nil when nothing is pushed back.
buf *GoToken
}
// NewBufGoScanner returns a BufGoScanner reading tokens from source.
//
// A fresh token.FileSet is created and a file named filename is added to it
// with base baseOffset and size len(source). The underlying scanner is
// initialized with a nil error handler and mode 0 (no scanner.Mode flags).
// The returned scanner starts with an empty push-back slot.
func NewBufGoScanner(filename string, baseOffset int, source []byte) *BufGoScanner {
	f := token.NewFileSet().AddFile(filename, baseOffset, len(source))

	s := new(scanner.Scanner)
	s.Init(f, source, nil, 0)

	return &BufGoScanner{scan: s}
}
// empty reports whether the push-back slot holds no token.
func (bs *BufGoScanner) empty() bool {
return bs.buf == nil
}
// Get returns the next token.
//
// If a token is waiting in the push-back slot, that token is returned and the
// slot is cleared. Otherwise the next token is read from the underlying
// scanner.
func (bs *BufGoScanner) Get() (tok GoToken) {
	if pending := bs.buf; pending != nil {
		bs.buf = nil
		return *pending
	}
	pos, kind, lit := bs.scan.Scan()
	return GoToken{Pos: pos, Tok: kind, Lit: lit}
}
// Unget stores tok in the push-back slot so that the next call to Get
// returns it.
//
// Only one token can be held at a time: Unget panics if the slot is already
// occupied, i.e. if it is called twice without a Get in between.
func (bs *BufGoScanner) Unget(tok GoToken) {
	if bs.buf != nil {
		panic("Unget() before call to Get()")
	}
	bs.buf = &tok
}
// A contrived example that Unget()s a token once it has finished matching a
// parenthesized expression.
func ExampleBufGoScanner() {
	bs := NewBufGoScanner("", 1, []byte(`x := (1 + 2) * 3`))

	// scanExpr is entered one token past the opening '('. It concatenates the
	// text of every token up to (but not including) the closing ')' or EOF,
	// then pushes that terminating token back for the caller to read.
	scanExpr := func(bs *BufGoScanner) string {
		text := ""
		cur := bs.Get()
		for cur.Tok != token.RPAREN && cur.Tok != token.EOF {
			// Tokens without a literal are rendered by their token name.
			piece := cur.Lit
			if piece == "" {
				piece = cur.Tok.String()
			}
			text += piece
			cur = bs.Get()
		}
		bs.Unget(cur)
		return text
	}

	// Advance to the first '(' and scan the expression that follows it;
	// stop quietly if the input ends first.
	for tok := bs.Get(); tok.Tok != token.EOF; tok = bs.Get() {
		if tok.Tok == token.LPAREN {
			fmt.Println("expr:", scanExpr(bs))
			break
		}
	}

	// The token scanExpr pushed back is the next one returned by Get.
	after := bs.Get()
	fmt.Println("next token:", after.Tok.String())
	// Output:
	// expr: 1+2
	// next token: )
}
package main
import (
"fmt"
"testing"
)
// TestGoScanner exercises the Get/Unget contract of BufGoScanner with a
// table of scenarios. Each scenario receives a fresh scanner over its own
// input and returns a non-nil error to report a failure.
func TestGoScanner(t *testing.T) {
	tests := []struct {
		name     string
		in       string
		sequence func(*BufGoScanner) error
	}{
		{
			"get consumes token",
			"a = b",
			func(s *BufGoScanner) error {
				t1 := s.Get()
				t2 := s.Get()
				if t1 == t2 {
					return fmt.Errorf("successive Get() return same token: %#v", t1)
				}
				return nil
			},
		},
		{
			"unget after get",
			"a = b",
			func(s *BufGoScanner) error {
				t1 := s.Get()
				s.Unget(t1)
				t2 := s.Get()
				if t1 != t2 {
					return fmt.Errorf("token after Unget() differs: %#v vs. %#v", t1, t2)
				}
				return nil
			},
		},
		{
			"double unget panics",
			"a = b",
			func(s *BufGoScanner) (err error) {
				// The named result lets the deferred function report a
				// failure when the expected panic never happens.
				defer func() {
					if r := recover(); r == nil {
						err = fmt.Errorf("double Unget() did not panic")
					}
				}()
				t := s.Get()
				s.Unget(t)
				// should panic
				s.Unget(t)
				return nil
			},
		},
	}
	for _, test := range tests {
		// Rebind the loop variable: the subtests are parallel, so their
		// bodies run after this loop has finished. Before Go 1.22 all
		// iterations share one variable, so without this copy every subtest
		// would run the last table entry.
		test := test
		t.Run(test.name, func(t *testing.T) {
			t.Parallel()
			bs := NewBufGoScanner("", 1, []byte(test.in))
			if err := test.sequence(bs); err != nil {
				t.Error(err)
			}
		})
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment