Last active
February 27, 2019 08:11
-
-
Save iOliverNguyen/9fbabdf0d7aba3195d4d23d52665d61b to your computer and use it in GitHub Desktop.
Minimal JSON parser which works with correct input only
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Minimal JSON parser which works with correct input only.
// Use cases:
// 1. Walk through unstructured JSON
// 2. Transform unstructured JSON
// without fully unmarshalling it into a map[string]interface{}
//
// Caution: Behaviour is undefined on invalid JSON. Use on trusted input only.
package µjson | |
import ( | |
"fmt" | |
"strings" | |
) | |
// Walk scans the JSON document in s, starting at byte offset i, and calls fn
// once for every token it finds: scalars (null, true, false, numbers,
// strings), opening brackets ("{", "[") and closing brackets ("}", "]").
//
// fn receives:
//   - st: the nesting depth of the token. The top-level value has depth 0 and
//     a closing bracket is reported at the depth of its opening bracket.
//   - key: the raw object key, quotes included, or "" when the token has no
//     key (array elements, closing brackets, the top-level value).
//   - value: the raw token text; strings keep their quotes and escapes.
//
// Returning false from fn for an opening bracket skips that container:
// nothing inside it is reported and neither is its closing bracket. The
// return value of fn is ignored for every other token.
//
// A single trailing '\n' is stripped from s before scanning. Scanning stops
// as soon as the top-level container is closed; bytes after it are ignored.
//
// Walk is not a validator: tokens are recognised by their first byte only.
// It returns an error when the input ends where more input is required, when
// a value is missing, or when an unexpected byte follows a value.
func Walk(s []byte, i int, fn func(st int, key, value string) bool) error {
	// noSkip is the value of sst while no container is being skipped. Using
	// the largest int means no reachable depth can ever exceed or equal it.
	const noSkip = int(^uint(0) >> 1)

	var (
		si, ei int    // start and end offsets of the current token
		st     int    // current nesting depth
		key    string // pending object key for the next value
	)
	// sst is the depth of the container being skipped; tokens deeper than
	// sst are not reported.
	sst := noSkip

	// Trim the last newline.
	if len(s) > 0 && s[len(s)-1] == '\n' {
		s = s[:len(s)-1]
	}

value:
	if i < 0 || i >= len(s) {
		return unexpectedEnd(i)
	}
	si = i
	switch s[i] {
	case 'n', 't', 'f': // null, true, false
		i += 4
		if s[si] == 'f' {
			i++
		}
		if i > len(s) {
			return unexpectedEnd(len(s))
		}
		ei = i
		if st <= sst {
			fn(st, key, string(s[si:ei]))
		}
		key = ""
		goto closing

	case '{', '[':
		if st <= sst && !fn(st, key, string(s[i])) {
			sst = st
		}
		key = ""
		st++
		i++
		goto value

	case '}', ']':
		// Empty container, possibly with whitespace between the brackets:
		// there is no value to report, let closing handle the bracket.
		goto closing

	case '"': // scan string
		for {
			i++
			if i >= len(s) {
				return unexpectedEnd(len(s))
			}
			switch s[i] {
			case '\\': // \. - skip the escaped byte
				i++
			case '"': // end of string
				i++
				ei = i
				for i < len(s) && isJSONSpace(s[i]) {
					i++
				}
				// A string followed by ':' is a key; closing picks it up
				// from s[si:ei]. Anything else is a value (including a
				// string that ends the input).
				if i >= len(s) || s[i] != ':' {
					if st <= sst {
						fn(st, key, string(s[si:ei]))
					}
					key = ""
				}
				goto closing
			}
		}

	case ' ', '\t', '\n', '\r': // space, ignore
		i++
		goto value

	default: // scan number: everything up to a delimiter or end of input
		for i < len(s) && s[i] != ',' && s[i] != '}' && s[i] != ']' && !isJSONSpace(s[i]) {
			i++
		}
		if i == si {
			return parseError(i, s[i], `expect a value`)
		}
		ei = i
		if st <= sst {
			fn(st, key, string(s[si:ei]))
		}
		key = ""
		goto closing
	}

closing:
	if i >= len(s) {
		if st > 0 {
			// Input ended inside an unclosed container.
			return unexpectedEnd(len(s))
		}
		return nil
	}
	switch s[i] {
	case ':':
		key = string(s[si:ei])
		i++
		goto value
	case ',':
		i++
		goto value
	case ']', '}':
		st--
		switch {
		case st == sst:
			// End of the skipped container: resume reporting.
			sst = noSkip
		case st < sst:
			fn(st, "", string(s[i]))
		}
		// st > sst: a bracket nested inside the skipped container.
		if st <= 0 {
			return nil
		}
		i++
		goto closing
	case ' ', '\t', '\n', '\r':
		i++ // space, ignore
		goto closing
	default:
		return parseError(i, s[i], `expect ']', '}' or ','`)
	}
}

// isJSONSpace reports whether c is one of the four JSON whitespace bytes.
func isJSONSpace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}

// unexpectedEnd builds the error returned when the input stops at offset i
// although more input is required.
func unexpectedEnd(i int) error {
	return fmt.Errorf("json error at %v: unexpected end of input", i)
}

// parseError builds the error for an unexpected byte c at offset i. The byte
// is shown both as a character and as a zero-padded two-digit hex value.
func parseError(i int, c byte, msg string) error {
	return fmt.Errorf("json error at %v '%c' 0x%02x: %v", i, c, c, msg)
}
// ShouldAddComma reports whether a ',' separator has to be written before
// value, given lastChar, the last byte written so far. No comma is needed
// when value is a closing bracket, or when the output currently ends with a
// comma or an opening bracket.
func ShouldAddComma(value string, lastChar byte) bool {
	switch value {
	case "}", "]":
		return false
	}
	switch lastChar {
	case ',', '{', '[':
		return false
	}
	return true
}
func Reconstruct(s []byte) ([]byte, error) { | |
b := make([]byte, 0, 1024) | |
err := Walk(s, 0, func(st int, key, value string) bool { | |
if len(b) != 0 && ShouldAddComma(value, b[len(b)-1]) { | |
b = append(b, ',') | |
} | |
if key != "" { | |
b = append(b, key...) | |
b = append(b, ':') | |
} | |
b = append(b, value...) | |
return true | |
}) | |
return b, err | |
} | |
func FilterAndRename(b []byte, input []byte) (output []byte, _ error) { | |
err := Walk(input, 0, func(st int, key, value string) bool { | |
// Ignore fields with null value | |
if value == "null" { | |
return true | |
} | |
wrap := false | |
if key != "" { | |
// Remove quotes | |
key = key[1 : len(key)-1] | |
// Skip _ids | |
if strings.HasSuffix(key, "_ids") { | |
return false | |
} | |
// Rename external_ to x_ | |
if strings.HasPrefix(key, "external_") { | |
key = "x_" + key[len("external_"):] | |
} else if (key == "id" || strings.HasSuffix(key, "_id")) && | |
value[0] >= '0' && value[0] <= '9' { | |
wrap = true | |
} | |
} | |
if len(b) != 0 && ShouldAddComma(value, b[len(b)-1]) { | |
b = append(b, ',') | |
} | |
if key != "" { | |
b = append(b, '"') | |
b = append(b, key...) | |
b = append(b, '"') | |
b = append(b, ':') | |
} | |
if wrap { | |
b = append(b, '"') | |
b = append(b, value...) | |
b = append(b, '"') | |
} else { | |
b = append(b, value...) | |
} | |
return true | |
}) | |
return b, err | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package µjson | |
import ( | |
"fmt" | |
"strings" | |
"testing" | |
"github.com/stretchr/testify/assert" | |
) | |
func TestWalk(t *testing.T) { | |
tests := []struct { | |
inp string | |
exp string | |
}{ | |
{ | |
`null`, | |
` | |
0 null`, | |
}, | |
{ | |
"null\n", // end with newline | |
` | |
0 null`, | |
}, | |
{ | |
`{}`, | |
` | |
0 { | |
0 }`, | |
}, | |
{ | |
`{"foo":""}`, | |
` | |
0 { | |
1 "foo" "" | |
0 }`, | |
}, | |
{ | |
`{"foo": ""}`, // Space | |
` | |
0 { | |
1 "foo" "" | |
0 }`, | |
}, | |
{ | |
`{"foo":"bar"}`, | |
` | |
0 { | |
1 "foo" "bar" | |
0 }`, | |
}, | |
{ | |
`{"foo":"bar","baz":""}`, | |
` | |
0 { | |
1 "foo" "bar" | |
1 "baz" "" | |
0 }`, | |
}, | |
{ | |
`{ "foo" : "bar" , "baz" : 2 }`, // Space | |
` | |
0 { | |
1 "foo" "bar" | |
1 "baz" 2 | |
0 }`, | |
}, | |
{ | |
`{"foo":null}`, | |
` | |
0 { | |
1 "foo" null | |
0 }`, | |
}, | |
{ | |
`{"foo":123}`, | |
` | |
0 { | |
1 "foo" 123 | |
0 }`, | |
}, | |
{ | |
`{"foo":-123}`, | |
` | |
0 { | |
1 "foo" -123 | |
0 }`, | |
}, | |
{ | |
`{"foo":42.1}`, | |
` | |
0 { | |
1 "foo" 42.1 | |
0 }`, | |
}, | |
{ | |
`{"foo":+0}`, | |
` | |
0 { | |
1 "foo" +0 | |
0 }`, | |
}, | |
{ | |
`{"foo":"b\"ar"}`, | |
` | |
0 { | |
1 "foo" "b\"ar" | |
0 }`, | |
}, | |
{ | |
`{"😀":"🎶\""}`, | |
` | |
0 { | |
1 "😀" "🎶\"" | |
0 }`, | |
}, | |
{ | |
`{"foo":{}}`, | |
` | |
0 { | |
1 "foo" { | |
1 } | |
0 }`, | |
}, | |
{ | |
`{"foo":{"bar":false,"baz":true,"quix":null}}`, | |
` | |
0 { | |
1 "foo" { | |
2 "bar" false | |
2 "baz" true | |
2 "quix" null | |
1 } | |
0 }`, | |
}, | |
{ | |
`{"1":{"1.1":{"1.1.1":"foo","1.1.2":"bar"},"1.2":{"1.2.1":"baz"}}}`, | |
` | |
0 { | |
1 "1" { | |
2 "1.1" { | |
3 "1.1.1" "foo" | |
3 "1.1.2" "bar" | |
2 } | |
2 "1.2" { | |
3 "1.2.1" "baz" | |
2 } | |
1 } | |
0 }`, | |
}, | |
{ | |
`[]`, | |
` | |
0 [ | |
0 ]`, | |
}, | |
{ | |
`[null]`, | |
` | |
0 [ | |
1 null | |
0 ]`, | |
}, | |
{ | |
`[0]`, | |
` | |
0 [ | |
1 0 | |
0 ]`, | |
}, | |
{ | |
`["foo"]`, | |
` | |
0 [ | |
1 "foo" | |
0 ]`, | |
}, | |
{ | |
`["",""]`, | |
` | |
0 [ | |
1 "" | |
1 "" | |
0 ]`, | |
}, | |
{ | |
`["foo","bar"]`, | |
` | |
0 [ | |
1 "foo" | |
1 "bar" | |
0 ]`, | |
}, | |
{ | |
`[[]]`, | |
` | |
0 [ | |
1 [ | |
1 ] | |
0 ]`, | |
}, | |
{ | |
`[{},[]]`, | |
` | |
0 [ | |
1 { | |
1 } | |
1 [ | |
1 ] | |
0 ]`, | |
}, | |
{ | |
`{"foo":[]}`, | |
` | |
0 { | |
1 "foo" [ | |
1 ] | |
0 }`, | |
}, | |
{ | |
`{"foo":[{"k":"v"}]}`, | |
` | |
0 { | |
1 "foo" [ | |
2 { | |
3 "k" "v" | |
2 } | |
1 ] | |
0 }`, | |
}, | |
{ | |
`{"foo":[{"k1":"v1","k2":"v2"}]}`, | |
` | |
0 { | |
1 "foo" [ | |
2 { | |
3 "k1" "v1" | |
3 "k2" "v2" | |
2 } | |
1 ] | |
0 }`, | |
}, | |
{ | |
`{"foo":[{"k1.1":"v1.1","k1.2":"v1.2"},{"k2.1":"v2.1"}],"bar":{}}`, | |
` | |
0 { | |
1 "foo" [ | |
2 { | |
3 "k1.1" "v1.1" | |
3 "k1.2" "v1.2" | |
2 } | |
2 { | |
3 "k2.1" "v2.1" | |
2 } | |
1 ] | |
1 "bar" { | |
1 } | |
0 }`, | |
}, | |
{ | |
`{"1":[{"2":{"k1":"v1","k2":"v2"}}]}`, | |
` | |
0 { | |
1 "1" [ | |
2 { | |
3 "2" { | |
4 "k1" "v1" | |
4 "k2" "v2" | |
3 } | |
2 } | |
1 ] | |
0 }`, | |
}, | |
{ | |
`{"1":[{"2":[{"k1":"v1","k2":"v2"},{"k3":"v3"}]}]}`, | |
` | |
0 { | |
1 "1" [ | |
2 { | |
3 "2" [ | |
4 { | |
5 "k1" "v1" | |
5 "k2" "v2" | |
4 } | |
4 { | |
5 "k3" "v3" | |
4 } | |
3 ] | |
2 } | |
1 ] | |
0 }`, | |
}, | |
{ | |
`{ "1" : [ { "2": [ { "k1" : "v1" , "k2" : "v2" } ,{"k3":"v3" } ] } ] }`, | |
` | |
0 { | |
1 "1" [ | |
2 { | |
3 "2" [ | |
4 { | |
5 "k1" "v1" | |
5 "k2" "v2" | |
4 } | |
4 { | |
5 "k3" "v3" | |
4 } | |
3 ] | |
2 } | |
1 ] | |
0 }`, | |
}, | |
} | |
for _, tt := range tests { | |
t.Run("Walk/"+tt.inp, func(t *testing.T) { | |
var b strings.Builder | |
err := Walk([]byte(tt.inp), 0, | |
func(st int, key, value string) bool { | |
fmt.Fprintf(&b, "\n%v %v %v", st, key, value) | |
return true | |
}) | |
if err != nil { | |
t.Error(err) | |
} else if b.String() != tt.exp { | |
t.Errorf("\nExpect: `%v`\nOutput: `%v`\n", tt.exp, b.String()) | |
} | |
}) | |
} | |
for _, tt := range tests { | |
t.Run("Reconstruct/"+tt.inp, func(t *testing.T) { | |
// Handle the sepcial testcase ending with \n | |
exp := tt.inp | |
if exp[len(exp)-1] == '\n' { | |
exp = exp[:len(exp)-1] | |
} | |
exp = strings.Replace(exp, " ", "", -1) | |
data, err := Reconstruct([]byte(tt.inp)) | |
if err != nil { | |
t.Error(err) | |
} else if s := string(data); s != exp { | |
t.Errorf("\nExpect: %v\nOutput: %v\n", exp, s) | |
} | |
}) | |
} | |
} | |
func TestFilterAndRename(t *testing.T) { | |
msg := `{"external_id":"123","product_id":123456789,"amount":100000,"supplier_ids":[12345678,12345679],"tags":[123],"foo":null,"x":{"bar":null,"baz":false},"y":{}}` | |
var b []byte | |
b, err := FilterAndRename(b, []byte(msg)) | |
assert.NoError(t, err) | |
// TODO: Wrap ids in array | |
exp := `{"x_id":"123","product_id":"123456789","amount":100000,"tags":[123],"x":{"baz":false},"y":{}}` | |
assert.Equal(t, exp, string(b)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment