From edfc7aa91b542978ce28eb109b99a257650b62b4 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 21:57:39 -0700 Subject: decode_scan, jsonparse: Rework the behavior to make a bit more sense - decode_scan: Don't have .Reset() re-play an erroring rune - decode_Scan: Have RuneTypeEOF always be zero-width - jsonparse: Don't replace syntax errors with RuneTypeEOF if there's no barrier --- decode.go | 18 +++++------- decode_scan.go | 71 ++++++++------------------------------------- decode_scan_test.go | 47 +++++++++++++++--------------- internal/jsonparse/parse.go | 6 +++- reencode.go | 17 +++++------ 5 files changed, 56 insertions(+), 103 deletions(-) diff --git a/decode.go b/decode.go index 8514ec4..491971a 100644 --- a/decode.go +++ b/decode.go @@ -207,19 +207,17 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) { if err := dec.Decode(ptr); err != nil { return err } - c, s, t, _ := dec.io.ReadRuneType() - if t != jsonparse.RuneTypeEOF { - panic(fmt.Errorf("should not happen: .ReadRuneType returned non-EOF after decode without .Reset being called: %v", t)) - } - if s > 0 { + _, _, t, err := dec.io.ReadRuneType() + switch t { + case jsonparse.RuneTypeError: return &DecodeError{ - Err: &DecodeSyntaxError{ - Err: fmt.Errorf("invalid character %q after top-level value", c), - Offset: dec.InputOffset(), - }, + Err: err, } + case jsonparse.RuneTypeEOF: + return nil + default: + panic(fmt.Errorf("should not happen: .ReadRuneType returned non-error non-EOF after decode without .Reset being called: '%v'", t)) } - return nil } // Decode reads the next JSON element from the Decoder's input stream diff --git a/decode_scan.go b/decode_scan.go index 7911c01..fcf47ff 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -31,8 +31,7 @@ type runeTypeScanner struct { // The returned error is a *ReadError, a *SyntaxError, or nil. // An EOF condition is represented as one of: // -// end of value but not file: (_, >0, RuneTypeEOF, nil) -// end of both value and file: (_, 0, RuneTypeEOF, nil) +// end of value: (_, 0, RuneTypeEOF, nil) // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { @@ -59,8 +58,14 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) } else { sc.rErr = nil } - if sc.rType == jsonparse.RuneTypeSpace { + switch sc.rType { + case jsonparse.RuneTypeSpace: goto again + case jsonparse.RuneTypeEOF: + sc.offset -= int64(sc.rSize) + sc.rRune = 0 + sc.rSize = 0 + _ = sc.inner.UnreadRune() } case io.EOF: sc.rType, err = sc.parser.HandleEOF() @@ -122,65 +127,13 @@ func (sc *runeTypeScanner) PopReadBarrier() { } else { sc.rErr = nil } - case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - case sc.rType == jsonparse.RuneTypeEOF: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleEOF() - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset, - Err: err, - } - } else { - sc.rErr = nil - } + case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF: + sc.rTypeOK = false // forget the sticky EOF } } func (sc *runeTypeScanner) Reset() { sc.parser.Reset() - switch { - case sc.repeat: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - default: - sc.rTypeOK = false - } + sc.rTypeOK = false // forget any sticky errors/EOF + sc.repeat = false // feed the rune (if any) through the parser again } diff --git a/decode_scan_test.go b/decode_scan_test.go index eaf2f37..17c40d5 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -112,8 +112,8 @@ func TestRuneTypeScanner(t *testing.T) { {',', 1, jsonparse.RuneTypeArrayComma, nil}, {0, pushReadBarrier, 0, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {']', 1, jsonparse.RuneTypeEOF, nil}, - {0, unreadRune, 0, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, popReadBarrier, 0, nil}, {']', 1, jsonparse.RuneTypeArrayEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, @@ -149,17 +149,17 @@ func TestRuneTypeScanner(t *testing.T) { "multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, }}, "multi-value2": {`1{}`, ``, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {0, reset, 0, nil}, {0, pushReadBarrier, 0, nil}, @@ -196,11 +196,11 @@ func TestRuneTypeScanner(t *testing.T) { {0, popReadBarrier, 0, nil}, // Test main. {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, }}, - "child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{ + "child-fragment": {`[1,` + `1,`, `,`, []ReadRuneTypeResult{ // Child prefix. {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, @@ -208,9 +208,9 @@ func TestRuneTypeScanner(t *testing.T) { {0, pushReadBarrier, 0, nil}, // Test main. {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, @@ -225,22 +225,23 @@ func TestRuneTypeScanner(t *testing.T) { {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'}', 1, jsonparse.RuneTypeEOF, nil}, - {'}', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{ + "invalid-number": {`1.2.3`, ``, []ReadRuneTypeResult{ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, {'2', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'.', 1, jsonparse.RuneTypeEOF, nil}, + {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q after top-level value", '.')}}, {0, reset, 0, nil}, - {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}}, + {'3', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{ + "trailing-garbage": {" 42 x", ``, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, {0, unreadRune, 0, nil}, @@ -249,10 +250,10 @@ func TestRuneTypeScanner(t *testing.T) { {0, pushReadBarrier, 0, nil}, {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'x', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {0, popReadBarrier, 0, nil}, - {'x', 1, jsonparse.RuneTypeEOF, nil}, + {'x', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q after top-level value", 'x')}}, }}, "unread-reset": {`{}`, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index d867cbc..06efc8c 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -545,7 +545,11 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil default: - return RuneTypeEOF, nil + if len(par.barriers) > 0 { + return RuneTypeEOF, nil + } else { + return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + } } } switch par.stack[len(par.stack)-1] { diff --git a/reencode.go b/reencode.go index 7e9b5ff..0745c43 100644 --- a/reencode.go +++ b/reencode.go @@ -276,7 +276,6 @@ func (enc *ReEncoder) Close() error { } func (enc *ReEncoder) handleRune(c rune, size int) { -rehandle: t, err := enc.par.HandleRune(c) if err != nil { enc.err = &ReEncodeSyntaxError{ @@ -293,16 +292,14 @@ rehandle: return } if t == jsonparse.RuneTypeEOF { - if enc.allowMultipleValues && len(enc.barriers) == 0 { - enc.par.Reset() - goto rehandle - } else { - enc.err = &ReEncodeSyntaxError{ - Err: fmt.Errorf("invalid character %q after top-level value", c), - Offset: enc.inputPos, - } - return + if len(enc.barriers) == 0 { + panic(fmt.Errorf("should not happen: EOF for rune %q without write barriers", c)) } + enc.err = &ReEncodeSyntaxError{ + Err: fmt.Errorf("invalid character %q after top-level value", c), + Offset: enc.inputPos, + } + return } enc.inputPos += int64(size) -- cgit v1.2.3 From ab0d686b9bb43a02f8d74c5e881782ab4e94e30b Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 12:53:05 -0700 Subject: jsonparse: Have PushReadBarrier reject trailing whitespace --- decode_scan_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++++ internal/jsonparse/parse.go | 41 +++++++++++++++++++-------------- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/decode_scan_test.go b/decode_scan_test.go index 17c40d5..ee532c2 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -122,6 +122,32 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, + "child-tail-ws": {`[1,` + `{"foo": 12.0} `, ` `, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, {'"', 1, jsonparse.RuneTypeStringBeg, nil}, {'f', 1, jsonparse.RuneTypeStringChar, nil}, @@ -213,6 +239,36 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ + // Disable auto-child. + {0, pushReadBarrier, 0, nil}, + {0, popReadBarrier, 0, nil}, + // Test main. + {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, + {'"', 1, jsonparse.RuneTypeStringBeg, nil}, + {'f', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'o', 1, jsonparse.RuneTypeStringChar, nil}, + {'"', 1, jsonparse.RuneTypeStringEnd, nil}, + {':', 1, jsonparse.RuneTypeObjectColon, nil}, + {0, pushReadBarrier, 0, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, + {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + }}, + "child-elem": {`[1,` + ` { "foo" : 12.0 } `, ` `, []ReadRuneTypeResult{ + // Child prefix. + {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, + {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {',', 1, jsonparse.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + // Test main. {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, {'"', 1, jsonparse.RuneTypeStringBeg, nil}, {'f', 1, jsonparse.RuneTypeStringChar, nil}, diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index 06efc8c..1c35533 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -324,7 +324,8 @@ type Parser struct { } type barrier struct { - stack []RuneType + allowWS bool + stack []RuneType } func (par *Parser) init() { @@ -387,9 +388,10 @@ func (par *Parser) Reset() { } // PushReadBarrier causes the parser to emit EOF once the end of the -// element that is started by the current top-of-stack is reached, -// until this is un-done with PopBarrier. It essentially turns the -// parser in to a sub-parser. +// element that is started by the current top-of-stack is reached +// (which means that it will reject whitespace between the end of the +// element and EOF), until this is un-done with PopBarrier. It +// essentially turns the parser in to a sub-parser. // // PushReadBarrier may only be called at the beginning of an element, // whether that be @@ -424,14 +426,16 @@ func (par *Parser) PushReadBarrier() { } // Actually push. par.barriers = append(par.barriers, barrier{ - stack: par.stack[:len(par.stack)-1], + allowWS: false, + stack: par.stack[:len(par.stack)-1], }) par.stack = []RuneType{curState} } // PushWriteBarrier causes the parser to emit EOF once the end of the -// about-to-start element is reached, until this is un-done with -// PopBarrier. It essentially turns the parser in to a sub-parser. +// about-to-start element is reached and any trailing whitespace has +// been exhausted, until this is un-done with PopBarrier. It +// essentially turns the parser in to a sub-parser. // // PushWriteBarrier may only be called at the places where an element // of any type may start: @@ -451,13 +455,15 @@ func (par *Parser) PushWriteBarrier() { case runeTypeAny: par.popState() par.barriers = append(par.barriers, barrier{ - stack: par.stack, + allowWS: true, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} case RuneTypeArrayBeg: par.replaceState(RuneTypeArrayComma) par.barriers = append(par.barriers, barrier{ - stack: par.stack, + allowWS: true, + stack: par.stack, }) par.stack = []RuneType{runeTypeAny} default: @@ -541,16 +547,17 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { } par.init() if len(par.stack) == 0 { - switch c { - case 0x0020, 0x000A, 0x000D, 0x0009: - return RuneTypeSpace, nil - default: - if len(par.barriers) > 0 { - return RuneTypeEOF, nil - } else { - return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS { + switch c { + case 0x0020, 0x000A, 0x000D, 0x0009: + return RuneTypeSpace, nil } } + if len(par.barriers) > 0 { + return RuneTypeEOF, nil + } else { + return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + } } switch par.stack[len(par.stack)-1] { // any ///////////////////////////////////////////////////////////////////////////////////// -- cgit v1.2.3