From edfc7aa91b542978ce28eb109b99a257650b62b4 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 18 Feb 2023 21:57:39 -0700 Subject: decode_scan, jsonparse: Rework the behavior to make a bit more sense - decode_scan: Don't have .Reset() re-play an erroring rune - decode_scan: Have RuneTypeEOF always be zero-width - jsonparse: Don't replace syntax errors with RuneTypeEOF if there's no barrier --- decode.go | 18 +++++------- decode_scan.go | 71 ++++++++------------------------------------- decode_scan_test.go | 47 +++++++++++++++--------------- internal/jsonparse/parse.go | 6 +++- reencode.go | 17 +++++------ 5 files changed, 56 insertions(+), 103 deletions(-) diff --git a/decode.go b/decode.go index 8514ec4..491971a 100644 --- a/decode.go +++ b/decode.go @@ -207,19 +207,17 @@ func (dec *Decoder) DecodeThenEOF(ptr any) (err error) { if err := dec.Decode(ptr); err != nil { return err } - c, s, t, _ := dec.io.ReadRuneType() - if t != jsonparse.RuneTypeEOF { - panic(fmt.Errorf("should not happen: .ReadRuneType returned non-EOF after decode without .Reset being called: %v", t)) - } - if s > 0 { + _, _, t, err := dec.io.ReadRuneType() + switch t { + case jsonparse.RuneTypeError: return &DecodeError{ - Err: &DecodeSyntaxError{ - Err: fmt.Errorf("invalid character %q after top-level value", c), - Offset: dec.InputOffset(), - }, + Err: err, } + case jsonparse.RuneTypeEOF: + return nil + default: + panic(fmt.Errorf("should not happen: .ReadRuneType returned non-error non-EOF after decode without .Reset being called: '%v'", t)) } - return nil } // Decode reads the next JSON element from the Decoder's input stream diff --git a/decode_scan.go b/decode_scan.go index 7911c01..fcf47ff 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -31,8 +31,7 @@ type runeTypeScanner struct { // The returned error is a *ReadError, a *SyntaxError, or nil. 
// An EOF condition is represented as one of: // -// end of value but not file: (_, >0, RuneTypeEOF, nil) -// end of both value and file: (_, 0, RuneTypeEOF, nil) +// end of value: (_, 0, RuneTypeEOF, nil) // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) { @@ -59,8 +58,14 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) } else { sc.rErr = nil } - if sc.rType == jsonparse.RuneTypeSpace { + switch sc.rType { + case jsonparse.RuneTypeSpace: goto again + case jsonparse.RuneTypeEOF: + sc.offset -= int64(sc.rSize) + sc.rRune = 0 + sc.rSize = 0 + _ = sc.inner.UnreadRune() } case io.EOF: sc.rType, err = sc.parser.HandleEOF() @@ -122,65 +127,13 @@ func (sc *runeTypeScanner) PopReadBarrier() { } else { sc.rErr = nil } - case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - case sc.rType == jsonparse.RuneTypeEOF: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleEOF() - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset, - Err: err, - } - } else { - sc.rErr = nil - } + case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF: + sc.rTypeOK = false // forget the sticky EOF } } func (sc *runeTypeScanner) Reset() { sc.parser.Reset() - switch { - case sc.repeat: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: 
sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - case sc.rType == jsonparse.RuneTypeEOF && sc.rSize > 0: - // re-figure the rType and rErr - var err error - sc.rType, err = sc.parser.HandleRune(sc.rRune) - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.offset - int64(sc.rSize), - Err: err, - } - } else { - sc.rErr = nil - } - // tell it to use that rType and rErr - _ = sc.UnreadRune() // we set it up to always succeed - default: - sc.rTypeOK = false - } + sc.rTypeOK = false // forget any sticky errors/EOF + sc.repeat = false // feed the rune (if any) through the parser again } diff --git a/decode_scan_test.go b/decode_scan_test.go index eaf2f37..17c40d5 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -112,8 +112,8 @@ func TestRuneTypeScanner(t *testing.T) { {',', 1, jsonparse.RuneTypeArrayComma, nil}, {0, pushReadBarrier, 0, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {']', 1, jsonparse.RuneTypeEOF, nil}, - {0, unreadRune, 0, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, popReadBarrier, 0, nil}, {']', 1, jsonparse.RuneTypeArrayEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, @@ -149,17 +149,17 @@ func TestRuneTypeScanner(t *testing.T) { "multi-value1": {`1{}`, `{}`, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, }}, "multi-value2": {`1{}`, ``, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, - {'{', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, 
jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {0, reset, 0, nil}, {0, pushReadBarrier, 0, nil}, @@ -196,11 +196,11 @@ func TestRuneTypeScanner(t *testing.T) { {0, popReadBarrier, 0, nil}, // Test main. {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, + {',', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: fmt.Errorf("invalid character %q after top-level value", ',')}}, }}, - "child-fragment": {`[1,` + `1,`, ``, []ReadRuneTypeResult{ + "child-fragment": {`[1,` + `1,`, `,`, []ReadRuneTypeResult{ // Child prefix. {'[', 1, jsonparse.RuneTypeArrayBeg, nil}, {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, @@ -208,9 +208,9 @@ func TestRuneTypeScanner(t *testing.T) { {0, pushReadBarrier, 0, nil}, // Test main. 
{'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, - {',', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, @@ -225,22 +225,23 @@ func TestRuneTypeScanner(t *testing.T) { {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, {'0', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'}', 1, jsonparse.RuneTypeEOF, nil}, - {'}', 1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {'}', 1, jsonparse.RuneTypeObjectEnd, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "invalid-number": {`1.2.3`, `.3`, []ReadRuneTypeResult{ + "invalid-number": {`1.2.3`, ``, []ReadRuneTypeResult{ {'1', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'.', 1, jsonparse.RuneTypeNumberFracDot, nil}, {'2', 1, jsonparse.RuneTypeNumberFracDig, nil}, - {'.', 1, jsonparse.RuneTypeEOF, nil}, + {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q after top-level value", '.')}}, {0, reset, 0, nil}, - {'.', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 3, Err: fmt.Errorf("invalid character %q looking for beginning of value", '.')}}, + {'3', 1, jsonparse.RuneTypeNumberIntDig, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, }}, - "trailing-garbage": {" 42 x", `x`, []ReadRuneTypeResult{ + "trailing-garbage": {" 42 x", ``, []ReadRuneTypeResult{ {0, pushReadBarrier, 0, nil}, {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, {0, unreadRune, 0, nil}, @@ -249,10 +250,10 @@ func TestRuneTypeScanner(t *testing.T) { {0, pushReadBarrier, 0, nil}, {'4', 1, jsonparse.RuneTypeNumberIntDig, nil}, {'2', 1, jsonparse.RuneTypeNumberIntDig, nil}, - {'x', 
1, jsonparse.RuneTypeEOF, nil}, + {0, 0, jsonparse.RuneTypeEOF, nil}, {0, popReadBarrier, 0, nil}, {0, popReadBarrier, 0, nil}, - {'x', 1, jsonparse.RuneTypeEOF, nil}, + {'x', 1, jsonparse.RuneTypeError, &DecodeSyntaxError{Offset: 4, Err: fmt.Errorf("invalid character %q after top-level value", 'x')}}, }}, "unread-reset": {`{}`, ``, []ReadRuneTypeResult{ {'{', 1, jsonparse.RuneTypeObjectBeg, nil}, diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go index d867cbc..06efc8c 100644 --- a/internal/jsonparse/parse.go +++ b/internal/jsonparse/parse.go @@ -545,7 +545,11 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil default: - return RuneTypeEOF, nil + if len(par.barriers) > 0 { + return RuneTypeEOF, nil + } else { + return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c) + } } } switch par.stack[len(par.stack)-1] { diff --git a/reencode.go b/reencode.go index 7e9b5ff..0745c43 100644 --- a/reencode.go +++ b/reencode.go @@ -276,7 +276,6 @@ func (enc *ReEncoder) Close() error { } func (enc *ReEncoder) handleRune(c rune, size int) { -rehandle: t, err := enc.par.HandleRune(c) if err != nil { enc.err = &ReEncodeSyntaxError{ @@ -293,16 +292,14 @@ rehandle: return } if t == jsonparse.RuneTypeEOF { - if enc.allowMultipleValues && len(enc.barriers) == 0 { - enc.par.Reset() - goto rehandle - } else { - enc.err = &ReEncodeSyntaxError{ - Err: fmt.Errorf("invalid character %q after top-level value", c), - Offset: enc.inputPos, - } - return + if len(enc.barriers) == 0 { + panic(fmt.Errorf("should not happen: EOF for rune %q without write barriers", c)) } + enc.err = &ReEncodeSyntaxError{ + Err: fmt.Errorf("invalid character %q after top-level value", c), + Offset: enc.inputPos, + } + return } enc.inputPos += int64(size) -- cgit v1.2.3