diff options
author | Luke Shumaker <lukeshu@datawire.io> | 2022-08-16 22:40:19 -0600 |
---|---|---|
committer | Luke Shumaker <lukeshu@datawire.io> | 2022-08-17 00:12:35 -0600 |
commit | e57bee02e02b7e3697d6c3cb8b75923a92100427 (patch) | |
tree | f32610f2b73fbea1f2a94e108fabca18d31d5d27 | |
parent | 87b02577e50b76d373e3c6b921d776e39cb83346 (diff) |
Add tests for decode reading too far
-rw-r--r-- | decode.go | 5 | ||||
-rw-r--r-- | decode_scan.go | 62 | ||||
-rw-r--r-- | decode_scan_test.go | 128 | ||||
-rw-r--r-- | decode_test.go | 21 | ||||
-rw-r--r-- | errors.go | 1 |
5 files changed, 185 insertions, 32 deletions
@@ -114,7 +114,10 @@ func (dec *Decoder) stackName() string { return strings.Join(fields, ".") } -func Decode(r io.Reader, ptr any) error { +func Decode(r interface { + io.Reader + io.RuneScanner // enforce that the reader have .UnreadRune() so that we don't risk reading too far when decoding a number +}, ptr any) error { return NewDecoder(r).Decode(ptr) } diff --git a/decode_scan.go b/decode_scan.go index fee9ec6..9fa6181 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -11,13 +11,11 @@ import ( type runeTypeScanner interface { // The returned error is a *ReadError, a *SyntaxError, or nil. - // An EOF condition is represented either as + // An EOF condition is represented as one of: // - // (char, size, RuneTypeEOF, nil) - // - // or - // - // (char, size, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) + // end of value but not file: (_, >0, RuneTypeEOF, nil) + // end of both value and file: (_, 0, RuneTypeEOF, nil) + // end of file but not value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) ReadRuneType() (rune, int, RuneType, error) // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. ReadRune() (rune, int, error) @@ -31,6 +29,8 @@ type runeTypeScanner interface { type runeTypeScannerImpl struct { inner io.RuneReader + initialized bool + parser Parser offset int64 @@ -109,6 +109,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) { } } } + sc.initialized = true sc.repeat = false sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError return sc.rRune, sc.rSize, sc.rType, sc.rErr @@ -128,12 +129,13 @@ func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune") -// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). If the -// last call to .ReadRune() or .ReadRuneType() has already been -// unread, or if that call returned an error or RuneTypeEOF, then -// ErrInvalidRune is returned. Otherwise, nil is returned. +// UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). +// +// If the last call to .ReadRune() or .ReadRuneType() has already been +// unread, or if that call returned a rune with size 0, then +// ErrInvalidUnreadRune is returned. Otherwise, nil is returned. func (sc *runeTypeScannerImpl) UnreadRune() error { - if sc.stuck || sc.repeat { + if !sc.initialized || sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } sc.repeat = true @@ -191,27 +193,46 @@ type elemRuneTypeScanner struct { parser Parser repeat bool + stuck bool rType RuneType + rErr error } var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) { + // Read it, run it through the parent's parser. r, s, t, e := sc.inner.ReadRuneType() - // Check if we need to insert a premature EOF - if t != RuneTypeError && t != RuneTypeEOF { - if sc.repeat { + // Run it through our child parser. + if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) { + if sc.repeat || sc.stuck { sc.repeat = false } else { - sc.rType, _ = sc.parser.HandleRune(r) - } - if sc.rType == RuneTypeEOF { - _ = sc.inner.UnreadRune() + var err error + if s > 0 { + sc.rType, err = sc.parser.HandleRune(r) + } else { + sc.rType, err = sc.parser.HandleEOF() + } + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.inner.InputOffset(), + Err: err, + } + } else { + sc.rErr = nil + } } - t = sc.rType + sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError + t, e = sc.rType, sc.rErr } + + // Check if we need to truncate the result. if t == RuneTypeEOF { + if s > 0 { + _ = sc.inner.UnreadRune() + } return 0, 0, RuneTypeEOF, nil } @@ -231,8 +252,9 @@ func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { } func (sc *elemRuneTypeScanner) UnreadRune() error { + ret := sc.inner.UnreadRune() sc.repeat = true - return sc.inner.UnreadRune() + return ret } func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } diff --git a/decode_scan_test.go b/decode_scan_test.go index 8bc33e3..6fd9369 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -26,14 +26,16 @@ func (r ReadRuneTypeResult) String() string { } type runeTypeScannerTestcase struct { - Input string - Exp []ReadRuneTypeResult + Input string + ExpRemainder string + Exp []ReadRuneTypeResult } func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneReader) runeTypeScanner) { for tcName, tc := range testcases { t.Run(tcName, func(t *testing.T) { - sc := factory(strings.NewReader(tc.Input)) + reader := strings.NewReader(tc.Input) + sc := factory(reader) var exp, act []string for _, iExp := range tc.Exp { var iAct ReadRuneTypeResult @@ -47,13 +49,14 @@ func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestc act = append(act, iAct.String()) } assert.Equal(t, exp, act) + assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) }) } } func TestRuneTypeScanner(t *testing.T) { testcases := map[string]runeTypeScannerTestcase{ - "basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -70,7 +73,7 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -89,7 +92,7 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "unread2": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "unread2": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -109,7 +112,7 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "unread-eof": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -128,12 +131,26 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "syntax-error": {`[[0,]`, []ReadRuneTypeResult{ + "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ {'[', 1, RuneTypeArrayBeg, nil}, {'[', 1, RuneTypeArrayBeg, nil}, {'0', 1, RuneTypeNumberIntZero, nil}, {',', 1, RuneTypeArrayComma, nil}, {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + }}, + "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ + {'1', 1, RuneTypeNumberIntDig, nil}, + {'{', 1, RuneTypeEOF, nil}, + {'{', 1, RuneTypeEOF, nil}, + {'{', 1, RuneTypeEOF, nil}, + }}, + "early-eof": {`{`, ``, []ReadRuneTypeResult{ + {'{', 1, RuneTypeObjectBeg, nil}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner { @@ -145,7 +162,7 @@ func TestRuneTypeScanner(t *testing.T) { func TestNoWSRuneTypeScanner(t *testing.T) { testcases := map[string]runeTypeScannerTestcase{ - "basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -161,7 +178,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{ + "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -179,7 +196,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, - "tail": {`{"foo": 12.0} `, []ReadRuneTypeResult{ + "tail": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ {'{', 1, RuneTypeObjectBeg, nil}, {'"', 1, RuneTypeStringBeg, nil}, {'f', 1, RuneTypeStringChar, nil}, @@ -195,6 +212,18 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {0, 0, RuneTypeEOF, nil}, {0, 0, RuneTypeEOF, nil}, }}, + "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ + {'1', 1, RuneTypeNumberIntDig, nil}, + {'{', 1, RuneTypeEOF, nil}, + {'{', 1, RuneTypeEOF, nil}, + {'{', 1, RuneTypeEOF, nil}, + }}, + "early-eof": {` {`, ``, []ReadRuneTypeResult{ + {'{', 1, RuneTypeObjectBeg, nil}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + }}, } testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner { return &noWSRuneTypeScanner{ @@ -206,6 +235,83 @@ func TestNoWSRuneTypeScanner(t *testing.T) { } func TestElemRuneTypeScanner(t *testing.T) { + testcases := map[string]runeTypeScannerTestcase{ + "basic": {`1`, ``, []ReadRuneTypeResult{ + {'1', 1, RuneTypeNumberIntDig, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + }}, + "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ + {'[', 1, RuneTypeArrayBeg, nil}, + {'[', 1, RuneTypeArrayBeg, nil}, + {'0', 1, RuneTypeNumberIntZero, nil}, + {',', 1, RuneTypeArrayComma, nil}, + {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + {']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, + }}, + "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{ + {'1', 1, RuneTypeNumberIntDig, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + }}, + "fragment": {`1,`, `,`, []ReadRuneTypeResult{ + {'1', 1, RuneTypeNumberIntDig, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + {0, 0, RuneTypeEOF, nil}, + }}, + "early-eof": {`{`, ``, []ReadRuneTypeResult{ + {'{', 1, RuneTypeObjectBeg, nil}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + }}, + } + t.Run("top-level", func(t *testing.T) { + testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner { + return &elemRuneTypeScanner{ + inner: &noWSRuneTypeScanner{ + inner: &runeTypeScannerImpl{ + inner: reader, + }, + }, + } + }) + }) + + for tcName, tc := range testcases { + tc.Input = `[` + tc.Input + for _, res := range tc.Exp { + if se, ok := res.e.(*DecodeSyntaxError); ok { + se.Offset++ + } + } + testcases[tcName] = tc + } + t.Run("child", func(t *testing.T) { + testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner { + inner := &noWSRuneTypeScanner{ + inner: &runeTypeScannerImpl{ + inner: reader, + }, + } + var res ReadRuneTypeResult + res.r, res.s, res.t, res.e = inner.ReadRuneType() + require.Equal(t, + ReadRuneTypeResult{'[', 1, RuneTypeArrayBeg, nil}.String(), + res.String()) + + return &elemRuneTypeScanner{ + inner: inner, + } + }) + }) +} + +func TestElemRuneTypeScanner2(t *testing.T) { parent := &noWSRuneTypeScanner{ inner: &runeTypeScannerImpl{ inner: strings.NewReader(` { "foo" : 12.0 } `), diff --git a/decode_test.go b/decode_test.go new file mode 100644 index 0000000..8220e39 --- /dev/null +++ b/decode_test.go @@ -0,0 +1,21 @@ +// Copyright (C) 2022 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDecodeNumber(t *testing.T) { + r := strings.NewReader(`1{}`) + + var num int + assert.NoError(t, Decode(r, &num)) + assert.Equal(t, 1, num) + assert.Equal(t, 2, r.Len()) // check that it didn't read too far +} @@ -48,6 +48,7 @@ type DecodeSyntaxError struct { func (e *DecodeSyntaxError) Error() string { return fmt.Sprintf("json: syntax error at input byte %v: %v", e.Offset, e.Err) } +func (e *DecodeSyntaxError) Unwrap() error { return e.Err } // A *DecodeTypeError is returned from Decode if the JSON input is not // appropriate for the given Go type. |