Add tests for decode reading too far

author: Luke Shumaker <lukeshu@datawire.io> 2022-08-16 22:40:19 -0600
committer: Luke Shumaker <lukeshu@datawire.io> 2022-08-17 00:12:35 -0600
commit: e57bee02e02b7e3697d6c3cb8b75923a92100427 (patch)
tree: f32610f2b73fbea1f2a94e108fabca18d31d5d27
parent: 87b02577e50b76d373e3c6b921d776e39cb83346 (diff)
5 files changed, 185 insertions, 32 deletions
diff --git a/decode.go b/decode.go
index e42c115..a17a572 100644
--- a/decode.go
+++ b/decode.go
@@ -114,7 +114,10 @@ func (dec *Decoder) stackName() string {
 	return strings.Join(fields, ".")
 }
 
-func Decode(r io.Reader, ptr any) error {
+func Decode(r interface {
+	io.Reader
+	io.RuneScanner // require a reader that supports .UnreadRune(), so that we don't risk reading too far when decoding a number
+}, ptr any) error {
 	return NewDecoder(r).Decode(ptr)
 }
 
diff --git a/decode_scan.go b/decode_scan.go
index fee9ec6..9fa6181 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -11,13 +11,11 @@ import (
 
 type runeTypeScanner interface {
 	// The returned error is a *ReadError, a *SyntaxError, or nil.
-	// An EOF condition is represented either as
+	// An EOF condition is represented as one of:
 	//
-	//   (char, size, RuneTypeEOF, nil)
-	//
-	// or
-	//
-	//   (char, size, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF})
+	//   end of value but not file:  (_, >0, RuneTypeEOF, nil)
+	//   end of both value and file: (_, 0, RuneTypeEOF, nil)
+	//   end of file but not value:  (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.ErrUnexpectedEOF})
 	ReadRuneType() (rune, int, RuneType, error)
 	// The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil.
 	ReadRune() (rune, int, error)
@@ -31,6 +29,8 @@ type runeTypeScanner interface {
 type runeTypeScannerImpl struct {
 	inner io.RuneReader
 
+	initialized bool
+
 	parser Parser
 	offset int64
 
@@ -109,6 +109,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, RuneType, error) {
 			}
 		}
 	}
+	sc.initialized = true
 	sc.repeat = false
 	sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError
 	return sc.rRune, sc.rSize, sc.rType, sc.rErr
@@ -128,12 +129,13 @@ func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) {
 
 var ErrInvalidUnreadRune = errors.New("lowmemjson: invalid use of UnreadRune")
 
-// UnreadRune undoes a call to .ReadRune() or .ReadRuneType().  If the
-// last call to .ReadRune() or .ReadRuneType() has already been
-// unread, or if that call returned an error or RuneTypeEOF, then
-// ErrInvalidRune is returned.  Otherwise, nil is returned.
+// UnreadRune undoes a call to .ReadRune() or .ReadRuneType().
+//
+// If the last call to .ReadRune() or .ReadRuneType() has already been
+// unread, or if that call returned a rune with size 0, then
+// ErrInvalidUnreadRune is returned.  Otherwise, nil is returned.
 func (sc *runeTypeScannerImpl) UnreadRune() error {
-	if sc.stuck || sc.repeat {
+	if !sc.initialized || sc.repeat || sc.rSize == 0 {
 		return ErrInvalidUnreadRune
 	}
 	sc.repeat = true
@@ -191,27 +193,46 @@ type elemRuneTypeScanner struct {
 
 	parser Parser
 	repeat bool
+	stuck  bool
 	rType  RuneType
+	rErr   error
 }
 
 var _ runeTypeScanner = (*elemRuneTypeScanner)(nil)
 
 func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, RuneType, error) {
+	// Read the next rune; the inner scanner has already run it through the parent's parser.
 	r, s, t, e := sc.inner.ReadRuneType()
 
-	// Check if we need to insert a premature EOF
-	if t != RuneTypeError && t != RuneTypeEOF {
-		if sc.repeat {
+	// Run it through our child parser.
+	if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) {
+		if sc.repeat || sc.stuck {
 			sc.repeat = false
 		} else {
-			sc.rType, _ = sc.parser.HandleRune(r)
-		}
-		if sc.rType == RuneTypeEOF {
-			_ = sc.inner.UnreadRune()
+			var err error
+			if s > 0 {
+				sc.rType, err = sc.parser.HandleRune(r)
+			} else {
+				sc.rType, err = sc.parser.HandleEOF()
+			}
+			if err != nil {
+				sc.rErr = &DecodeSyntaxError{
+					Offset: sc.inner.InputOffset(),
+					Err:    err,
+				}
+			} else {
+				sc.rErr = nil
+			}
 		}
-		t = sc.rType
+		sc.stuck = sc.rType == RuneTypeEOF || sc.rType == RuneTypeError
+		t, e = sc.rType, sc.rErr
 	}
+
+	// Check if we need to truncate the result.
 	if t == RuneTypeEOF {
+		if s > 0 {
+			_ = sc.inner.UnreadRune()
+		}
 		return 0, 0, RuneTypeEOF, nil
 	}
 
@@ -231,8 +252,9 @@ func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) {
 }
 
 func (sc *elemRuneTypeScanner) UnreadRune() error {
+	ret := sc.inner.UnreadRune()
 	sc.repeat = true
-	return sc.inner.UnreadRune()
+	return ret
 }
 
 func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() }
diff --git a/decode_scan_test.go b/decode_scan_test.go
index 8bc33e3..6fd9369 100644
--- a/decode_scan_test.go
+++ b/decode_scan_test.go
@@ -26,14 +26,16 @@ func (r ReadRuneTypeResult) String() string {
 }
 
 type runeTypeScannerTestcase struct {
-	Input string
-	Exp   []ReadRuneTypeResult
+	Input        string
+	ExpRemainder string
+	Exp          []ReadRuneTypeResult
 }
 
 func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneReader) runeTypeScanner) {
 	for tcName, tc := range testcases {
 		t.Run(tcName, func(t *testing.T) {
-			sc := factory(strings.NewReader(tc.Input))
+			reader := strings.NewReader(tc.Input)
+			sc := factory(reader)
 			var exp, act []string
 			for _, iExp := range tc.Exp {
 				var iAct ReadRuneTypeResult
@@ -47,13 +49,14 @@ func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestc
 				act = append(act, iAct.String())
 			}
 			assert.Equal(t, exp, act)
+			assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():])
 		})
 	}
 }
 
 func TestRuneTypeScanner(t *testing.T) {
 	testcases := map[string]runeTypeScannerTestcase{
-		"basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -70,7 +73,7 @@ func TestRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -89,7 +92,7 @@ func TestRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"unread2": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"unread2": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -109,7 +112,7 @@ func TestRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"unread-eof": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"unread-eof": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -128,12 +131,26 @@ func TestRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"syntax-error": {`[[0,]`, []ReadRuneTypeResult{
+		"syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{
 			{'[', 1, RuneTypeArrayBeg, nil},
 			{'[', 1, RuneTypeArrayBeg, nil},
 			{'0', 1, RuneTypeNumberIntZero, nil},
 			{',', 1, RuneTypeArrayComma, nil},
 			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+		}},
+		"multi-value": {`1{}`, `}`, []ReadRuneTypeResult{
+			{'1', 1, RuneTypeNumberIntDig, nil},
+			{'{', 1, RuneTypeEOF, nil},
+			{'{', 1, RuneTypeEOF, nil},
+			{'{', 1, RuneTypeEOF, nil},
+		}},
+		"early-eof": {`{`, ``, []ReadRuneTypeResult{
+			{'{', 1, RuneTypeObjectBeg, nil},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
 		}},
 	}
 	testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
@@ -145,7 +162,7 @@ func TestRuneTypeScanner(t *testing.T) {
 
 func TestNoWSRuneTypeScanner(t *testing.T) {
 	testcases := map[string]runeTypeScannerTestcase{
-		"basic": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -161,7 +178,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"unread": {`{"foo": 12.0}`, []ReadRuneTypeResult{
+		"unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -179,7 +196,7 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
-		"tail": {`{"foo": 12.0}  `, []ReadRuneTypeResult{
+		"tail": {`{"foo": 12.0}  `, ``, []ReadRuneTypeResult{
 			{'{', 1, RuneTypeObjectBeg, nil},
 			{'"', 1, RuneTypeStringBeg, nil},
 			{'f', 1, RuneTypeStringChar, nil},
@@ -195,6 +212,18 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
 			{0, 0, RuneTypeEOF, nil},
 			{0, 0, RuneTypeEOF, nil},
 		}},
+		"multi-value": {`1{}`, `}`, []ReadRuneTypeResult{
+			{'1', 1, RuneTypeNumberIntDig, nil},
+			{'{', 1, RuneTypeEOF, nil},
+			{'{', 1, RuneTypeEOF, nil},
+			{'{', 1, RuneTypeEOF, nil},
+		}},
+		"early-eof": {` {`, ``, []ReadRuneTypeResult{
+			{'{', 1, RuneTypeObjectBeg, nil},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}},
+		}},
 	}
 	testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
 		return &noWSRuneTypeScanner{
@@ -206,6 +235,83 @@ func TestNoWSRuneTypeScanner(t *testing.T) {
 }
 
 func TestElemRuneTypeScanner(t *testing.T) {
+	testcases := map[string]runeTypeScannerTestcase{
+		"basic": {`1`, ``, []ReadRuneTypeResult{
+			{'1', 1, RuneTypeNumberIntDig, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+		}},
+		"syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{
+			{'[', 1, RuneTypeArrayBeg, nil},
+			{'[', 1, RuneTypeArrayBeg, nil},
+			{'0', 1, RuneTypeNumberIntZero, nil},
+			{',', 1, RuneTypeArrayComma, nil},
+			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+			{']', 1, RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}},
+		}},
+		"multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{
+			{'1', 1, RuneTypeNumberIntDig, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+		}},
+		"fragment": {`1,`, `,`, []ReadRuneTypeResult{
+			{'1', 1, RuneTypeNumberIntDig, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+			{0, 0, RuneTypeEOF, nil},
+		}},
+		"early-eof": {`{`, ``, []ReadRuneTypeResult{
+			{'{', 1, RuneTypeObjectBeg, nil},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+			{0, 0, RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}},
+		}},
+	}
+	t.Run("top-level", func(t *testing.T) {
+		testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
+			return &elemRuneTypeScanner{
+				inner: &noWSRuneTypeScanner{
+					inner: &runeTypeScannerImpl{
+						inner: reader,
+					},
+				},
+			}
+		})
+	})
+
+	for tcName, tc := range testcases {
+		tc.Input = `[` + tc.Input
+		for _, res := range tc.Exp {
+			if se, ok := res.e.(*DecodeSyntaxError); ok {
+				se.Offset++
+			}
+		}
+		testcases[tcName] = tc
+	}
+	t.Run("child", func(t *testing.T) {
+		testRuneTypeScanner(t, testcases, func(reader io.RuneReader) runeTypeScanner {
+			inner := &noWSRuneTypeScanner{
+				inner: &runeTypeScannerImpl{
+					inner: reader,
+				},
+			}
+			var res ReadRuneTypeResult
+			res.r, res.s, res.t, res.e = inner.ReadRuneType()
+			require.Equal(t,
+				ReadRuneTypeResult{'[', 1, RuneTypeArrayBeg, nil}.String(),
+				res.String())
+
+			return &elemRuneTypeScanner{
+				inner: inner,
+			}
+		})
+	})
+}
+
+func TestElemRuneTypeScanner2(t *testing.T) {
 	parent := &noWSRuneTypeScanner{
 		inner: &runeTypeScannerImpl{
 			inner: strings.NewReader(` { "foo" : 12.0 } `),
diff --git a/decode_test.go b/decode_test.go
new file mode 100644
index 0000000..8220e39
--- /dev/null
+++ b/decode_test.go
@@ -0,0 +1,21 @@
+// Copyright (C) 2022  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package lowmemjson
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestDecodeNumber(t *testing.T) {
+	r := strings.NewReader(`1{}`)
+
+	var num int
+	assert.NoError(t, Decode(r, &num))
+	assert.Equal(t, 1, num)
+	assert.Equal(t, 2, r.Len()) // check that it didn't read too far
+}
diff --git a/errors.go b/errors.go
index e71d79a..3978d62 100644
--- a/errors.go
+++ b/errors.go
@@ -48,6 +48,7 @@ type DecodeSyntaxError struct {
 func (e *DecodeSyntaxError) Error() string {
 	return fmt.Sprintf("json: syntax error at input byte %v: %v", e.Offset, e.Err)
 }
+func (e *DecodeSyntaxError) Unwrap() error { return e.Err }
 
 // A *DecodeTypeError is returned from Decode if the JSON input is not
 // appropriate for the given Go type.
author	Luke Shumaker <lukeshu@datawire.io>	2022-08-16 22:40:19 -0600
committer	Luke Shumaker <lukeshu@datawire.io>	2022-08-17 00:12:35 -0600
commit	e57bee02e02b7e3697d6c3cb8b75923a92100427 (patch)
tree	f32610f2b73fbea1f2a94e108fabca18d31d5d27
parent	87b02577e50b76d373e3c6b921d776e39cb83346 (diff)