From a4ffa83919375653415b670fdbf136e29c1b1c2c Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Thu, 23 Feb 2023 22:08:27 -0700
Subject: compat/json: Unify the error conversion

---
 compat/json/compat.go | 168 ++++++++++++++++++++++++++++----------------------
 1 file changed, 93 insertions(+), 75 deletions(-)

diff --git a/compat/json/compat.go b/compat/json/compat.go
index 695c1a8..c2d47c0 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -40,22 +40,100 @@ type (
 	// MarshalerError        = json.MarshalerError // Duplicated to access a private field.
 )
 
-// Encode wrappers ///////////////////////////////////////////////////
+// Error conversion //////////////////////////////////////////////////
 
-func convertEncodeError(err error) error {
-	if me, ok := err.(*lowmemjson.EncodeMethodError); ok {
-		err = &MarshalerError{
-			Type:       me.Type,
-			Err:        me.Err,
-			sourceFunc: me.SourceFunc,
+func convertError(err error, isUnmarshal bool) error {
+	switch err := err.(type) {
+	case nil:
+		return nil
+	case *lowmemjson.DecodeArgumentError:
+		return err
+	case *lowmemjson.DecodeError:
+		switch suberr := err.Err.(type) {
+		case *lowmemjson.DecodeReadError:
+			return err
+		case *lowmemjson.DecodeSyntaxError:
+			switch {
+			case errors.Is(err, io.EOF):
+				return io.EOF
+			case errors.Is(err, io.ErrUnexpectedEOF) && isUnmarshal:
+				return &SyntaxError{
+					msg:    "unexpected end of JSON input",
+					Offset: suberr.Offset,
+				}
+			default:
+				return &SyntaxError{
+					msg:    suberr.Err.Error(),
+					Offset: suberr.Offset + 1,
+				}
+			}
+		case *lowmemjson.DecodeTypeError:
+			switch subsuberr := suberr.Err.(type) {
+			case *UnmarshalTypeError:
+				// Populate the .Struct and .Field members.
+				subsuberr.Struct = err.FieldParent
+				subsuberr.Field = err.FieldName
+				return subsuberr
+			default:
+				switch {
+				case errors.Is(err, lowmemjson.ErrDecodeNonEmptyInterface),
+					errors.Is(err, strconv.ErrSyntax),
+					errors.Is(err, strconv.ErrRange):
+					return &UnmarshalTypeError{
+						Value:  suberr.JSONType,
+						Type:   suberr.GoType,
+						Offset: suberr.Offset,
+						Struct: err.FieldParent,
+						Field:  err.FieldName,
+					}
+				default:
+					return subsuberr
+				}
+			case nil, *lowmemjson.DecodeArgumentError:
+				return &UnmarshalTypeError{
+					Value:  suberr.JSONType,
+					Type:   suberr.GoType,
+					Offset: suberr.Offset,
+					Struct: err.FieldParent,
+					Field:  err.FieldName,
+				}
+			}
+		default:
+			panic(fmt.Errorf("should not happen: unexpected lowmemjson.DecodeError sub-type: %T: %w", suberr, err))
+		}
+	case *lowmemjson.EncodeWriteError:
+		return err
+	case *lowmemjson.EncodeTypeError:
+		return err
+	case *lowmemjson.EncodeValueError:
+		return err
+	case *lowmemjson.EncodeMethodError:
+		return &MarshalerError{
+			Type:       err.Type,
+			Err:        err.Err,
+			sourceFunc: err.SourceFunc,
+		}
+	case *lowmemjson.ReEncodeWriteError:
+		return err
+	case *lowmemjson.ReEncodeSyntaxError:
+		ret := &SyntaxError{
+			msg:    err.Err.Error(),
+			Offset: err.Offset + 1,
 		}
+		if errors.Is(err, io.ErrUnexpectedEOF) {
+			ret.msg = "unexpected end of JSON input"
+		}
+		return ret
+	default:
+		panic(fmt.Errorf("should not happen: unexpected lowmemjson error type: %T: %w", err, err))
 	}
-	return err
 }
 
+// Encode wrappers ///////////////////////////////////////////////////
+
 func marshal(v any, cfg lowmemjson.ReEncoderConfig) ([]byte, error) {
 	var buf bytes.Buffer
-	if err := convertEncodeError(lowmemjson.NewEncoder(lowmemjson.NewReEncoder(&buf, cfg)).Encode(v)); err != nil {
+	if err := convertError(lowmemjson.NewEncoder(lowmemjson.NewReEncoder(&buf, cfg)).Encode(v), false); err != nil {
 		return nil, err
 	}
 	return buf.Bytes(), nil
@@ -105,7 +183,7 @@ func (enc *Encoder) refreshConfig() {
 }
 
 func (enc *Encoder) Encode(v any) error {
-	if err := convertEncodeError(enc.encoder.Encode(v)); err != nil {
+	if err := convertError(enc.encoder.Encode(v), false); err != nil {
 		enc.buf.Reset()
 		return err
 	}
@@ -133,19 +211,6 @@ func (enc *Encoder) SetIndent(prefix, indent string) {
 
 // ReEncode wrappers /////////////////////////////////////////////////
 
-func convertReEncodeError(err error) error {
-	if se, ok := err.(*lowmemjson.ReEncodeSyntaxError); ok {
-		err = &SyntaxError{
-			msg:    se.Err.Error(),
-			Offset: se.Offset + 1,
-		}
-		if errors.Is(se.Err, io.ErrUnexpectedEOF) {
-			err.(*SyntaxError).msg = "unexpected end of JSON input"
-		}
-	}
-	return err
-}
-
 func HTMLEscape(dst *bytes.Buffer, src []byte) {
 	for n := 0; n < len(src); {
 		c, size := utf8.DecodeRune(src[n:])
@@ -172,7 +237,7 @@ func reencode(dst io.Writer, src []byte, cfg lowmemjson.ReEncoderConfig) error {
 	if err == nil {
 		err = formatter.Close()
 	}
-	return convertReEncodeError(err)
+	return convertError(err, false)
 }
 
 func Compact(dst *bytes.Buffer, src []byte) error {
@@ -237,53 +302,6 @@ func Valid(data []byte) bool {
 
 // Decode wrappers ///////////////////////////////////////////////////
 
-func convertDecodeError(err error, isUnmarshal bool) error {
-	if derr, ok := err.(*lowmemjson.DecodeError); ok {
-		switch terr := derr.Err.(type) {
-		case *lowmemjson.DecodeSyntaxError:
-			switch {
-			case errors.Is(terr.Err, io.EOF):
-				err = io.EOF
-			case errors.Is(terr.Err, io.ErrUnexpectedEOF) && isUnmarshal:
-				err = &SyntaxError{
-					msg:    "unexpected end of JSON input",
-					Offset: terr.Offset,
-				}
-			default:
-				err = &SyntaxError{
-					msg:    terr.Err.Error(),
-					Offset: terr.Offset + 1,
-				}
-			}
-		case *lowmemjson.DecodeTypeError:
-			if typeErr, ok := terr.Err.(*json.UnmarshalTypeError); ok {
-				err = &UnmarshalTypeError{
-					Value:  typeErr.Value,
-					Type:   typeErr.Type,
-					Offset: typeErr.Offset,
-					Struct: derr.FieldParent,
-					Field:  derr.FieldName,
-				}
-			} else if _, isArgErr := terr.Err.(*lowmemjson.DecodeArgumentError); terr.Err != nil &&
-				!isArgErr &&
-				!errors.Is(terr.Err, lowmemjson.ErrDecodeNonEmptyInterface) &&
-				!errors.Is(terr.Err, strconv.ErrSyntax) &&
-				!errors.Is(terr.Err, strconv.ErrRange) {
-				err = terr.Err
-			} else {
-				err = &UnmarshalTypeError{
-					Value:  terr.JSONType,
-					Type:   terr.GoType,
-					Offset: terr.Offset,
-					Struct: derr.FieldParent,
-					Field:  derr.FieldName,
-				}
-			}
-		}
-	}
-	return err
-}
-
 type decodeValidator struct{}
 
 func (*decodeValidator) DecodeJSON(r io.RuneScanner) error {
@@ -301,10 +319,10 @@ func (*decodeValidator) DecodeJSON(r io.RuneScanner) error {
 var _ lowmemjson.Decodable = (*decodeValidator)(nil)
 
 func Unmarshal(data []byte, ptr any) error {
-	if err := convertDecodeError(lowmemjson.NewDecoder(bytes.NewReader(data)).DecodeThenEOF(&decodeValidator{}), true); err != nil {
+	if err := convertError(lowmemjson.NewDecoder(bytes.NewReader(data)).DecodeThenEOF(&decodeValidator{}), true); err != nil {
 		return err
 	}
-	if err := convertDecodeError(lowmemjson.NewDecoder(bytes.NewReader(data)).DecodeThenEOF(ptr), true); err != nil {
+	if err := convertError(lowmemjson.NewDecoder(bytes.NewReader(data)).DecodeThenEOF(ptr), true); err != nil {
 		return err
 	}
 	return nil
@@ -363,10 +381,10 @@ func NewDecoder(r io.Reader) *Decoder {
 }
 
 func (dec *Decoder) Decode(ptr any) error {
-	if err := convertDecodeError(dec.validator.Decode(&decodeValidator{}), false); err != nil {
+	if err := convertError(dec.validator.Decode(&decodeValidator{}), false); err != nil {
 		return err
 	}
-	if err := convertDecodeError(dec.Decoder.Decode(ptr), false); err != nil {
+	if err := convertError(dec.Decoder.Decode(ptr), false); err != nil {
 		return err
 	}
 	return nil
-- 
cgit v1.2.3


From 5c33db4970fcb35ae3823d61f9576fd65b5fcbdf Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 24 Feb 2023 20:51:49 -0700
Subject: decode: Touch up a panic error message

---
 decode.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/decode.go b/decode.go
index d4ecd4f..44eaba4 100644
--- a/decode.go
+++ b/decode.go
@@ -316,7 +316,7 @@ func (dec *Decoder) expectRuneOrPanic(ec rune, et jsonparse.RuneType) *DecodeErr
 		return err
 	}
 	if ac != ec || at != et {
-		panic(fmt.Errorf("should not happen: expected %q/%v but got %q/%v", ec, et, ac, at))
+		panic(fmt.Errorf("should not happen: expected r=%q t=%#v but got r=%q t=%#v", ec, et, ac, at))
 	}
 	return nil
 }
-- 
cgit v1.2.3


From d35495540df2b6d3ba16c84ce21627d9dbae000c Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Fri, 10 Feb 2023 23:38:26 -0700
Subject: Fuzz for equivalence between stdlib and lowmemjson

---
 compat/json/equiv_test.go                          | 160 +++++++++++++++++++++
 .../json/testdata/fuzz/FuzzEquiv/0064ebc3507e959b  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/19981bffc2abbaf1  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/57365320c0968611  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/5cd6893f25481dae  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/6a6612e05e0f9e32  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/77e6e971d8684f84  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/8727b16d337d7b81  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/96aac43014471adc  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/9cc52906ed53ef5f  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/a0b9ecf4e99fd85d  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/a5775dd298b90a6c  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/af9bedcb9e0a31e8  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/f6b0960dd3331a00  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/fbbce5ea61559cc6  |   2 +
 .../json/testdata/fuzz/FuzzEquiv/fd29ccbb2af92d4f  |   2 +
 16 files changed, 190 insertions(+)
 create mode 100644 compat/json/equiv_test.go
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/0064ebc3507e959b
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/19981bffc2abbaf1
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/57365320c0968611
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/5cd6893f25481dae
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/6a6612e05e0f9e32
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/77e6e971d8684f84
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/8727b16d337d7b81
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/96aac43014471adc
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/9cc52906ed53ef5f
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/a0b9ecf4e99fd85d
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/a5775dd298b90a6c
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/af9bedcb9e0a31e8
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/f6b0960dd3331a00
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/fbbce5ea61559cc6
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/fd29ccbb2af92d4f

diff --git a/compat/json/equiv_test.go b/compat/json/equiv_test.go
new file mode 100644
index 0000000..246e4b3
--- /dev/null
+++ b/compat/json/equiv_test.go
@@ -0,0 +1,160 @@
+// Copyright (C) 2023  Luke Shumaker <lukeshu@lukeshu.com>
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package json_test
+
+import (
+	"bytes"
+	std "encoding/json"
+	"errors"
+	"io"
+	"strconv"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"github.com/stretchr/testify/assert"
+
+	low "git.lukeshu.com/go/lowmemjson/compat/json"
+)
+
+func assertEquivErr(t *testing.T, stdErr, lowErr error) {
+	if (stdErr == nil) || (lowErr == nil) {
+		// Nil-equal.
+		assert.Equal(t, stdErr, lowErr)
+		return
+	}
+	switch stdErr.(type) {
+	case *std.SyntaxError:
+		if lowErr != nil {
+			stdMsg := stdErr.Error()
+			lowMsg := lowErr.Error()
+
+			// https://github.com/golang/go/issues/58680
+			if strings.HasPrefix(stdMsg, `invalid character ' ' `) &&
+				(errors.Is(lowErr, io.ErrUnexpectedEOF) || lowMsg == "unexpected end of JSON input") {
+				return
+			}
+
+			// https://github.com/golang/go/issues/58713
+			prefix := `invalid character '`
+			if stdMsg != lowMsg && strings.HasPrefix(stdMsg, prefix) && strings.HasPrefix(lowMsg, prefix) {
+				stdRune, stdRuneSize := utf8.DecodeRuneInString(stdMsg[len(prefix):])
+				lowByte := lowMsg[len(prefix)]
+				if lowByte == '\\' {
+					switch lowMsg[len(prefix)+1] {
+					case 'u':
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 32)
+						var buf [4]byte
+						utf8.EncodeRune(buf[:], rune(lowRune))
+						lowByte = buf[0]
+					case 'U':
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:8], 16, 32)
+						var buf [4]byte
+						utf8.EncodeRune(buf[:], rune(lowRune))
+						lowByte = buf[0]
+					}
+				}
+				if stdRune == rune(lowByte) {
+					lowRuneStr := lowMsg[len(prefix):]
+					lowRuneStr = lowRuneStr[:strings.IndexByte(lowRuneStr, '\'')]
+					stdMsg = prefix + lowRuneStr + stdMsg[len(prefix)+stdRuneSize:]
+					stdErr = errors.New(stdMsg)
+				}
+			}
+		}
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal (but be permissive of these not being type aliases).
+	case *std.MarshalerError:
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal (but be permissive of these not being type aliases).
+	default:
+		// Text-equal.
+		assert.Equal(t, stdErr.Error(), lowErr.Error())
+		// TODO: Assert that they are deep-equal.
+	}
+}
+
+func FuzzEquiv(f *testing.F) {
+	f.Fuzz(func(t *testing.T, str []byte) {
+		t.Logf("str=%q", str)
+		t.Run("HTMLEscape", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			std.HTMLEscape(&stdOut, str)
+
+			var lowOut bytes.Buffer
+			low.HTMLEscape(&lowOut, str)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+		})
+		t.Run("Compact", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			stdErr := std.Compact(&stdOut, str)
+
+			var lowOut bytes.Buffer
+			lowErr := low.Compact(&lowOut, str)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Indent", func(t *testing.T) {
+			var stdOut bytes.Buffer
+			stdErr := std.Indent(&stdOut, str, "»", "\t")
+
+			var lowOut bytes.Buffer
+			lowErr := low.Indent(&lowOut, str, "»", "\t")
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Valid", func(t *testing.T) {
+			stdValid := std.Valid(str) && utf8.Valid(str) // https://github.com/golang/go/issues/58517
+			lowValid := low.Valid(str)
+			assert.Equal(t, stdValid, lowValid)
+		})
+		t.Run("Decode-Encode", func(t *testing.T) {
+			var stdObj any
+			stdErr := std.NewDecoder(bytes.NewReader(str)).Decode(&stdObj)
+
+			var lowObj any
+			lowErr := low.NewDecoder(bytes.NewReader(str)).Decode(&lowObj)
+
+			assert.Equal(t, stdObj, lowObj)
+			assertEquivErr(t, stdErr, lowErr)
+			if t.Failed() {
+				return
+			}
+
+			var stdOut bytes.Buffer
+			stdErr = std.NewEncoder(&stdOut).Encode(stdObj)
+
+			var lowOut bytes.Buffer
+			lowErr = low.NewEncoder(&lowOut).Encode(lowObj)
+
+			assert.Equal(t, stdOut.String(), lowOut.String())
+			assertEquivErr(t, stdErr, lowErr)
+		})
+		t.Run("Unmarshal-Marshal", func(t *testing.T) {
+			var stdObj any
+			stdErr := std.Unmarshal(str, &stdObj)
+
+			var lowObj any
+			lowErr := low.Unmarshal(str, &lowObj)
+
+			assert.Equal(t, stdObj, lowObj)
+			assertEquivErr(t, stdErr, lowErr)
+			if t.Failed() {
+				return
+			}
+
+			stdOut, stdErr := std.Marshal(stdObj)
+			lowOut, lowErr := low.Marshal(lowObj)
+
+			assert.Equal(t, string(stdOut), string(lowOut))
+			assertEquivErr(t, stdErr, lowErr)
+		})
+	})
+}
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/0064ebc3507e959b b/compat/json/testdata/fuzz/FuzzEquiv/0064ebc3507e959b
new file mode 100644
index 0000000..96e9e53
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/0064ebc3507e959b
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("𐠁")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/19981bffc2abbaf1 b/compat/json/testdata/fuzz/FuzzEquiv/19981bffc2abbaf1
new file mode 100644
index 0000000..ecbe8af
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/19981bffc2abbaf1
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("A")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/57365320c0968611 b/compat/json/testdata/fuzz/FuzzEquiv/57365320c0968611
new file mode 100644
index 0000000..5aace7f
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/57365320c0968611
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("[200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000]")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/5cd6893f25481dae b/compat/json/testdata/fuzz/FuzzEquiv/5cd6893f25481dae
new file mode 100644
index 0000000..a51778b
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/5cd6893f25481dae
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0E00")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/6a6612e05e0f9e32 b/compat/json/testdata/fuzz/FuzzEquiv/6a6612e05e0f9e32
new file mode 100644
index 0000000..fe2e128
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/6a6612e05e0f9e32
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\\uD800\"")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/77e6e971d8684f84 b/compat/json/testdata/fuzz/FuzzEquiv/77e6e971d8684f84
new file mode 100644
index 0000000..e3c530f
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/77e6e971d8684f84
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\uebae")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/8727b16d337d7b81 b/compat/json/testdata/fuzz/FuzzEquiv/8727b16d337d7b81
new file mode 100644
index 0000000..e8000f3
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/8727b16d337d7b81
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("00")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/96aac43014471adc b/compat/json/testdata/fuzz/FuzzEquiv/96aac43014471adc
new file mode 100644
index 0000000..9461c7a
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/96aac43014471adc
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\\")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/9cc52906ed53ef5f b/compat/json/testdata/fuzz/FuzzEquiv/9cc52906ed53ef5f
new file mode 100644
index 0000000..1edfb06
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/9cc52906ed53ef5f
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/a0b9ecf4e99fd85d b/compat/json/testdata/fuzz/FuzzEquiv/a0b9ecf4e99fd85d
new file mode 100644
index 0000000..b3c523c
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/a0b9ecf4e99fd85d
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0.")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/a5775dd298b90a6c b/compat/json/testdata/fuzz/FuzzEquiv/a5775dd298b90a6c
new file mode 100644
index 0000000..ca6f6f5
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/a5775dd298b90a6c
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\\u")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/af9bedcb9e0a31e8 b/compat/json/testdata/fuzz/FuzzEquiv/af9bedcb9e0a31e8
new file mode 100644
index 0000000..778cc61
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/af9bedcb9e0a31e8
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0 ")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/f6b0960dd3331a00 b/compat/json/testdata/fuzz/FuzzEquiv/f6b0960dd3331a00
new file mode 100644
index 0000000..9644b51
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/f6b0960dd3331a00
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"0\x85\xcd\xc0\xf3\xcb\xc1\xb3\xf2\xf5\xa4\xc1\xd40\xba\xe9\"")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/fbbce5ea61559cc6 b/compat/json/testdata/fuzz/FuzzEquiv/fbbce5ea61559cc6
new file mode 100644
index 0000000..712fab9
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/fbbce5ea61559cc6
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\U00054516")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/fd29ccbb2af92d4f b/compat/json/testdata/fuzz/FuzzEquiv/fd29ccbb2af92d4f
new file mode 100644
index 0000000..9dc2675
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/fd29ccbb2af92d4f
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("Ǒ")
-- 
cgit v1.2.3


From 051f966039028d257f27fc3a42c10cbff9f7c738 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Thu, 23 Feb 2023 21:30:12 -0700
Subject: decode: Include the invalid UTF-8 byte in error messages

---
 ReleaseNotes.md                                    |  4 ++
 compat/json/compat.go                              | 33 ++++++++++++++--
 compat/json/compat_test.go                         | 46 ++++++++++++----------
 .../json/testdata/fuzz/FuzzEquiv/9e35149f0eb0866b  |  2 +
 decode_scan.go                                     | 15 +++++++
 5 files changed, 75 insertions(+), 25 deletions(-)
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/9e35149f0eb0866b

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 48982e4..af2adcc 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -14,6 +14,10 @@
      then the first type error encountered is returned.  This is
      consistent with the behavior of `encoding/json`.
 
+   - Bugfix: Decoder: If there is a syntax error in a byte that
+     invalid UTF-8, include that byte value in the error message
+     rather than including the U+FFFD Unicode replacement character.
+
 # v0.3.7 (2023-02-20)
 
   Theme: Fixes from fuzzing (part 1?)
diff --git a/compat/json/compat.go b/compat/json/compat.go
index c2d47c0..6f13fbb 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -329,7 +329,10 @@ func Unmarshal(data []byte, ptr any) error {
 }
 
 type teeRuneScanner struct {
-	src      io.RuneScanner
+	src interface {
+		io.RuneScanner
+		io.ByteScanner
+	}
 	dst      *bytes.Buffer
 	lastSize int
 }
@@ -337,11 +340,14 @@ type teeRuneScanner struct {
 func (tee *teeRuneScanner) ReadRune() (r rune, size int, err error) {
 	r, size, err = tee.src.ReadRune()
 	if err == nil {
-		if _, err := tee.dst.WriteRune(r); err != nil {
-			return 0, 0, err
+		if r == utf8.RuneError && size == 1 {
+			_ = tee.src.UnreadRune()
+			b, _ := tee.src.ReadByte()
+			_ = tee.dst.WriteByte(b)
+		} else {
+			_, _ = tee.dst.WriteRune(r)
 		}
 	}
-
 	tee.lastSize = size
 	return
 }
@@ -356,6 +362,25 @@ func (tee *teeRuneScanner) UnreadRune() error {
 	return nil
 }
 
+func (tee *teeRuneScanner) ReadByte() (b byte, err error) {
+	b, err = tee.src.ReadByte()
+	if err == nil {
+		_ = tee.dst.WriteByte(b)
+		tee.lastSize = 1
+	}
+	return
+}
+
+func (tee *teeRuneScanner) UnreadByte() error {
+	if tee.lastSize != 1 {
+		return lowmemjson.ErrInvalidUnreadRune
+	}
+	_ = tee.src.UnreadByte()
+	tee.dst.Truncate(tee.dst.Len() - tee.lastSize)
+	tee.lastSize = 0
+	return nil
+}
+
 type Decoder struct {
 	validatorBuf *bufio.Reader
 	validator    *lowmemjson.Decoder
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index 098ac85..6aab103 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -72,13 +72,14 @@ func TestCompatCompact(t *testing.T) {
 		Err string
 	}
 	testcases := map[string]testcase{
-		"trunc":     {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
-		"object":    {In: `{}`, Out: `{}`},
-		"non-utf8":  {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
-		"float":     {In: `1.200e003`, Out: `1.200e003`},
-		"hex-lower": {In: `"\uabcd"`, Out: `"\uabcd"`},
-		"hex-upper": {In: `"\uABCD"`, Out: `"\uABCD"`},
-		"hex-mixed": {In: `"\uAbCd"`, Out: `"\uAbCd"`},
+		"trunc":        {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
+		"object":       {In: `{}`, Out: `{}`},
+		"non-utf8":     {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
+		"float":        {In: `1.200e003`, Out: `1.200e003`},
+		"hex-lower":    {In: `"\uabcd"`, Out: `"\uabcd"`},
+		"hex-upper":    {In: `"\uABCD"`, Out: `"\uABCD"`},
+		"hex-mixed":    {In: `"\uAbCd"`, Out: `"\uAbCd"`},
+		"invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`},
 	}
 	for tcName, tc := range testcases {
 		tc := tc
@@ -105,20 +106,21 @@ func TestCompatIndent(t *testing.T) {
 		Err string
 	}
 	testcases := map[string]testcase{
-		"trunc":     {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
-		"object":    {In: `{}`, Out: `{}`},
-		"non-utf8":  {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
-		"float":     {In: `1.200e003`, Out: `1.200e003`},
-		"tailws0":   {In: `0`, Out: `0`},
-		"tailws1":   {In: `0 `, Out: `0 `},
-		"tailws2":   {In: `0  `, Out: `0  `},
-		"tailws3":   {In: "0\n", Out: "0\n"},
-		"headws1":   {In: ` 0`, Out: `0`},
-		"objws1":    {In: `{"a"  :  1}`, Out: "{\n>.\"a\": 1\n>}"},
-		"objws2":    {In: "{\"a\"\n:\n1}", Out: "{\n>.\"a\": 1\n>}"},
-		"hex-lower": {In: `"\uabcd"`, Out: `"\uabcd"`},
-		"hex-upper": {In: `"\uABCD"`, Out: `"\uABCD"`},
-		"hex-mixed": {In: `"\uAbCd"`, Out: `"\uAbCd"`},
+		"trunc":        {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
+		"object":       {In: `{}`, Out: `{}`},
+		"non-utf8":     {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
+		"float":        {In: `1.200e003`, Out: `1.200e003`},
+		"tailws0":      {In: `0`, Out: `0`},
+		"tailws1":      {In: `0 `, Out: `0 `},
+		"tailws2":      {In: `0  `, Out: `0  `},
+		"tailws3":      {In: "0\n", Out: "0\n"},
+		"headws1":      {In: ` 0`, Out: `0`},
+		"objws1":       {In: `{"a"  :  1}`, Out: "{\n>.\"a\": 1\n>}"},
+		"objws2":       {In: "{\"a\"\n:\n1}", Out: "{\n>.\"a\": 1\n>}"},
+		"hex-lower":    {In: `"\uabcd"`, Out: `"\uabcd"`},
+		"hex-upper":    {In: `"\uABCD"`, Out: `"\uABCD"`},
+		"hex-mixed":    {In: `"\uAbCd"`, Out: `"\uAbCd"`},
+		"invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`},
 	}
 	for tcName, tc := range testcases {
 		tc := tc
@@ -181,6 +183,7 @@ func TestCompatUnmarshal(t *testing.T) {
 		"two-objs":             {In: `{} {}`, ExpOut: nil, ExpErr: `invalid character '{' after top-level value`},
 		"two-numbers1":         {In: `00`, ExpOut: nil, ExpErr: `invalid character '0' after top-level value`},
 		"two-numbers2":         {In: `1 2`, ExpOut: nil, ExpErr: `invalid character '2' after top-level value`},
+		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`},
 		// 2e308 is slightly more than math.MaxFloat64 (~1.79e308)
 		"obj-overflow":      {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"ary-overflow":      {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
@@ -223,6 +226,7 @@ func TestCompatDecode(t *testing.T) {
 		"two-objs":             {In: `{} {}`, ExpOut: map[string]any{}},
 		"two-numbers1":         {In: `00`, ExpOut: float64(0)},
 		"two-numbers2":         {In: `1 2`, ExpOut: float64(1)},
+		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`},
 		// 2e308 is slightly more than math.MaxFloat64 (~1.79e308)
 		"obj-overflow":      {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"ary-overflow":      {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/9e35149f0eb0866b b/compat/json/testdata/fuzz/FuzzEquiv/9e35149f0eb0866b
new file mode 100644
index 0000000..bb8752b
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/9e35149f0eb0866b
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\x85")
diff --git a/decode_scan.go b/decode_scan.go
index 63694c4..940de49 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -6,6 +6,7 @@ package lowmemjson
 
 import (
 	"io"
+	"unicode/utf8"
 
 	"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
 )
@@ -55,6 +56,17 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
 		sc.offset += int64(sc.rSize)
 		switch err {
 		case nil:
+			invalidUTF8 := false
+			if sc.rRune == utf8.RuneError && sc.rSize == 1 {
+				if bs, ok := sc.inner.(io.ByteScanner); ok {
+					_ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos
+					b, _ := bs.ReadByte()
+					_ = bs.UnreadByte()
+					_, _, _ = sc.inner.ReadRune()
+					sc.rRune = rune(b)
+					invalidUTF8 = true
+				}
+			}
 			sc.rType, err = sc.parser.HandleRune(sc.rRune)
 			if err != nil {
 				sc.rErr = &DecodeSyntaxError{
@@ -62,6 +74,9 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
 					Err:    err,
 				}
 			} else {
+				if invalidUTF8 {
+					sc.rRune = utf8.RuneError
+				}
 				sc.rErr = nil
 			}
 			switch sc.rType {
-- 
cgit v1.2.3


From e38edfa53173c054ff97a5c51f90df0da60f16f5 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Tue, 14 Feb 2023 18:55:57 -0700
Subject: jsonparse: Reword error messages to match encoding/json

---
 ReleaseNotes.md                                      |  3 +++
 compat/json/compat_test.go                           | 18 ++++++++++++++++++
 compat/json/testdata/fuzz/FuzzEquiv/1071d2f6e5b5f7d3 |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/6bced2300496f15c |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/6daf246742074967 |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/7c3168c77fc059cb |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/a955c588d78b5c3a |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/cf667c6f1f3282c1 |  2 ++
 compat/json/testdata/fuzz/FuzzEquiv/ef2c8755a89034da |  2 ++
 internal/jsonparse/parse.go                          | 20 ++++++++++----------
 methods_test.go                                      |  4 ++--
 11 files changed, 47 insertions(+), 12 deletions(-)
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/1071d2f6e5b5f7d3
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/6bced2300496f15c
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/6daf246742074967
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/7c3168c77fc059cb
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/a955c588d78b5c3a
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/cf667c6f1f3282c1
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/ef2c8755a89034da

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index af2adcc..1981678 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -14,6 +14,9 @@
      then the first type error encountered is returned.  This is
      consistent with the behavior of `encoding/json`.
 
+   - Change: Several error strings have been reworded to match
+     `encoding/json`.
+
    - Bugfix: Decoder: If there is a syntax error in a byte that
      invalid UTF-8, include that byte value in the error message
      rather than including the U+FFFD Unicode replacement character.
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index 6aab103..cf9e359 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -188,6 +188,24 @@ func TestCompatUnmarshal(t *testing.T) {
 		"obj-overflow":      {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"ary-overflow":      {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"existing-overflow": {In: `2e308`, InPtr: func() any { x := 4; return &x }(), ExpOut: 4, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type int`},
+		// syntax error messages
+		"syntax-01": {In: `{}x`, ExpErr: `invalid character 'x' after top-level value`},
+		"syntax-02": {In: `x`, ExpErr: `invalid character 'x' looking for beginning of value`},
+		"syntax-03": {In: `{x`, ExpErr: `invalid character 'x' looking for beginning of object key string`},
+		"syntax-04": {In: `{""x`, ExpErr: `invalid character 'x' after object key`},
+		"syntax-05": {In: `{"":0x`, ExpErr: `invalid character 'x' after object key:value pair`},
+		"syntax-06": {In: `[0x`, ExpErr: `invalid character 'x' after array element`},
+		"syntax-07": {In: "\"\x01\"", ExpErr: `invalid character '\x01' in string literal`},
+		"syntax-08": {In: `"\x`, ExpErr: `invalid character 'x' in string escape code`},
+		"syntax-09": {In: `"\ux`, ExpErr: `invalid character 'x' in \u hexadecimal character escape`},
+		"syntax-10": {In: `"\u0x`, ExpErr: `invalid character 'x' in \u hexadecimal character escape`},
+		"syntax-11": {In: `"\u00x`, ExpErr: `invalid character 'x' in \u hexadecimal character escape`},
+		"syntax-12": {In: `"\u000x`, ExpErr: `invalid character 'x' in \u hexadecimal character escape`},
+		"syntax-13": {In: `-x`, ExpErr: `invalid character 'x' in numeric literal`},
+		"syntax-14": {In: `0.x`, ExpErr: `invalid character 'x' after decimal point in numeric literal`},
+		"syntax-15": {In: `1ex`, ExpErr: `invalid character 'x' in exponent of numeric literal`},
+		"syntax-16": {In: `1e+x`, ExpErr: `invalid character 'x' in exponent of numeric literal`},
+		"syntax-17": {In: `fx`, ExpErr: `invalid character 'x' in literal false (expecting 'a')`},
 	}
 	for tcName, tc := range testcases {
 		tc := tc
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/1071d2f6e5b5f7d3 b/compat/json/testdata/fuzz/FuzzEquiv/1071d2f6e5b5f7d3
new file mode 100644
index 0000000..1095817
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/1071d2f6e5b5f7d3
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0EA")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/6bced2300496f15c b/compat/json/testdata/fuzz/FuzzEquiv/6bced2300496f15c
new file mode 100644
index 0000000..4bc9c61
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/6bced2300496f15c
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("{0")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/6daf246742074967 b/compat/json/testdata/fuzz/FuzzEquiv/6daf246742074967
new file mode 100644
index 0000000..b1c3453
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/6daf246742074967
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\\uX")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/7c3168c77fc059cb b/compat/json/testdata/fuzz/FuzzEquiv/7c3168c77fc059cb
new file mode 100644
index 0000000..b95f079
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/7c3168c77fc059cb
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\x1e")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/a955c588d78b5c3a b/compat/json/testdata/fuzz/FuzzEquiv/a955c588d78b5c3a
new file mode 100644
index 0000000..b135daa
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/a955c588d78b5c3a
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0.A")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/cf667c6f1f3282c1 b/compat/json/testdata/fuzz/FuzzEquiv/cf667c6f1f3282c1
new file mode 100644
index 0000000..f6ab571
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/cf667c6f1f3282c1
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\"\\0")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/ef2c8755a89034da b/compat/json/testdata/fuzz/FuzzEquiv/ef2c8755a89034da
new file mode 100644
index 0000000..7d9478d
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/ef2c8755a89034da
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("0E+A")
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index 6432d75..c641f1b 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -619,7 +619,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeObjectEnd, nil
 		default:
-			return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of object key string", c)
 		}
 	case RuneTypeStringEnd: // waiting for ':'
 		switch c {
@@ -682,7 +682,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 0x0020 <= c && c <= 0x10FFFF:
 			return RuneTypeStringChar, nil
 		default:
-			return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q in string literal", c)
 		}
 	case RuneTypeStringEsc: // waiting for escape char
 		switch c {
@@ -692,26 +692,26 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 'u':
 			return par.replaceState(RuneTypeStringEscU), nil
 		default:
-			return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q in string escape code", c)
 		}
 	case RuneTypeStringEscU:
 		if !isHex(c) {
-			return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
 		}
 		return par.replaceState(RuneTypeStringEscUA), nil
 	case RuneTypeStringEscUA:
 		if !isHex(c) {
-			return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
 		}
 		return par.replaceState(RuneTypeStringEscUB), nil
 	case RuneTypeStringEscUB:
 		if !isHex(c) {
-			return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
 		}
 		return par.replaceState(RuneTypeStringEscUC), nil
 	case RuneTypeStringEscUC:
 		if !isHex(c) {
-			return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c)
+			return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c)
 		}
 		par.replaceState(RuneTypeStringBeg)
 		return RuneTypeStringEscUD, nil
@@ -791,7 +791,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberFracDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q after decimal point in numeric literal", c)
 		}
 	case RuneTypeNumberFracDig: // F
 		switch c {
@@ -810,14 +810,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q in exponent of numeric literal", c)
 		}
 	case RuneTypeNumberExpSign: // H
 		switch c {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+			return RuneTypeError, fmt.Errorf("invalid character %q in exponent of numeric literal", c)
 		}
 	case RuneTypeNumberExpDig: // I
 		switch c {
diff --git a/methods_test.go b/methods_test.go
index 64af28c..23a6c4f 100644
--- a/methods_test.go
+++ b/methods_test.go
@@ -304,8 +304,8 @@ func TestMethodsDecode(t *testing.T) {
 	testcases := map[string]testcase{
 		"decode-basic":           {In: `{}`, Obj: &tstDecoder{n: 2}},
 		"decode-basic-eof":       {In: `{}`, Obj: &tstDecoder{n: 5}},
-		"decode-syntax-error":    {In: `{x}`, Obj: &tstDecoder{n: 5}, ExpectedErr: `json: v: syntax error at input byte 1: object: unexpected character: 'x'`},
-		"unmarshal-syntax-error": {In: `{x}`, Obj: &strUnmarshaler{}, ExpectedErr: `json: v: syntax error at input byte 1: object: unexpected character: 'x'`},
+		"decode-syntax-error":    {In: `{x}`, Obj: &tstDecoder{n: 5}, ExpectedErr: `json: v: syntax error at input byte 1: invalid character 'x' looking for beginning of object key string`},
+		"unmarshal-syntax-error": {In: `{x}`, Obj: &strUnmarshaler{}, ExpectedErr: `json: v: syntax error at input byte 1: invalid character 'x' looking for beginning of object key string`},
 		"decode-short":           {In: `{}`, Obj: &tstDecoder{n: 1}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: did not consume entire object`},
 		"decode-err":             {In: `{}`, Obj: &tstDecoder{err: "xxx"}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: xxx`},
 		"decode-err2":            {In: `{}`, Obj: &tstDecoder{n: 1, err: "yyy"}, ExpectedErr: `json: v: cannot decode JSON object at input byte 0 into Go *lowmemjson_test.tstDecoder: yyy`},
-- 
cgit v1.2.3


From 7301cd6c4e97b272bd708d4c87d26609510e6ca7 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Sat, 25 Feb 2023 17:12:39 -0700
Subject: jsonparse: Define an InvalidCharacterError type instead of using
 fmt.Errorf

---
 internal/jsonparse/parse.go | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index c641f1b..214e3ba 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -14,6 +14,15 @@ import (
 
 var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
 
+type InvalidCharacterError struct {
+	Char  rune
+	Where string
+}
+
+func (e *InvalidCharacterError) Error() string {
+	return fmt.Sprintf("invalid character %q %s", e.Char, e.Where)
+}
+
 func isHex(c rune) bool {
 	return ('0' <= c && c <= '9') ||
 		('a' <= c && c <= 'f') ||
@@ -571,7 +580,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		if len(par.barriers) > 0 {
 			return RuneTypeEOF, nil
 		} else {
-			return RuneTypeError, fmt.Errorf("invalid character %q after top-level value", c)
+			return RuneTypeError, &InvalidCharacterError{c, "after top-level value"}
 		}
 	}
 	switch par.stack[len(par.stack)-1] {
@@ -605,7 +614,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 'n':
 			return par.replaceState(RuneTypeNullN), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of value", c)
+			return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of value"}
 		}
 	// object //////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeObjectBeg: // waiting for key to start or '}'
@@ -619,7 +628,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeObjectEnd, nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q looking for beginning of object key string", c)
+			return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of object key string"}
 		}
 	case RuneTypeStringEnd: // waiting for ':'
 		switch c {
@@ -630,7 +639,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.pushState(runeTypeAny)
 			return RuneTypeObjectColon, nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q after object key", c)
+			return RuneTypeError, &InvalidCharacterError{c, "after object key"}
 		}
 	case RuneTypeObjectComma: // waiting for ',' or '}'
 		switch c {
@@ -643,7 +652,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeObjectEnd, nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q after object key:value pair", c)
+			return RuneTypeError, &InvalidCharacterError{c, "after object key:value pair"}
 		}
 	// array ///////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeArrayBeg: // waiting for item to start or ']'
@@ -669,7 +678,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeArrayEnd, nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q after array element", c)
+			return RuneTypeError, &InvalidCharacterError{c, "after array element"}
 		}
 	// string //////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeStringBeg: // waiting for char or '"'
@@ -682,7 +691,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 0x0020 <= c && c <= 0x10FFFF:
 			return RuneTypeStringChar, nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in string literal", c)
+			return RuneTypeError, &InvalidCharacterError{c, "in string literal"}
 		}
 	case RuneTypeStringEsc: // waiting for escape char
 		switch c {
@@ -692,7 +701,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 'u':
 			return par.replaceState(RuneTypeStringEscU), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in string escape code", c)
+			return RuneTypeError, &InvalidCharacterError{c, "in string escape code"}
 		}
 	case RuneTypeStringEscU:
 		if !isHex(c) {
@@ -762,7 +771,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberIntDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in numeric literal", c)
+			return RuneTypeError, &InvalidCharacterError{c, "in numeric literal"}
 		}
 	case RuneTypeNumberIntZero: // C
 		switch c {
@@ -791,7 +800,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberFracDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q after decimal point in numeric literal", c)
+			return RuneTypeError, &InvalidCharacterError{c, "after decimal point in numeric literal"}
 		}
 	case RuneTypeNumberFracDig: // F
 		switch c {
@@ -810,14 +819,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in exponent of numeric literal", c)
+			return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"}
 		}
 	case RuneTypeNumberExpSign: // H
 		switch c {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, fmt.Errorf("invalid character %q in exponent of numeric literal", c)
+			return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"}
 		}
 	case RuneTypeNumberExpDig: // I
 		switch c {
@@ -858,7 +867,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 
 func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
 	if c != exp {
-		return RuneTypeError, fmt.Errorf("invalid character %q in literal %s (expecting %q)", c, context, exp)
+		return RuneTypeError, &InvalidCharacterError{c, fmt.Sprintf("in literal %s (expecting %q)", context, exp)}
 	}
 	if pop {
 		par.popState()
-- 
cgit v1.2.3


From f68498a6fdb421483d9aebb45527452f6255bb68 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Sat, 25 Feb 2023 16:17:01 -0700
Subject: jsonparse: Don't show raw bytes as Unicode

---
 ReleaseNotes.md                  |  4 +++
 compat/json/compat_test.go       |  8 ++---
 compat/json/equiv_test.go        | 29 +++++++++++++++-
 compat/json/testcompat_test.go   |  2 +-
 decode_scan.go                   | 23 +++++++------
 internal/jsonparse/parse.go      | 73 +++++++++++++++++++++-------------------
 internal/jsonparse/parse_test.go |  2 +-
 reencode.go                      |  2 +-
 8 files changed, 90 insertions(+), 53 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index 1981678..e047f7d 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -21,6 +21,10 @@
      invalid UTF-8, include that byte value in the error message
      rather than including the U+FFFD Unicode replacement character.
 
+   - Bugfix: Syntax errors on raw-bytes (for invalid UTF-8) no longer
+     show the raw byte as a `\u00XX` Unicode codepoint, but now as a
+     `\xXX` byte.
+
 # v0.3.7 (2023-02-20)
 
   Theme: Fixes from fuzzing (part 1?)
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index cf9e359..2380c53 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -79,7 +79,7 @@ func TestCompatCompact(t *testing.T) {
 		"hex-lower":    {In: `"\uabcd"`, Out: `"\uabcd"`},
 		"hex-upper":    {In: `"\uABCD"`, Out: `"\uABCD"`},
 		"hex-mixed":    {In: `"\uAbCd"`, Out: `"\uAbCd"`},
-		"invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`},
+		"invalid-utf8": {In: "\x85", Err: `invalid character '\x85' looking for beginning of value`},
 	}
 	for tcName, tc := range testcases {
 		tc := tc
@@ -120,7 +120,7 @@ func TestCompatIndent(t *testing.T) {
 		"hex-lower":    {In: `"\uabcd"`, Out: `"\uabcd"`},
 		"hex-upper":    {In: `"\uABCD"`, Out: `"\uABCD"`},
 		"hex-mixed":    {In: `"\uAbCd"`, Out: `"\uAbCd"`},
-		"invalid-utf8": {In: "\x85", Err: `invalid character '\u0085' looking for beginning of value`},
+		"invalid-utf8": {In: "\x85", Err: `invalid character '\x85' looking for beginning of value`},
 	}
 	for tcName, tc := range testcases {
 		tc := tc
@@ -183,7 +183,7 @@ func TestCompatUnmarshal(t *testing.T) {
 		"two-objs":             {In: `{} {}`, ExpOut: nil, ExpErr: `invalid character '{' after top-level value`},
 		"two-numbers1":         {In: `00`, ExpOut: nil, ExpErr: `invalid character '0' after top-level value`},
 		"two-numbers2":         {In: `1 2`, ExpOut: nil, ExpErr: `invalid character '2' after top-level value`},
-		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`},
+		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\x85' looking for beginning of value`},
 		// 2e308 is slightly more than math.MaxFloat64 (~1.79e308)
 		"obj-overflow":      {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"ary-overflow":      {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
@@ -244,7 +244,7 @@ func TestCompatDecode(t *testing.T) {
 		"two-objs":             {In: `{} {}`, ExpOut: map[string]any{}},
 		"two-numbers1":         {In: `00`, ExpOut: float64(0)},
 		"two-numbers2":         {In: `1 2`, ExpOut: float64(1)},
-		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\u0085' looking for beginning of value`},
+		"invalid-utf8":         {In: "\x85", ExpErr: `invalid character '\x85' looking for beginning of value`},
 		// 2e308 is slightly more than math.MaxFloat64 (~1.79e308)
 		"obj-overflow":      {In: `{"foo":"bar", "baz":2e308, "qux": "orb"}`, ExpOut: map[string]any{"foo": "bar", "baz": nil, "qux": "orb"}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
 		"ary-overflow":      {In: `["foo",2e308,"bar",3e308]`, ExpOut: []any{"foo", nil, "bar", nil}, ExpErr: `json: cannot unmarshal number 2e308 into Go value of type float64`},
diff --git a/compat/json/equiv_test.go b/compat/json/equiv_test.go
index 246e4b3..cb02f43 100644
--- a/compat/json/equiv_test.go
+++ b/compat/json/equiv_test.go
@@ -44,8 +44,27 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) {
 				lowByte := lowMsg[len(prefix)]
 				if lowByte == '\\' {
 					switch lowMsg[len(prefix)+1] {
+					case 'a':
+						lowByte = '\a'
+					case 'b':
+						lowByte = '\b'
+					case 'f':
+						lowByte = '\f'
+					case 'n':
+						lowByte = '\n'
+					case 'r':
+						lowByte = '\r'
+					case 't':
+						lowByte = '\t'
+					case 'v':
+						lowByte = '\v'
+					case '\\', '\'':
+						lowByte = lowMsg[len(prefix)+1]
+					case 'x':
+						lowByte64, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:2], 16, 8)
+						lowByte = byte(lowByte64)
 					case 'u':
-						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 32)
+						lowRune, _ := strconv.ParseUint(lowMsg[len(prefix)+2:][:4], 16, 16)
 						var buf [4]byte
 						utf8.EncodeRune(buf[:], rune(lowRune))
 						lowByte = buf[0]
@@ -63,6 +82,14 @@ func assertEquivErr(t *testing.T, stdErr, lowErr error) {
 					stdErr = errors.New(stdMsg)
 				}
 			}
+
+			// I'd file a ticket for this, but @dsnet (one of the encoding/json maintainers) says that he's
+			// working on a parser-rewrite that would fix a bunch of this type of issue.
+			// https://github.com/golang/go/issues/58680#issuecomment-1444224084
+			if strings.HasPrefix(stdMsg, `invalid character '\u00`) && strings.HasPrefix(lowMsg, `invalid character '\x`) {
+				stdMsg = `invalid character '\x` + strings.TrimPrefix(stdMsg, `invalid character '\u00`)
+				stdErr = errors.New(stdMsg)
+			}
 		}
 		// Text-equal.
 		assert.Equal(t, stdErr.Error(), lowErr.Error())
diff --git a/compat/json/testcompat_test.go b/compat/json/testcompat_test.go
index 73153d9..affcd7c 100644
--- a/compat/json/testcompat_test.go
+++ b/compat/json/testcompat_test.go
@@ -32,7 +32,7 @@ func checkValid(in []byte, scan *lowmemjson.ReEncoderConfig) error {
 func isValidNumber(s string) bool {
 	var parser jsonparse.Parser
 	for _, r := range s {
-		if t, _ := parser.HandleRune(r); !t.IsNumber() {
+		if t, _ := parser.HandleRune(r, true); !t.IsNumber() {
 			return false
 		}
 	}
diff --git a/decode_scan.go b/decode_scan.go
index 940de49..7ef3e71 100644
--- a/decode_scan.go
+++ b/decode_scan.go
@@ -23,10 +23,11 @@ type runeTypeScanner struct {
 	rTypeOK bool
 	repeat  bool
 
-	rRune rune
-	rSize int
-	rType jsonparse.RuneType
-	rErr  error
+	rRune   rune
+	rSize   int
+	rIsRune bool
+	rType   jsonparse.RuneType
+	rErr    error
 }
 
 // The returned error is a *ReadError, a *SyntaxError, or nil.
@@ -56,7 +57,7 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
 		sc.offset += int64(sc.rSize)
 		switch err {
 		case nil:
-			invalidUTF8 := false
+			sc.rIsRune = true
 			if sc.rRune == utf8.RuneError && sc.rSize == 1 {
 				if bs, ok := sc.inner.(io.ByteScanner); ok {
 					_ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos
@@ -64,19 +65,16 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
 					_ = bs.UnreadByte()
 					_, _, _ = sc.inner.ReadRune()
 					sc.rRune = rune(b)
-					invalidUTF8 = true
+					sc.rIsRune = false
 				}
 			}
-			sc.rType, err = sc.parser.HandleRune(sc.rRune)
+			sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
 			if err != nil {
 				sc.rErr = &DecodeSyntaxError{
 					Offset: sc.offset - int64(sc.rSize),
 					Err:    err,
 				}
 			} else {
-				if invalidUTF8 {
-					sc.rRune = utf8.RuneError
-				}
 				sc.rErr = nil
 			}
 			switch sc.rType {
@@ -107,6 +105,9 @@ func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error)
 		}
 	}
 	sc.repeat = false
+	if sc.rSize > 0 && !sc.rIsRune {
+		return utf8.RuneError, sc.rSize, sc.rType, sc.rErr
+	}
 	return sc.rRune, sc.rSize, sc.rType, sc.rErr
 }
 
@@ -139,7 +140,7 @@ func (sc *runeTypeScanner) PopReadBarrier() {
 	case sc.repeat:
 		// re-figure the rType and rErr
 		var err error
-		sc.rType, err = sc.parser.HandleRune(sc.rRune)
+		sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
 		if err != nil {
 			sc.rErr = &DecodeSyntaxError{
 				Offset: sc.offset - int64(sc.rSize),
diff --git a/internal/jsonparse/parse.go b/internal/jsonparse/parse.go
index 214e3ba..5547df4 100644
--- a/internal/jsonparse/parse.go
+++ b/internal/jsonparse/parse.go
@@ -15,12 +15,17 @@ import (
 var ErrParserExceededMaxDepth = errors.New("exceeded max depth")
 
 type InvalidCharacterError struct {
-	Char  rune
-	Where string
+	Char   rune
+	IsRune bool
+	Where  string
 }
 
 func (e *InvalidCharacterError) Error() string {
-	return fmt.Sprintf("invalid character %q %s", e.Char, e.Where)
+	if e.IsRune {
+		return fmt.Sprintf("invalid character %q %s", e.Char, e.Where)
+	} else {
+		return fmt.Sprintf("invalid character '\\x%02x' %s", e.Char, e.Where)
+	}
 }
 
 func isHex(c rune) bool {
@@ -520,7 +525,7 @@ func (par *Parser) HandleEOF() (RuneType, error) {
 	case 1:
 		switch {
 		case par.stack[0].IsNumber():
-			if _, err := par.HandleRune('\n'); err == nil {
+			if _, err := par.HandleRune('\n', true); err == nil {
 				return RuneTypeEOF, nil
 			}
 		case par.stack[0] == runeTypeAny:
@@ -562,7 +567,7 @@ func (par *Parser) IsAtBarrier() bool {
 // RuneTypeEOF indicates that the rune cannot be appended to the JSON
 // document; a new JSON document must be started in order to process
 // that rune.
-func (par *Parser) HandleRune(c rune) (RuneType, error) {
+func (par *Parser) HandleRune(c rune, isRune bool) (RuneType, error) {
 	if par.closed {
 		return RuneTypeError, iofs.ErrClosed
 	}
@@ -580,7 +585,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		if len(par.barriers) > 0 {
 			return RuneTypeEOF, nil
 		} else {
-			return RuneTypeError, &InvalidCharacterError{c, "after top-level value"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "after top-level value"}
 		}
 	}
 	switch par.stack[len(par.stack)-1] {
@@ -614,7 +619,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 'n':
 			return par.replaceState(RuneTypeNullN), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of value"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of value"}
 		}
 	// object //////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeObjectBeg: // waiting for key to start or '}'
@@ -628,7 +633,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeObjectEnd, nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "looking for beginning of object key string"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"}
 		}
 	case RuneTypeStringEnd: // waiting for ':'
 		switch c {
@@ -639,7 +644,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.pushState(runeTypeAny)
 			return RuneTypeObjectColon, nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "after object key"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key"}
 		}
 	case RuneTypeObjectComma: // waiting for ',' or '}'
 		switch c {
@@ -652,7 +657,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeObjectEnd, nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "after object key:value pair"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key:value pair"}
 		}
 	// array ///////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeArrayBeg: // waiting for item to start or ']'
@@ -665,7 +670,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		default:
 			par.replaceState(RuneTypeArrayComma)
 			par.pushState(runeTypeAny)
-			return par.HandleRune(c)
+			return par.HandleRune(c, isRune)
 		}
 	case RuneTypeArrayComma: // waiting for ',' or ']'
 		switch c {
@@ -678,7 +683,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			par.popState()
 			return RuneTypeArrayEnd, nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "after array element"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "after array element"}
 		}
 	// string //////////////////////////////////////////////////////////////////////////////////
 	case RuneTypeStringBeg: // waiting for char or '"'
@@ -691,7 +696,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 0x0020 <= c && c <= 0x10FFFF:
 			return RuneTypeStringChar, nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "in string literal"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "in string literal"}
 		}
 	case RuneTypeStringEsc: // waiting for escape char
 		switch c {
@@ -701,7 +706,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case 'u':
 			return par.replaceState(RuneTypeStringEscU), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "in string escape code"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "in string escape code"}
 		}
 	case RuneTypeStringEscU:
 		if !isHex(c) {
@@ -771,7 +776,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberIntDig), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "in numeric literal"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "in numeric literal"}
 		}
 	case RuneTypeNumberIntZero: // C
 		switch c {
@@ -781,7 +786,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			return par.replaceState(RuneTypeNumberExpE), nil
 		default:
 			par.popState()
-			return par.HandleRune(c)
+			return par.HandleRune(c, isRune)
 		}
 	case RuneTypeNumberIntDig: // D
 		switch c {
@@ -793,14 +798,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			return par.replaceState(RuneTypeNumberExpE), nil
 		default:
 			par.popState()
-			return par.HandleRune(c)
+			return par.HandleRune(c, isRune)
 		}
 	case RuneTypeNumberFracDot: // E
 		switch c {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberFracDig), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "after decimal point in numeric literal"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "after decimal point in numeric literal"}
 		}
 	case RuneTypeNumberFracDig: // F
 		switch c {
@@ -810,7 +815,7 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			return par.replaceState(RuneTypeNumberExpE), nil
 		default:
 			par.popState()
-			return par.HandleRune(c)
+			return par.HandleRune(c, isRune)
 		}
 	case RuneTypeNumberExpE: // G
 		switch c {
@@ -819,14 +824,14 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"}
 		}
 	case RuneTypeNumberExpSign: // H
 		switch c {
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
-			return RuneTypeError, &InvalidCharacterError{c, "in exponent of numeric literal"}
+			return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"}
 		}
 	case RuneTypeNumberExpDig: // I
 		switch c {
@@ -834,40 +839,40 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) {
 			return par.replaceState(RuneTypeNumberExpDig), nil
 		default:
 			par.popState()
-			return par.HandleRune(c)
+			return par.HandleRune(c, isRune)
 		}
 	// literals ////////////////////////////////////////////////////////////////////////////////
 	// true
 	case RuneTypeTrueT:
-		return par.expectRune(c, 'r', RuneTypeTrueR, "true", false)
+		return par.expectRune(c, isRune, 'r', RuneTypeTrueR, "true", false)
 	case RuneTypeTrueR:
-		return par.expectRune(c, 'u', RuneTypeTrueU, "true", false)
+		return par.expectRune(c, isRune, 'u', RuneTypeTrueU, "true", false)
 	case RuneTypeTrueU:
-		return par.expectRune(c, 'e', RuneTypeTrueE, "true", true)
+		return par.expectRune(c, isRune, 'e', RuneTypeTrueE, "true", true)
 	// false
 	case RuneTypeFalseF:
-		return par.expectRune(c, 'a', RuneTypeFalseA, "false", false)
+		return par.expectRune(c, isRune, 'a', RuneTypeFalseA, "false", false)
 	case RuneTypeFalseA:
-		return par.expectRune(c, 'l', RuneTypeFalseL, "false", false)
+		return par.expectRune(c, isRune, 'l', RuneTypeFalseL, "false", false)
 	case RuneTypeFalseL:
-		return par.expectRune(c, 's', RuneTypeFalseS, "false", false)
+		return par.expectRune(c, isRune, 's', RuneTypeFalseS, "false", false)
 	case RuneTypeFalseS:
-		return par.expectRune(c, 'e', RuneTypeFalseE, "false", true)
+		return par.expectRune(c, isRune, 'e', RuneTypeFalseE, "false", true)
 	// null
 	case RuneTypeNullN:
-		return par.expectRune(c, 'u', RuneTypeNullU, "null", false)
+		return par.expectRune(c, isRune, 'u', RuneTypeNullU, "null", false)
 	case RuneTypeNullU:
-		return par.expectRune(c, 'l', RuneTypeNullL1, "null", false)
+		return par.expectRune(c, isRune, 'l', RuneTypeNullL1, "null", false)
 	case RuneTypeNullL1:
-		return par.expectRune(c, 'l', RuneTypeNullL2, "null", true)
+		return par.expectRune(c, isRune, 'l', RuneTypeNullL2, "null", true)
 	default:
 		panic(fmt.Errorf(`should not happen: invalid stack: "%s"`, par.stackString()))
 	}
 }
 
-func (par *Parser) expectRune(c, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
+func (par *Parser) expectRune(c rune, isRune bool, exp rune, typ RuneType, context string, pop bool) (RuneType, error) {
 	if c != exp {
-		return RuneTypeError, &InvalidCharacterError{c, fmt.Sprintf("in literal %s (expecting %q)", context, exp)}
+		return RuneTypeError, &InvalidCharacterError{c, isRune, fmt.Sprintf("in literal %s (expecting %q)", context, exp)}
 	}
 	if pop {
 		par.popState()
diff --git a/internal/jsonparse/parse_test.go b/internal/jsonparse/parse_test.go
index e531daf..acb43e8 100644
--- a/internal/jsonparse/parse_test.go
+++ b/internal/jsonparse/parse_test.go
@@ -69,7 +69,7 @@ func TestParserHandleRune(t *testing.T) {
 			}
 			for i, r := range tc.Input {
 				assert.Equal(t, tc.ExpStack[i], par.stackString())
-				_, err := par.HandleRune(r)
+				_, err := par.HandleRune(r, true)
 				assert.NoError(t, err)
 				assert.Equal(t, tc.ExpStack[i+1], par.stackString())
 			}
diff --git a/reencode.go b/reencode.go
index 7439bf0..8b08aad 100644
--- a/reencode.go
+++ b/reencode.go
@@ -352,7 +352,7 @@ func (enc *ReEncoder) Close() error {
 
 // isRune=false indicates that 'c' is a raw byte from invalid UTF-8.
 func (enc *ReEncoder) handleRune(c rune, size int, isRune bool) {
-	t, err := enc.par.HandleRune(c)
+	t, err := enc.par.HandleRune(c, isRune)
 	if err != nil {
 		enc.err = &ReEncodeSyntaxError{
 			Err:    err,
-- 
cgit v1.2.3


From 2a41777072f48467bef02bb3bd670d95c2b02102 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Tue, 14 Feb 2023 18:55:57 -0700
Subject: compat/json: Handle io.EOF and io.ErrUnexpectedEOF the same

---
 ReleaseNotes.md                                    |  3 +++
 compat/json/compat.go                              | 25 +++++++++++-----------
 compat/json/compat_test.go                         |  2 ++
 .../json/testdata/fuzz/FuzzEquiv/930f49fab2367014  |  2 ++
 .../json/testdata/fuzz/FuzzEquiv/caf81e9797b19c76  |  2 ++
 5 files changed, 21 insertions(+), 13 deletions(-)
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/930f49fab2367014
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/caf81e9797b19c76

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index e047f7d..b7a8f76 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -25,6 +25,9 @@
      show the raw byte as a `\u00XX` Unicode codepoint, but now as a
      `\xXX` byte.
 
+   - Bugfix: compat/json: `io.EOF` is now correctly converted to
+     "unexpected end of JSON input", same as `io.ErrUnexpectedEOF`.
+
 # v0.3.7 (2023-02-20)
 
   Theme: Fixes from fuzzing (part 1?)
diff --git a/compat/json/compat.go b/compat/json/compat.go
index 6f13fbb..4dc15ab 100644
--- a/compat/json/compat.go
+++ b/compat/json/compat.go
@@ -53,19 +53,18 @@ func convertError(err error, isUnmarshal bool) error {
 		case *lowmemjson.DecodeReadError:
 			return err
 		case *lowmemjson.DecodeSyntaxError:
-			switch {
-			case errors.Is(err, io.EOF):
-				return io.EOF
-			case errors.Is(err, io.ErrUnexpectedEOF) && isUnmarshal:
-				return &SyntaxError{
-					msg:    "unexpected end of JSON input",
-					Offset: suberr.Offset,
-				}
-			default:
-				return &SyntaxError{
-					msg:    suberr.Err.Error(),
-					Offset: suberr.Offset + 1,
+			if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
+				if isUnmarshal {
+					return &SyntaxError{
+						msg:    "unexpected end of JSON input",
+						Offset: suberr.Offset,
+					}
 				}
+				return suberr.Err
+			}
+			return &SyntaxError{
+				msg:    suberr.Err.Error(),
+				Offset: suberr.Offset + 1,
 			}
 		case *lowmemjson.DecodeTypeError:
 			switch subsuberr := suberr.Err.(type) {
@@ -120,7 +119,7 @@ func convertError(err error, isUnmarshal bool) error {
 			msg:    err.Err.Error(),
 			Offset: err.Offset + 1,
 		}
-		if errors.Is(err, io.ErrUnexpectedEOF) {
+		if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
 			ret.msg = "unexpected end of JSON input"
 		}
 		return ret
diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index 2380c53..43f17f1 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -72,6 +72,7 @@ func TestCompatCompact(t *testing.T) {
 		Err string
 	}
 	testcases := map[string]testcase{
+		"empty":        {In: ``, Out: ``, Err: `unexpected end of JSON input`},
 		"trunc":        {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
 		"object":       {In: `{}`, Out: `{}`},
 		"non-utf8":     {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
@@ -106,6 +107,7 @@ func TestCompatIndent(t *testing.T) {
 		Err string
 	}
 	testcases := map[string]testcase{
+		"empty":        {In: ``, Out: ``, Err: `unexpected end of JSON input`},
 		"trunc":        {In: `{`, Out: ``, Err: `unexpected end of JSON input`},
 		"object":       {In: `{}`, Out: `{}`},
 		"non-utf8":     {In: "\"\x85\xcd\"", Out: "\"\x85\xcd\""},
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/930f49fab2367014 b/compat/json/testdata/fuzz/FuzzEquiv/930f49fab2367014
new file mode 100644
index 0000000..7390d06
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/930f49fab2367014
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte(" ")
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/caf81e9797b19c76 b/compat/json/testdata/fuzz/FuzzEquiv/caf81e9797b19c76
new file mode 100644
index 0000000..67322c7
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/caf81e9797b19c76
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("")
-- 
cgit v1.2.3


From 4233e5012ece6d5a7fee3b5a518c41d916e1cf52 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Sat, 25 Feb 2023 18:12:15 -0700
Subject: compat/json: compat_test.go: Change the package to "json_test"

---
 compat/json/compat_test.go | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go
index 43f17f1..3de48f7 100644
--- a/compat/json/compat_test.go
+++ b/compat/json/compat_test.go
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-package json
+package json_test
 
 import (
 	"bytes"
@@ -11,6 +11,8 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+
+	"git.lukeshu.com/go/lowmemjson/compat/json"
 )
 
 func TestCompatHTMLEscape(t *testing.T) {
@@ -31,7 +33,7 @@ func TestCompatHTMLEscape(t *testing.T) {
 			t.Parallel()
 			t.Logf("in=%q", tc.In)
 			var dst bytes.Buffer
-			HTMLEscape(&dst, []byte(tc.In))
+			json.HTMLEscape(&dst, []byte(tc.In))
 			assert.Equal(t, tc.Out, dst.String())
 		})
 	}
@@ -58,7 +60,7 @@ func TestCompatValid(t *testing.T) {
 		t.Run(tcName, func(t *testing.T) {
 			t.Parallel()
 			t.Logf("in=%q", tc.In)
-			act := Valid([]byte(tc.In))
+			act := json.Valid([]byte(tc.In))
 			assert.Equal(t, tc.Exp, act)
 		})
 	}
@@ -88,7 +90,7 @@ func TestCompatCompact(t *testing.T) {
 			t.Parallel()
 			t.Logf("in=%q", tc.In)
 			var out bytes.Buffer
-			err := Compact(&out, []byte(tc.In))
+			err := json.Compact(&out, []byte(tc.In))
 			assert.Equal(t, tc.Out, out.String())
 			if tc.Err == "" {
 				assert.NoError(t, err)
@@ -130,7 +132,7 @@ func TestCompatIndent(t *testing.T) {
 			t.Parallel()
 			t.Logf("in=%q", tc.In)
 			var out bytes.Buffer
-			err := Indent(&out, []byte(tc.In), ">", ".")
+			err := json.Indent(&out, []byte(tc.In), ">", ".")
 			assert.Equal(t, tc.Out, out.String())
 			if tc.Err == "" {
 				assert.NoError(t, err)
@@ -157,7 +159,7 @@ func TestCompatMarshal(t *testing.T) {
 		tc := tc
 		t.Run(tcName, func(t *testing.T) {
 			t.Parallel()
-			out, err := Marshal(tc.In)
+			out, err := json.Marshal(tc.In)
 			assert.Equal(t, tc.Out, string(out))
 			if tc.Err == "" {
 				assert.NoError(t, err)
@@ -218,7 +220,7 @@ func TestCompatUnmarshal(t *testing.T) {
 				var out any
 				ptr = &out
 			}
-			err := Unmarshal([]byte(tc.In), ptr)
+			err := json.Unmarshal([]byte(tc.In), ptr)
 			assert.Equal(t, tc.ExpOut, reflect.ValueOf(ptr).Elem().Interface())
 			if tc.ExpErr == "" {
 				assert.NoError(t, err)
@@ -261,7 +263,7 @@ func TestCompatDecode(t *testing.T) {
 				var out any
 				ptr = &out
 			}
-			err := NewDecoder(strings.NewReader(tc.In)).Decode(ptr)
+			err := json.NewDecoder(strings.NewReader(tc.In)).Decode(ptr)
 			assert.Equal(t, tc.ExpOut, reflect.ValueOf(ptr).Elem().Interface())
 			if tc.ExpErr == "" {
 				assert.NoError(t, err)
-- 
cgit v1.2.3


From 7bd0072b5896bfc4172b6bda778cf149dd6282fa Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Sat, 25 Feb 2023 16:17:06 -0700
Subject: reencode: Fix the byte count for partial writes

---
 ReleaseNotes.md  |  5 ++++
 reencode.go      | 14 +++++++---
 reencode_test.go | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index b7a8f76..c9d1233 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -28,6 +28,11 @@
    - Bugfix: compat/json: `io.EOF` is now correctly converted to
      "unexpected end of JSON input", same as `io.ErrUnexpectedEOF`.
 
+   - Bugfix: ReEncoder: Don't count bytes already in the UTF-8 decode
+     buffer toward the number of bytes returned from `.Write` and
+     `.WriteString`.  This only comes up if there is an I/O causing a
+     partial write.
+
 # v0.3.7 (2023-02-20)
 
   Theme: Fixes from fuzzing (part 1?)
diff --git a/reencode.go b/reencode.go
index 8b08aad..fd36875 100644
--- a/reencode.go
+++ b/reencode.go
@@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int,
 // but *ReEncoder does because it transforms the data written to it,
 // and the number of bytes written may be wildly different than the
 // number of bytes handled.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .WriteString().
 func (enc *ReEncoder) Write(str []byte) (int, error) {
 	if len(str) == 0 {
 		return 0, nil
 	}
+	origBufLen := enc.bufLen
 	var n int
 	for {
 		c, size, full, isRune := enc.getRuneFromBytes(str, n)
@@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
 			return len(str), nil
 		}
 		if enc.utf == InvalidUTF8Error && !isRune {
-			return n, &ReEncodeSyntaxError{
+			return n - origBufLen, &ReEncodeSyntaxError{
 				Offset: enc.inputPos,
 				Err:    fmt.Errorf("invalid UTF-8: %#02x", c),
 			}
 		}
 		enc.handleRune(c, size, isRune)
 		if enc.err != nil {
-			return n, enc.err
+			return n - origBufLen, enc.err
 		}
 		n += size
 	}
@@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
 
 // WriteString implements io.StringWriter; it does what you'd expect,
 // but see the notes on the Write method.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .Write().
 func (enc *ReEncoder) WriteString(str string) (int, error) {
 	if len(str) == 0 {
 		return 0, nil
 	}
+	origBufLen := enc.bufLen
 	var n int
 	for {
 		c, size, full, isRune := enc.getRuneFromString(str, n)
@@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) {
 			return len(str), nil
 		}
 		if enc.utf == InvalidUTF8Error && !isRune {
-			return n, &ReEncodeSyntaxError{
+			return n - origBufLen, &ReEncodeSyntaxError{
 				Offset: enc.inputPos,
 				Err:    fmt.Errorf("invalid UTF-8: %#02x", c),
 			}
 		}
 		enc.handleRune(c, size, isRune)
 		if enc.err != nil {
-			return n, enc.err
+			return n - origBufLen, enc.err
 		}
 		n += size
 	}
diff --git a/reencode_test.go b/reencode_test.go
index 715e976..feabde5 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -5,6 +5,8 @@
 package lowmemjson
 
 import (
+	"errors"
+	"io"
 	"strings"
 	"testing"
 
@@ -240,3 +242,84 @@ func TestReEncoderStackSize(t *testing.T) {
 		assert.Equal(t, i+2, enc.stackSize())
 	}
 }
+
+var errNoSpace = errors.New("no space left on device")
+
+type limitedWriter struct {
+	Limit int
+	Inner io.Writer
+
+	n int
+}
+
+func (w *limitedWriter) Write(p []byte) (int, error) {
+	switch {
+	case w.n >= w.Limit:
+		return 0, errNoSpace
+	case w.n+len(p) > w.Limit:
+		n, err := w.Inner.Write(p[:w.Limit-w.n])
+		if n > 0 {
+			w.n += n
+		}
+		if err == nil {
+			err = errNoSpace
+		}
+		return n, err
+	default:
+		n, err := w.Inner.Write(p)
+		if n > 0 {
+			w.n += n
+		}
+		return n, err
+	}
+}
+
+func TestReEncodeIOErr(t *testing.T) {
+	t.Parallel()
+
+	input := `"😀"`
+	assert.Len(t, input, 6)
+
+	t.Run("bytes", func(t *testing.T) {
+		t.Parallel()
+
+		var out strings.Builder
+		enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{})
+
+		n, err := enc.Write([]byte(input[:2]))
+		assert.NoError(t, err)
+		assert.Equal(t, 2, n)
+		// Of the 2 bytes "written", only one should be in
+		// `out` yet; the other should be in the UTF-8 buffer.
+		assert.Equal(t, input[:1], out.String())
+
+		n, err = enc.Write([]byte(input[2:]))
+		assert.ErrorIs(t, err, errNoSpace)
+		// Check that the byte in the UTF-8 buffer from the
+		// first .Write didn't count toward the total for this
+		// .Write.
+		assert.Equal(t, 3, n)
+		assert.Equal(t, input[:5], out.String())
+	})
+	t.Run("string", func(t *testing.T) {
+		t.Parallel()
+
+		var out strings.Builder
+		enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{})
+
+		n, err := enc.WriteString(input[:2])
+		assert.NoError(t, err)
+		assert.Equal(t, 2, n)
+		// Of the 2 bytes "written", only one should be in
+		// `out` yet; the other should be in the UTF-8 buffer.
+		assert.Equal(t, input[:1], out.String())
+
+		n, err = enc.WriteString(input[2:])
+		assert.ErrorIs(t, err, errNoSpace)
+		// Check that the byte in the UTF-8 buffer from the
+		// first .Write didn't count toward the total for this
+		// .Write.
+		assert.Equal(t, 3, n)
+		assert.Equal(t, input[:5], out.String())
+	})
+}
-- 
cgit v1.2.3


From 22edcf6a68a057ed04368d5f78c8ba3ddfee8d57 Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Sat, 25 Feb 2023 11:11:36 -0700
Subject: reencode: Improve the error messages for trailing partial-UTF-8

---
 ReleaseNotes.md                                    |  4 +++
 .../json/testdata/fuzz/FuzzEquiv/95640f7d88708118  |  2 ++
 reencode.go                                        | 18 ++++++++--
 reencode_test.go                                   | 39 +++++++++++++++++++++-
 4 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118

diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index c9d1233..71973aa 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -33,6 +33,10 @@
      `.WriteString`.  This only comes up if there is an I/O causing a
      partial write.
 
+   - Bugfix: ReEncoder: The error messages for trailing partial UTF-8
+     now reflect the `InvalidUTF8` setting, rather than simply saying
+     "unflushed unicode garbage".
+
 # v0.3.7 (2023-02-20)
 
   Theme: Fixes from fuzzing (part 1?)
diff --git a/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118 b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118
new file mode 100644
index 0000000..77924f3
--- /dev/null
+++ b/compat/json/testdata/fuzz/FuzzEquiv/95640f7d88708118
@@ -0,0 +1,2 @@
+go test fuzz v1
+[]byte("\xf0")
diff --git a/reencode.go b/reencode.go
index fd36875..c19e296 100644
--- a/reencode.go
+++ b/reencode.go
@@ -329,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
 // if enc.AllowMultipleValues is set.
 func (enc *ReEncoder) Close() error {
 	if enc.bufLen > 0 {
-		return &ReEncodeSyntaxError{
-			Offset: enc.inputPos,
-			Err:    fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]),
+		if enc.utf == InvalidUTF8Error {
+			return &ReEncodeSyntaxError{
+				Offset: enc.inputPos,
+				Err:    fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]),
+			}
+		}
+		for i := 0; i < enc.bufLen; i++ {
+			if enc.utf == InvalidUTF8Replace {
+				enc.handleRune(utf8.RuneError, 1, true)
+			} else {
+				enc.handleRune(rune(enc.buf[i]), 1, false)
+			}
+			if enc.err != nil {
+				return enc.err
+			}
 		}
 	}
 	if _, err := enc.par.HandleEOF(); err != nil {
diff --git a/reencode_test.go b/reencode_test.go
index feabde5..60180c8 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -15,7 +15,7 @@ import (
 	"git.lukeshu.com/go/lowmemjson/internal/fastio"
 )
 
-func TestReEncode(t *testing.T) {
+func TestEncodeReEncode(t *testing.T) {
 	t.Parallel()
 	type testcase struct {
 		enc ReEncoderConfig
@@ -168,6 +168,43 @@ func TestReEncode(t *testing.T) {
 	}
 }
 
+func TestReEncode(t *testing.T) {
+	t.Parallel()
+	type testcase struct {
+		Cfg         ReEncoderConfig
+		In          string
+		ExpOut      string
+		ExpWriteErr string
+		ExpCloseErr string
+	}
+	testcases := map[string]testcase{
+		"partial-utf8-replace":  {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Replace}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: "json: syntax error at input byte 0: invalid character '\uFFFD' looking for beginning of value"},
+		"partial-utf8-preserve": {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Preserve}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: invalid character '\xf0' looking for beginning of value`},
+		"partial-utf8-error":    {Cfg: ReEncoderConfig{InvalidUTF8: InvalidUTF8Error}, In: "\xf0\xbf", ExpOut: ``, ExpCloseErr: `json: syntax error at input byte 0: truncated UTF-8: "\xf0\xbf"`},
+	}
+	for tcName, tc := range testcases {
+		tc := tc
+		t.Run(tcName, func(t *testing.T) {
+			t.Parallel()
+			var out strings.Builder
+			enc := NewReEncoder(&out, tc.Cfg)
+			_, err := enc.WriteString(tc.In)
+			assert.Equal(t, tc.ExpOut, out.String())
+			if tc.ExpWriteErr == "" {
+				assert.NoError(t, err)
+			} else {
+				assert.EqualError(t, err, tc.ExpWriteErr)
+			}
+			err = enc.Close()
+			if tc.ExpCloseErr == "" {
+				assert.NoError(t, err)
+			} else {
+				assert.EqualError(t, err, tc.ExpCloseErr)
+			}
+		})
+	}
+}
+
 func TestReEncodeWriteSize(t *testing.T) {
 	t.Parallel()
 
-- 
cgit v1.2.3