summary refs log tree commit diff
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-25 16:17:06 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-25 18:42:05 -0700
commit 7bd0072b5896bfc4172b6bda778cf149dd6282fa (patch)
tree 518c9e62aace98fcca449fe73996abb8ee769db6
parent 4233e5012ece6d5a7fee3b5a518c41d916e1cf52 (diff)
reencode: Fix the byte count for partial writes
-rw-r--r-- ReleaseNotes.md 5
-rw-r--r-- reencode.go 14
-rw-r--r-- reencode_test.go 83
3 files changed, 98 insertions, 4 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
index b7a8f76..c9d1233 100644
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -28,6 +28,11 @@
- Bugfix: compat/json: `io.EOF` is now correctly converted to
"unexpected end of JSON input", same as `io.ErrUnexpectedEOF`.
+ - Bugfix: ReEncoder: Don't count bytes already in the UTF-8 decode
+ buffer toward the number of bytes returned from `.Write` and
+ `.WriteString`. This only comes up if there is an I/O error
+ causing a partial write.
+
# v0.3.7 (2023-02-20)
Theme: Fixes from fuzzing (part 1?)
diff --git a/reencode.go b/reencode.go
index 8b08aad..fd36875 100644
--- a/reencode.go
+++ b/reencode.go
@@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int,
// but *ReEncoder does because it transforms the data written to it,
// and the number of bytes written may be wildly different than the
// number of bytes handled.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .WriteString().
func (enc *ReEncoder) Write(str []byte) (int, error) {
if len(str) == 0 {
return 0, nil
}
+ origBufLen := enc.bufLen
var n int
for {
c, size, full, isRune := enc.getRuneFromBytes(str, n)
@@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
return len(str), nil
}
if enc.utf == InvalidUTF8Error && !isRune {
- return n, &ReEncodeSyntaxError{
+ return n - origBufLen, &ReEncodeSyntaxError{
Offset: enc.inputPos,
Err: fmt.Errorf("invalid UTF-8: %#02x", c),
}
}
enc.handleRune(c, size, isRune)
if enc.err != nil {
- return n, enc.err
+ return n - origBufLen, enc.err
}
n += size
}
@@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
// WriteString implements io.StringWriter; it does what you'd expect,
// but see the notes on the Write method.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .Write().
func (enc *ReEncoder) WriteString(str string) (int, error) {
if len(str) == 0 {
return 0, nil
}
+ origBufLen := enc.bufLen
var n int
for {
c, size, full, isRune := enc.getRuneFromString(str, n)
@@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) {
return len(str), nil
}
if enc.utf == InvalidUTF8Error && !isRune {
- return n, &ReEncodeSyntaxError{
+ return n - origBufLen, &ReEncodeSyntaxError{
Offset: enc.inputPos,
Err: fmt.Errorf("invalid UTF-8: %#02x", c),
}
}
enc.handleRune(c, size, isRune)
if enc.err != nil {
- return n, enc.err
+ return n - origBufLen, enc.err
}
n += size
}
diff --git a/reencode_test.go b/reencode_test.go
index 715e976..feabde5 100644
--- a/reencode_test.go
+++ b/reencode_test.go
@@ -5,6 +5,8 @@
package lowmemjson
import (
+ "errors"
+ "io"
"strings"
"testing"
@@ -240,3 +242,84 @@ func TestReEncoderStackSize(t *testing.T) {
assert.Equal(t, i+2, enc.stackSize())
}
}
+
+var errNoSpace = errors.New("no space left on device")
+
+type limitedWriter struct {
+ Limit int
+ Inner io.Writer
+
+ n int
+}
+
+func (w *limitedWriter) Write(p []byte) (int, error) {
+ switch {
+ case w.n >= w.Limit:
+ return 0, errNoSpace
+ case w.n+len(p) > w.Limit:
+ n, err := w.Inner.Write(p[:w.Limit-w.n])
+ if n > 0 {
+ w.n += n
+ }
+ if err == nil {
+ err = errNoSpace
+ }
+ return n, err
+ default:
+ n, err := w.Inner.Write(p)
+ if n > 0 {
+ w.n += n
+ }
+ return n, err
+ }
+}
+
+func TestReEncodeIOErr(t *testing.T) {
+ t.Parallel()
+
+ input := `"😀"`
+ assert.Len(t, input, 6)
+
+ t.Run("bytes", func(t *testing.T) {
+ t.Parallel()
+
+ var out strings.Builder
+ enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{})
+
+ n, err := enc.Write([]byte(input[:2]))
+ assert.NoError(t, err)
+ assert.Equal(t, 2, n)
+ // Of the 2 bytes "written", only one should be in
+ // `out` yet; the other should be in the UTF-8 buffer.
+ assert.Equal(t, input[:1], out.String())
+
+ n, err = enc.Write([]byte(input[2:]))
+ assert.ErrorIs(t, err, errNoSpace)
+ // Check that the byte in the UTF-8 buffer from the
+ // first .Write didn't count toward the total for this
+ // .Write.
+ assert.Equal(t, 3, n)
+ assert.Equal(t, input[:5], out.String())
+ })
+ t.Run("string", func(t *testing.T) {
+ t.Parallel()
+
+ var out strings.Builder
+ enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{})
+
+ n, err := enc.WriteString(input[:2])
+ assert.NoError(t, err)
+ assert.Equal(t, 2, n)
+ // Of the 2 bytes "written", only one should be in
+ // `out` yet; the other should be in the UTF-8 buffer.
+ assert.Equal(t, input[:1], out.String())
+
+ n, err = enc.WriteString(input[2:])
+ assert.ErrorIs(t, err, errNoSpace)
+ // Check that the byte in the UTF-8 buffer from the
+ // first .Write didn't count toward the total for this
+ // .Write.
+ assert.Equal(t, 3, n)
+ assert.Equal(t, input[:5], out.String())
+ })
+}