From 7bd0072b5896bfc4172b6bda778cf149dd6282fa Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 25 Feb 2023 16:17:06 -0700 Subject: reencode: Fix the byte count for partial writes --- ReleaseNotes.md | 5 ++++ reencode.go | 14 +++++++--- reencode_test.go | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 4 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index b7a8f76..c9d1233 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -28,6 +28,11 @@ - Bugfix: compat/json: `io.EOF` is now correctly converted to "unexpected end of JSON input", same as `io.ErrUnexpectedEOF`. + - Bugfix: ReEncoder: Don't count bytes already in the UTF-8 decode + buffer toward the number of bytes returned from `.Write` and + `.WriteString`. This only comes up if there is an I/O error causing a + partial write. + # v0.3.7 (2023-02-20) Theme: Fixes from fuzzing (part 1?) diff --git a/reencode.go b/reencode.go index 8b08aad..fd36875 100644 --- a/reencode.go +++ b/reencode.go @@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int, // but *ReEncoder does because it transforms the data written to it, // and the number of bytes written may be wildly different than the // number of bytes handled. +// +//nolint:dupl // Yes, this is mostly a duplicate of .WriteString(). 
func (enc *ReEncoder) Write(str []byte) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromBytes(str, n) @@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } @@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) { // WriteString implements io.StringWriter; it does what you'd expect, // but see the notes on the Write method. +// +//nolint:dupl // Yes, this is mostly a duplicate of .Write(). func (enc *ReEncoder) WriteString(str string) (int, error) { if len(str) == 0 { return 0, nil } + origBufLen := enc.bufLen var n int for { c, size, full, isRune := enc.getRuneFromString(str, n) @@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) { return len(str), nil } if enc.utf == InvalidUTF8Error && !isRune { - return n, &ReEncodeSyntaxError{ + return n - origBufLen, &ReEncodeSyntaxError{ Offset: enc.inputPos, Err: fmt.Errorf("invalid UTF-8: %#02x", c), } } enc.handleRune(c, size, isRune) if enc.err != nil { - return n, enc.err + return n - origBufLen, enc.err } n += size } diff --git a/reencode_test.go b/reencode_test.go index 715e976..feabde5 100644 --- a/reencode_test.go +++ b/reencode_test.go @@ -5,6 +5,8 @@ package lowmemjson import ( + "errors" + "io" "strings" "testing" @@ -240,3 +242,84 @@ func TestReEncoderStackSize(t *testing.T) { assert.Equal(t, i+2, enc.stackSize()) } } + +var errNoSpace = errors.New("no space left on device") + +type limitedWriter struct { + Limit int + Inner io.Writer + + n int +} + +func (w *limitedWriter) Write(p []byte) (int, error) { + switch { + case w.n >= 
w.Limit: + return 0, errNoSpace + case w.n+len(p) > w.Limit: + n, err := w.Inner.Write(p[:w.Limit-w.n]) + if n > 0 { + w.n += n + } + if err == nil { + err = errNoSpace + } + return n, err + default: + n, err := w.Inner.Write(p) + if n > 0 { + w.n += n + } + return n, err + } +} + +func TestReEncodeIOErr(t *testing.T) { + t.Parallel() + + input := `"😀"` + assert.Len(t, input, 6) + + t.Run("bytes", func(t *testing.T) { + t.Parallel() + + var out strings.Builder + enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{}) + + n, err := enc.Write([]byte(input[:2])) + assert.NoError(t, err) + assert.Equal(t, 2, n) + // Of the 2 bytes "written", only one should be in + // `out` yet; the other should be in the UTF-8 buffer. + assert.Equal(t, input[:1], out.String()) + + n, err = enc.Write([]byte(input[2:])) + assert.ErrorIs(t, err, errNoSpace) + // Check that the byte in the UTF-8 buffer from the + // first .Write didn't count toward the total for this + // .Write. + assert.Equal(t, 3, n) + assert.Equal(t, input[:5], out.String()) + }) + t.Run("string", func(t *testing.T) { + t.Parallel() + + var out strings.Builder + enc := NewReEncoder(&limitedWriter{Limit: 5, Inner: &out}, ReEncoderConfig{}) + + n, err := enc.WriteString(input[:2]) + assert.NoError(t, err) + assert.Equal(t, 2, n) + // Of the 2 bytes "written", only one should be in + // `out` yet; the other should be in the UTF-8 buffer. + assert.Equal(t, input[:1], out.String()) + + n, err = enc.WriteString(input[2:]) + assert.ErrorIs(t, err, errNoSpace) + // Check that the byte in the UTF-8 buffer from the + // first .Write didn't count toward the total for this + // .Write. + assert.Equal(t, 3, n) + assert.Equal(t, input[:5], out.String()) + }) +} -- cgit v1.2.3