From 2eb60b8be25a4b0fe3f1c5d5ca302e7e68190bad Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 16 Feb 2023 17:20:41 -0700 Subject: compat/json: Don't do actual JSON parsing in HTMLEscape --- ReleaseNotes.md | 5 +++++ compat/json/compat.go | 21 ++++++++++++++++++++- compat/json/compat_test.go | 21 +++++++++++++++++++++ internal/jsonstring/encode_string.go | 6 +++--- 4 files changed, 49 insertions(+), 4 deletions(-) diff --git a/ReleaseNotes.md b/ReleaseNotes.md index ae147b1..c949fd6 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -24,6 +24,11 @@ + compat/json.Compact, compat/json.Indent: No longer compact floating-point numbers; as `encoding/json` doesn't. + + compat/json.HTMLEscape: Just look for problematic UTF-8 runes, + don't actually parse as JSON. This is consistent with the + function's lack of an `error` return value, and with the + behavior of `encoding/json`. + - Unicode: + Feature: Encoder, ReEncoder: Add an `InvalidUTF8` diff --git a/compat/json/compat.go b/compat/json/compat.go index d326514..edc6908 100644 --- a/compat/json/compat.go +++ b/compat/json/compat.go @@ -11,10 +11,13 @@ import ( "bytes" "encoding/json" "errors" + "fmt" "io" "strconv" + "unicode/utf8" "git.lukeshu.com/go/lowmemjson" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) //nolint:stylecheck // ST1021 False positive; these aren't comments on individual types. @@ -144,7 +147,23 @@ func convertReEncodeError(err error) error { } func HTMLEscape(dst *bytes.Buffer, src []byte) { - _, _ = lowmemjson.NewReEncoder(dst, lowmemjson.ReEncoderConfig{}).Write(src) + for n := 0; n < len(src); { + c, size := utf8.DecodeRune(src[n:]) + if c == utf8.RuneError && size == 1 { + dst.WriteByte(src[n]) + } else { + mode := lowmemjson.EscapeHTMLSafe(c, lowmemjson.BackslashEscapeNone) + switch mode { + case lowmemjson.BackslashEscapeNone: + dst.WriteRune(c) + case lowmemjson.BackslashEscapeUnicode: + _ = jsonstring.WriteStringUnicodeEscape(dst, c) + default: + panic(fmt.Errorf("lowmemjson.EscapeHTMLSafe returned an unexpected escape mode=%d", mode)) + } + } + n += size + } } func reencode(dst io.Writer, src []byte, cfg lowmemjson.ReEncoderConfig) error { diff --git a/compat/json/compat_test.go b/compat/json/compat_test.go index 128bd1b..0c14a60 100644 --- a/compat/json/compat_test.go +++ b/compat/json/compat_test.go @@ -11,6 +11,27 @@ import ( "github.com/stretchr/testify/assert" ) +func TestCompatHTMLEscape(t *testing.T) { + t.Parallel() + type testcase struct { + In string + Out string + } + testcases := map[string]testcase{ + "invalid": {In: `x`, Out: `x`}, + } + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + t.Logf("in=%q", tc.In) + var dst bytes.Buffer + HTMLEscape(&dst, []byte(tc.In)) + assert.Equal(t, tc.Out, dst.String()) + }) + } +} + func TestCompatValid(t *testing.T) { t.Parallel() type testcase struct { diff --git a/internal/jsonstring/encode_string.go b/internal/jsonstring/encode_string.go index 76bbb38..2488cb2 100644 --- a/internal/jsonstring/encode_string.go +++ b/internal/jsonstring/encode_string.go @@ -38,7 +38,7 @@ const ( // BackslashEscaper is describe in the main lowmemjson package docs. type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode -func writeStringUnicodeEscape(w io.Writer, c rune) error { +func WriteStringUnicodeEscape(w io.Writer, c rune) error { const alphabet = "0123456789abcdef" buf := [6]byte{ '\\', @@ -84,7 +84,7 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err case '\b', '\f', '\n', '\r', '\t': // short-escape if possible return writeStringShortEscape(w, c) default: - return writeStringUnicodeEscape(w, c) + return WriteStringUnicodeEscape(w, c) } case c == '"' || c == '\\': // override, gotta escape these return writeStringShortEscape(w, c) @@ -106,7 +106,7 @@ func WriteStringChar(w fastio.AllWriter, c rune, escape BackslashEscapeMode) err _, err := w.WriteRune(c) return err default: // obey - return writeStringUnicodeEscape(w, c) + return WriteStringUnicodeEscape(w, c) } case BackslashEscapeRawByte: switch { -- cgit v1.2.3