summaryrefslogtreecommitdiff
path: root/encode_escape.go
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-14 22:36:25 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-18 22:45:54 -0700
commit dfc67cecbd95344d296c31b537fa3ae8aec8c292 (patch)
tree 1e2e820cbd288d1ebef7b0e9dea14a07e2f33fc5 /encode_escape.go
parent 38989a9c4f69abfe04c3eb4ec3382be88802141c (diff)
encode, reencode: Fix handling of invalid UTF-8
Diffstat (limited to 'encode_escape.go')
-rw-r--r-- encode_escape.go 37
1 files changed, 29 insertions, 8 deletions
diff --git a/encode_escape.go b/encode_escape.go
index 97da6e9..c9e2bc9 100644
--- a/encode_escape.go
+++ b/encode_escape.go
@@ -6,12 +6,29 @@ package lowmemjson
import (
"fmt"
- "unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
-// BackslashEscapeMode identifies one of the three ways that a
+// InvalidUTF8Mode identifies one of the three ways that an Encoder or
+// ReEncoder can behave when encountering invalid UTF-8 in a string
+// value:
+//
+// - Replace the byte with the Unicode replacement character U+FFFD.
+//
+// - Allow the byte through to the string-encoder, with an
+// escape-mode of BackslashEscapeRawByte.
+//
+// - Emit a syntax error.
+type InvalidUTF8Mode = jsonstring.InvalidUTF8Mode
+
+const (
+ InvalidUTF8Replace = jsonstring.InvalidUTF8Replace
+ InvalidUTF8Preserve = jsonstring.InvalidUTF8Preserve
+ InvalidUTF8Error = jsonstring.InvalidUTF8Error
+)
+
+// BackslashEscapeMode identifies one of the four ways that a
// character may be represented in a JSON string:
//
// - literally (no backslash escaping)
@@ -20,12 +37,18 @@ import (
// single-character)
//
// - as a long Unicode `\uXXXX` backslash sequence
+//
+// - as a raw byte; because JSON must be valid UTF-8, this mode lets
+// you emit invalid JSON containing arbitrary binary data. If the
+// character does not satisfy `utf8.RuneSelf <= char <= 0xFF`,
+// then the encoder will panic.
type BackslashEscapeMode = jsonstring.BackslashEscapeMode
const (
BackslashEscapeNone = jsonstring.BackslashEscapeNone
BackslashEscapeShort = jsonstring.BackslashEscapeShort
BackslashEscapeUnicode = jsonstring.BackslashEscapeUnicode
+ BackslashEscapeRawByte = jsonstring.BackslashEscapeRawByte
)
func hexToInt(c byte) rune {
@@ -96,14 +119,13 @@ func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode
// behavior of encoding/json.
//
// It is like EscapeHTMLSafe, but also uses long Unicode `\uXXXX`
-// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
-// character.
+// sequences for `\b` and `\f`.
//
// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
// specified.
func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
switch c {
- case '\b', '\f', utf8.RuneError:
+ case '\b', '\f':
return BackslashEscapeUnicode
default:
return EscapeHTMLSafe(c, wasEscaped)
@@ -115,11 +137,10 @@ func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
// SetEscapeHTML(false) called on it.
//
// It is like EscapeJSSafe, but also uses long Unicode `\uXXXX`
-// sequences for `\b`, `\f`, and the `\uFFFD` Unicode replacement
-// character.
+// sequences for `\b` and `\f`.
func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
switch c {
- case '\b', '\f', utf8.RuneError:
+ case '\b', '\f':
return BackslashEscapeUnicode
default:
return EscapeJSSafe(c, wasEscaped)