diff options
-rw-r--r-- | encode.go | 2 | ||||
-rw-r--r-- | misc.go | 81 | ||||
-rw-r--r-- | reencode.go | 28 |
3 files changed, 68 insertions, 43 deletions
@@ -302,7 +302,7 @@ func encodeString[T interface{ []byte | string }](w io.Writer, str T) { encodeWriteByte(w, '"') for i := 0; i < len(str); { c, size := decodeRune(str[i:]) - if _, err := writeStringChar(w, c, false, nil); err != nil { + if _, err := writeStringChar(w, c, BackslashEscapeNone, nil); err != nil { panic(encodeError{err}) } i += size @@ -68,34 +68,42 @@ func writeRune(w io.Writer, c rune) (int, error) { // JSON string encoding //////////////////////////////////////////////////////// -func UnicodeEscapeJSSafe(c rune, _ bool) bool { +type BackslashEscapeMode uint8 + +const ( + BackslashEscapeNone = BackslashEscapeMode(iota) + BackslashEscapeShort + BackslashEscapeUnicode +) + +func EscapeJSSafe(c rune, _ BackslashEscapeMode) BackslashEscapeMode { // JSON is notionally a JS subset, but that's not actually // true. // // http://timelessrepo.com/json-isnt-a-javascript-subset switch c { case '\u2028', '\u2029': - return true + return BackslashEscapeUnicode default: - return false + return BackslashEscapeNone } } -func UnicodeEscapeHTMLSafe(c rune, wasEscaped bool) bool { +func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '&', '<', '>': - return true + return BackslashEscapeUnicode default: - return UnicodeEscapeJSSafe(c, wasEscaped) + return EscapeJSSafe(c, wasEscaped) } } -func UnicodeEscapeDefault(c rune, wasEscaped bool) bool { +func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode { switch c { case '\b', '\f', utf8.RuneError: - return true + return BackslashEscapeUnicode default: - return UnicodeEscapeHTMLSafe(c, wasEscaped) + return EscapeHTMLSafe(c, wasEscaped) } } @@ -114,31 +122,48 @@ func writeStringShortEscape(w io.Writer, c byte) (int, error) { buf := [2]byte{'\\', c} return w.Write(buf[:]) } -func writeStringChar(w io.Writer, c rune, wasEscaped bool, escaper func(rune, bool) bool) (int, error) { +func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper func(rune, BackslashEscapeMode) BackslashEscapeMode) (int, error) { if escaper == nil { - escaper = UnicodeEscapeDefault + escaper = EscapeDefault } - switch { - case c <= 0xFFFF && escaper(c, wasEscaped): - return writeStringUnicodeEscape(w, c) - case c == '"' || c == '\\': - return writeStringShortEscape(w, byte(c)) - case c < 0x0020: + switch escaper(c, wasEscaped) { + case BackslashEscapeNone: + switch { + case c < 0x0020: + switch c { + case '\b': + return writeStringShortEscape(w, 'b') + case '\f': + return writeStringShortEscape(w, 'f') + case '\n': + return writeStringShortEscape(w, 'n') + case '\r': + return writeStringShortEscape(w, 'r') + case '\t': + return writeStringShortEscape(w, 't') + default: + return writeStringUnicodeEscape(w, c) + } + case c == '"' || c == '\\': + return writeStringShortEscape(w, byte(c)) + default: + return writeRune(w, c) + } + case BackslashEscapeShort: switch c { - case '\b': - return writeStringShortEscape(w, 'b') - case '\f': - return writeStringShortEscape(w, 'f') - case '\n': - return writeStringShortEscape(w, 'n') - case '\r': - return writeStringShortEscape(w, 'r') - case '\t': - return writeStringShortEscape(w, 't') + case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': + return writeStringShortEscape(w, byte(c)) + default: + return writeRune(w, c) + } + case BackslashEscapeUnicode: + switch { + case c > 0xFFFF: + return writeRune(w, c) default: return writeStringUnicodeEscape(w, c) } default: - return writeRune(w, c) + panic("escaper returned an invalid escape mode") } } diff --git a/reencode.go b/reencode.go index 50c8ba3..66f25da 100644 --- a/reencode.go +++ b/reencode.go @@ -24,12 +24,12 @@ type ReEncoder struct { // encoding/json only. prefix string // Returns whether a given character in a string should be - // "\uXXXX" escaped. The bool argument is whether it was + // backslash-escaped. The bool argument is whether it was // \u-escaped in the input. This does not affect characters - // that must or must-not be \u-escaped to be valid JSON. + // that must or must-not be escaped to be valid JSON. // - // If not set, then EscapeUnicodeDefault is used. - UnicodeEscape func(rune, bool) bool + // If not set, then EscapeDefault is used. + BackslashEscape func(rune, BackslashEscapeMode) BackslashEscapeMode bailAfterCurrent bool @@ -339,7 +339,7 @@ func (enc *ReEncoder) stateInString(c rune) error { enc.popState() return enc.emitByte(byte(c)) case 0x0020 <= c && c <= 0x10FFFF: - return enc.emit(writeStringChar(enc.Out, c, false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeNone, enc.BackslashEscape)) default: return &SyntaxError{fmt.Sprintf("string: unexpected character: %c", c), enc.inputPos} } @@ -348,28 +348,28 @@ func (enc *ReEncoder) stateInBackslash(c rune) error { switch c { case '"': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '"', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\\', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '/', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\b', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\f', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\n', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\r', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\t', false, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, '\t', BackslashEscapeShort, enc.BackslashEscape)) case 'u': enc.replaceState(enc.stateInUnicode, false) return nil @@ -396,7 +396,7 @@ func (enc *ReEncoder) stateInUnicode(c rune) error { rune(enc.stateBuf[2])<<4 | rune(enc.stateBuf[3])<<0 enc.stateBuf = enc.stateBuf[:0] - return enc.emit(writeStringChar(enc.Out, c, true, enc.UnicodeEscape)) + return enc.emit(writeStringChar(enc.Out, c, BackslashEscapeUnicode, enc.BackslashEscape)) } return nil } |