summaryrefslogtreecommitdiff
path: root/misc.go
diff options
context:
space:
mode:
authorLuke Shumaker <lukeshu@lukeshu.com>2023-01-26 22:31:32 -0700
committerLuke Shumaker <lukeshu@lukeshu.com>2023-01-29 21:01:42 -0700
commitff6dc0bc519886905e758a84e572f5e34d6c03d1 (patch)
tree8906c4b8bb0c7dc468419efc17b872db62a4e068 /misc.go
parentd1b5bc1f05624614f43ef85597f4aa9d7a166d23 (diff)
Move things between files
Diffstat (limited to 'misc.go')
-rw-r--r--misc.go211
1 files changed, 0 insertions, 211 deletions
diff --git a/misc.go b/misc.go
deleted file mode 100644
index fb96b4e..0000000
--- a/misc.go
+++ /dev/null
@@ -1,211 +0,0 @@
-// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com>
-//
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-package lowmemjson
-
-import (
- "encoding/json"
- "io"
- "reflect"
- "unicode/utf8"
-
- "git.lukeshu.com/go/lowmemjson/internal"
-)
-
-// Package-level reflect.Type values, each computed once when the
-// package is initialized.
-var (
-	// numberType is the reflect.Type of encoding/json.Number.
-	numberType = reflect.TypeOf(json.Number(""))
-	// byteType is the reflect.Type of byte (which is an alias for uint8).
-	byteType = reflect.TypeOf(uint8(0))
-	// byteSliceType is the reflect.Type of []byte.
-	byteSliceType = reflect.TypeOf([]byte(nil))
-)
-
-// generic I/O /////////////////////////////////////////////////////////////////
-
-func writeByte(w io.Writer, c byte) error {
- if br, ok := w.(interface{ WriteByte(byte) error }); ok {
- return br.WriteByte(c)
- }
- var buf [1]byte
- buf[0] = c
- if _, err := w.Write(buf[:]); err != nil {
- return err
- }
- return nil
-}
-
-// writeRune writes the character c to w.
-//
-// When w provides a WriteRune method, the call is forwarded to it
-// and its results are returned as-is.  Otherwise c is encoded with
-// utf8.EncodeRune into a scratch buffer and the encoded bytes are
-// passed to w.Write, whose results are returned.
-func writeRune(w io.Writer, c rune) (int, error) {
-	type runeWriter interface {
-		WriteRune(rune) (int, error)
-	}
-	if rw, ok := w.(runeWriter); ok {
-		return rw.WriteRune(c)
-	}
-	encoded := make([]byte, utf8.UTFMax)
-	return w.Write(encoded[:utf8.EncodeRune(encoded, c)])
-}
-
-// JSON string encoding ////////////////////////////////////////////////////////
-
-// BackslashEscapeMode identifies one of the three ways that a
-// character may be represented in a JSON string:
-//
-// - literally (no backslash escaping)
-//
-// - as a short "well-known" `\X` backslash sequence (where `X` is a
-// single character)
-//
-// - as a long Unicode `\uXXXX` backslash sequence
-//
-// The zero value is BackslashEscapeNone.
-type BackslashEscapeMode uint8
-
-const (
- // BackslashEscapeNone selects the literal representation (no
- // backslash escaping).
- BackslashEscapeNone BackslashEscapeMode = iota
- // BackslashEscapeShort selects the short `\X` backslash sequence.
- BackslashEscapeShort
- // BackslashEscapeUnicode selects the long `\uXXXX` backslash
- // sequence.
- BackslashEscapeUnicode
-)
-
-// A BackslashEscaper controls how a ReEncoder emits a character in a
-// JSON string. The `rune` argument is the character being
-// considered, and the `BackslashEscapeMode` argument is how it was
-// originally encoded in the input. The return value is the
-// representation that the escaper would like to be used in the
-// output.
-//
-// The returned mode is a request rather than a guarantee:
-// writeStringChar overrides it for characters where obeying it is
-// not possible, and substitutes EscapeDefault when handed a nil
-// BackslashEscaper.
-//
-// The ReEncoder will panic if a BackslashEscaper returns an unknown
-// BackslashEscapeMode.
-type BackslashEscaper = func(rune, BackslashEscapeMode) BackslashEscapeMode
-
-// EscapePreserve is a BackslashEscaper that preserves the original
-// input escaping: it ignores the character itself and returns
-// wasEscaped unchanged.
-func EscapePreserve(_ rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
- return wasEscaped
-}
-
-// EscapeJSSafe is a BackslashEscaper that escapes strings such that
-// the JSON is safe to embed in JS: it requests a long `\uXXXX`
-// sequence for U+2028 and U+2029, and otherwise preserves the
-// original input escaping.
-//
-// JSON is notionally a JS subset, but that's not actually true; so
-// more conservative backslash-escaping is necessary to safely embed
-// it in JS.  http://timelessrepo.com/json-isnt-a-javascript-subset
-func EscapeJSSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	if c == '\u2028' || c == '\u2029' {
-		return BackslashEscapeUnicode
-	}
-	return wasEscaped
-}
-
-// EscapeHTMLSafe is a BackslashEscaper that escapes strings such that
-// the JSON is safe to embed in HTML: it requests a long `\uXXXX`
-// sequence for `&`, `<`, and `>`, and defers to EscapeJSSafe for
-// every other character.
-func EscapeHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	if c == '&' || c == '<' || c == '>' {
-		return BackslashEscapeUnicode
-	}
-	return EscapeJSSafe(c, wasEscaped)
-}
-
-// EscapeDefault is a BackslashEscaper that is meant to mimic the
-// default behavior of encoding/json.
-//
-// It requests a long Unicode `\uXXXX` sequence for `\b`, `\f`, and
-// the `\uFFFD` Unicode replacement character, and defers to
-// EscapeHTMLSafe for every other character.
-//
-// A ReEncoder uses EscapeDefault if a BackslashEscaper is not
-// specified.
-//
-// NOTE(review): which characters encoding/json itself escapes may
-// differ between Go releases; confirm against the release being
-// targeted.
-func EscapeDefault(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	if c == '\b' || c == '\f' || c == utf8.RuneError {
-		return BackslashEscapeUnicode
-	}
-	return EscapeHTMLSafe(c, wasEscaped)
-}
-
-// EscapeDefaultNonHTMLSafe is a BackslashEscaper that is meant to
-// mimic the default behavior of an encoding/json.Encoder that has
-// had SetEscapeHTML(false) called on it.
-//
-// It requests a long Unicode `\uXXXX` sequence for `\b`, `\f`, and
-// the `\uFFFD` Unicode replacement character, and defers to
-// EscapeJSSafe for every other character.
-//
-// NOTE(review): which characters encoding/json itself escapes may
-// differ between Go releases; confirm against the release being
-// targeted.
-func EscapeDefaultNonHTMLSafe(c rune, wasEscaped BackslashEscapeMode) BackslashEscapeMode {
-	if c == '\b' || c == '\f' || c == utf8.RuneError {
-		return BackslashEscapeUnicode
-	}
-	return EscapeJSSafe(c, wasEscaped)
-}
-
-// writeStringUnicodeEscape writes c to w as a six-byte `\uXXXX`
-// backslash sequence and returns the result of that single w.Write
-// call.
-//
-// Only the low 16 bits of c are represented: the four digits are
-// taken from bits 15-12, 11-8, 7-4, and 3-0, each looked up in
-// internal.Hex.
-func writeStringUnicodeEscape(w io.Writer, c rune) (int, error) {
-	seq := [6]byte{'\\', 'u'}
-	for i := 0; i < 4; i++ {
-		shift := uint(12 - 4*i) // 12, 8, 4, 0: most significant nibble first
-		seq[2+i] = internal.Hex[(c>>shift)&0xf]
-	}
-	return w.Write(seq[:])
-}
-
-// writeStringShortEscape writes c to w as a two-byte `\X` backslash
-// sequence and returns the result of that single w.Write call.
-//
-// `"`, `\`, and `/` are written as a backslash followed by the
-// character itself; backspace, form feed, newline, carriage return,
-// and tab are written as `\b`, `\f`, `\n`, `\r`, and `\t`.
-//
-// It panics if c is any other character; callers are expected to
-// check first.
-func writeStringShortEscape(w io.Writer, c rune) (int, error) {
-	// Parallel tables: escapable[i] is written as '\\' + letters[i].
-	// Every entry is ASCII, so the rune offsets yielded by range are
-	// valid byte indexes into letters.
-	const (
-		escapable = "\"\\/\b\f\n\r\t"
-		letters   = "\"\\/bfnrt"
-	)
-	for i, candidate := range escapable {
-		if candidate == c {
-			return w.Write([]byte{'\\', letters[i]})
-		}
-	}
-	panic("should not happen")
-}
-
-// writeStringChar writes c, a character inside a JSON string, to w
-// in the representation chosen by escaper.
-//
-// escaper is called with c and wasEscaped (how c was encoded in the
-// input); a nil escaper is treated as EscapeDefault.  The mode that
-// escaper returns is obeyed where possible and overridden where it
-// is not:
-//
-//   - BackslashEscapeShort is obeyed only for the characters that
-//     have a short escape; any other character is handled as if
-//     BackslashEscapeNone had been returned.
-//
-//   - BackslashEscapeNone is overridden for characters below U+0020
-//     and for `"` and `\`, which are always escaped (with a short
-//     escape when one exists, otherwise with `\uXXXX`).
-//
-//   - BackslashEscapeUnicode is overridden for characters above
-//     U+FFFF, which are written literally.
-//
-// The return values are those of the helper that performed the
-// write.  writeStringChar panics if escaper returns an unknown
-// BackslashEscapeMode.
-func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) {
-	if escaper == nil {
-		escaper = EscapeDefault
-	}
-	switch escaper(c, wasEscaped) {
-	case BackslashEscapeShort:
-		switch c {
-		case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey
-			return writeStringShortEscape(w, c)
-		}
-		// override, can't short-escape these; apply the literal-mode
-		// rules so that a control character with no short escape is
-		// still escaped rather than written raw.
-		fallthrough
-	case BackslashEscapeNone:
-		switch {
-		case c < 0x0020: // override, gotta escape these
-			switch c {
-			case '\b', '\f', '\n', '\r', '\t': // short-escape if possible
-				return writeStringShortEscape(w, c)
-			default:
-				return writeStringUnicodeEscape(w, c)
-			}
-		case c == '"' || c == '\\': // override, gotta escape these
-			return writeStringShortEscape(w, c)
-		default: // obey
-			return writeRune(w, c)
-		}
-	case BackslashEscapeUnicode:
-		switch {
-		case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?)
-			return writeRune(w, c)
-		default: // obey
-			return writeStringUnicodeEscape(w, c)
-		}
-	default:
-		panic("escaper returned an invalid escape mode")
-	}
-}