summary | refs | log | tree | commit | diff
path: root/reencode.go
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2023-02-14 11:44:36 -0700
committer Luke Shumaker <lukeshu@lukeshu.com> 2023-02-16 22:56:31 -0700
commit d19e2c6884c2d409fcc828c870f1839ee84f38cb (patch)
tree 3a61b0c070a5db186e2c49fe70dff6f40431124e /reencode.go
parent 6f8e7db1ac5ddd21b8e3fcc39a1e30fde9b62c3a (diff)
reencode: Factor into separate modules
Diffstat (limited to 'reencode.go')
-rw-r--r-- reencode.go 335
1 files changed, 74 insertions, 261 deletions
diff --git a/reencode.go b/reencode.go
index d8cdb71..7e9b5ff 100644
--- a/reencode.go
+++ b/reencode.go
@@ -5,14 +5,12 @@
package lowmemjson
import (
- "bytes"
"fmt"
"io"
"unicode/utf8"
"git.lukeshu.com/go/lowmemjson/internal/fastio"
"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
- "git.lukeshu.com/go/lowmemjson/internal/jsonstring"
)
// A ReEncoderConfig controls how a ReEncoder should behave.
@@ -71,10 +69,57 @@ type ReEncoderConfig struct {
// calls are syscalls, then you may want to wrap Out in a
// bufio.Writer.
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
+ var module reEncoderModule
+
+ // Basic
+ module = &reEncodeWrite{
+ out: fastio.NewAllWriter(out),
+ }
+
+ // Whitespace
+ if cfg.ForceTrailingNewlines {
+ module = &reEncodeForceNL{
+ out: module,
+ }
+ }
+ switch {
+ case cfg.Compact:
+ module = &reEncodeCompactWS{
+ out: module,
+ }
+ case cfg.Indent != "":
+ if cfg.CompactIfUnder > 0 {
+ module = &reEncodeCompactWSIfUnder{
+ out: module,
+ CompactWSIfUnder: cfg.CompactIfUnder,
+ }
+ }
+ module = &reEncodeIndent{
+ out: module,
+ Indent: cfg.Indent,
+ Prefix: cfg.Prefix,
+ }
+ }
+
+ // Numbers
+ module = &reEncodeCompactNum{
+ out: module,
+ }
+
+ // Strings
+ escaper := cfg.BackslashEscape
+ if escaper == nil {
+ escaper = EscapeDefault
+ }
+ module = &reEncodeString{
+ out: module,
+ BackslashEscape: escaper,
+ }
+
return &ReEncoder{
- cfg: cfg,
- out: fastio.NewAllWriter(out),
- specu: new(speculation),
+ out: module,
+ esc: escaper,
+ allowMultipleValues: cfg.AllowMultipleValues,
}
}
@@ -87,8 +132,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder {
//
// The memory use of a ReEncoder is O(CompactIfUnder+depth).
type ReEncoder struct {
- cfg ReEncoderConfig
- out fastio.AllWriter
+ out reEncoderModule
+ esc BackslashEscaper
+ allowMultipleValues bool
// state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer
buf [utf8.UTFMax]byte
@@ -99,18 +145,11 @@ type ReEncoder struct {
par jsonparse.Parser
inputPos int64
- // state: .handleRune
- lastNonSpace jsonparse.RuneType
- lastNonSpaceNonEOF jsonparse.RuneType
- wasNumber bool
- curIndent int
- uhex [3]byte // "\uABCD"-encoded characters in strings
- fracZeros int64
- expZero bool
- specu *speculation
-
// state: .pushWriteBarrier and .popWriteBarrier
barriers []barrier
+
+ // state: .handleRuneType
+ uhex [3]byte // "\uABCD"-encoded characters in strings
}
type barrier struct {
@@ -118,26 +157,9 @@ type barrier struct {
stackSize int
}
-type speculation struct {
- speculating bool
- endWhenStackSize int
- fmt ReEncoder
- compact bytes.Buffer
- buf []inputTuple
-}
-
-func (specu *speculation) Reset() {
- specu.speculating = false
- specu.endWhenStackSize = 0
- specu.fmt = ReEncoder{}
- specu.compact.Reset()
- specu.buf = specu.buf[:0]
-}
-
-type inputTuple struct {
- c rune
- t jsonparse.RuneType
- stackSize int
+type reEncoderModule interface {
+ HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error
+ PopWriteBarrier()
}
// public API //////////////////////////////////////////////////////////////////
@@ -239,14 +261,14 @@ func (enc *ReEncoder) Close() error {
return enc.err
}
if len(enc.barriers) == 0 {
- if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil {
+ if err := enc.handleRuneType(0, jsonparse.RuneTypeEOF, enc.stackSize()); err != nil {
enc.err = &ReEncodeWriteError{
Err: err,
Offset: enc.inputPos,
}
return enc.err
}
- if enc.cfg.AllowMultipleValues {
+ if enc.allowMultipleValues {
enc.par.Reset()
}
}
@@ -271,7 +293,7 @@ rehandle:
return
}
if t == jsonparse.RuneTypeEOF {
- if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 {
+ if enc.allowMultipleValues && len(enc.barriers) == 0 {
enc.par.Reset()
goto rehandle
} else {
@@ -301,7 +323,7 @@ func (enc *ReEncoder) popWriteBarrier() {
enc.par.PopBarrier()
enc.inputPos += enc.barriers[len(enc.barriers)-1].inputPos
enc.barriers = enc.barriers[:len(enc.barriers)-1]
- enc.lastNonSpace = enc.lastNonSpaceNonEOF
+ enc.out.PopWriteBarrier()
}
// internal ////////////////////////////////////////////////////////////////////
@@ -315,189 +337,9 @@ func (enc *ReEncoder) stackSize() int {
}
func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error {
- if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" {
- return enc.handleRuneNoSpeculation(c, t)
- }
-
- // main
- if !enc.specu.speculating { // not speculating
- switch t {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating
- if err, _ := enc.handleRunePre(c, t); err != nil {
- return err
- }
- enc.specu.speculating = true
- enc.specu.endWhenStackSize = stackSize - 1
- enc.specu.fmt = ReEncoder{
- cfg: enc.cfg,
- out: &enc.specu.compact,
- }
- enc.specu.fmt.cfg.Compact = true
- enc.specu.buf = append(enc.specu.buf, inputTuple{
- c: c,
- t: t,
- stackSize: stackSize,
- })
- if err := enc.specu.fmt.handleRuneMain(c, t); err != nil {
- return err
- }
- default:
- if err := enc.handleRuneNoSpeculation(c, t); err != nil {
- return err
- }
- }
- } else { // speculating
- enc.specu.buf = append(enc.specu.buf, inputTuple{
- c: c,
- t: t,
- stackSize: stackSize,
- })
- if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil {
- return err
- }
- switch {
- case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent
- buf := append([]inputTuple(nil), enc.specu.buf...)
- enc.specu.Reset()
- if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil {
- return err
- }
- for _, tuple := range buf[1:] {
- if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil {
- return err
- }
- }
- case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact
- if _, err := enc.specu.compact.WriteTo(enc.out); err != nil {
- return err
- }
- enc.specu.Reset()
- enc.lastNonSpace = t
- enc.curIndent--
- }
- }
-
- return nil
-}
-
-func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t jsonparse.RuneType) error {
- err, shouldHandle := enc.handleRunePre(c, t)
- if err != nil {
- return err
- }
- if !shouldHandle {
- return nil
- }
- return enc.handleRuneMain(c, t)
-}
-
-// handleRunePre handles buffered things that need to happen before
-// the new rune itself is handled.
-func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) {
- // emit newlines between top-level values
- if enc.lastNonSpace == jsonparse.RuneTypeEOF {
- switch {
- case enc.wasNumber && t.IsNumber():
- if err := enc.out.WriteByte('\n'); err != nil {
- return err, false
- }
- case enc.cfg.Indent != "" && !enc.cfg.Compact:
- if err := enc.out.WriteByte('\n'); err != nil {
- return err, false
- }
- }
- }
-
- // shorten numbers
- switch t { // trim trailing '0's from the fraction-part, but don't remove all digits
- case jsonparse.RuneTypeNumberFracDot:
- enc.fracZeros = 0
- case jsonparse.RuneTypeNumberFracDig:
- if c == '0' && enc.lastNonSpace == jsonparse.RuneTypeNumberFracDig {
- enc.fracZeros++
- return nil, false
- }
- fallthrough
- default:
- for enc.fracZeros > 0 {
- if err := enc.out.WriteByte('0'); err != nil {
- return err, false
- }
- enc.fracZeros--
- }
- }
- switch t { // trim leading '0's from the exponent-part, but don't remove all digits
- case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign:
- enc.expZero = true
- case jsonparse.RuneTypeNumberExpDig:
- if c == '0' && enc.expZero {
- return nil, false
- }
- enc.expZero = false
- default:
- if enc.expZero {
- if err := enc.out.WriteByte('0'); err != nil {
- return err, false
- }
- enc.expZero = false
- }
- }
-
- // whitespace
- switch {
- case enc.cfg.Compact:
- if t == jsonparse.RuneTypeSpace {
- return nil, false
- }
- case enc.cfg.Indent != "":
- switch t {
- case jsonparse.RuneTypeSpace:
- // let us manage whitespace, don't pass it through
- return nil, false
- case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd:
- enc.curIndent--
- switch enc.lastNonSpace {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
- // collapse
- default:
- if err := enc.emitNlIndent(); err != nil {
- return err, false
- }
- }
- default:
- switch enc.lastNonSpace {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma:
- if err := enc.emitNlIndent(); err != nil {
- return err, false
- }
- case jsonparse.RuneTypeObjectColon:
- if err := enc.out.WriteByte(' '); err != nil {
- return err, false
- }
- }
- switch t {
- case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg:
- enc.curIndent++
- }
- }
- }
-
- return nil, true
-}
-
-// handleRuneMain handles the new rune itself, not buffered things.
-func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
- escaper := enc.cfg.BackslashEscape
- if escaper == nil {
- escaper = EscapeDefault
- }
- var err error
switch t {
-
- case jsonparse.RuneTypeStringChar:
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone))
case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU:
- // do nothing
+ return nil
case jsonparse.RuneTypeStringEsc1:
switch c {
case '"', '\\', '/':
@@ -515,54 +357,25 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error {
default:
panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c))
}
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort))
+ return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeShort, stackSize)
case jsonparse.RuneTypeStringEscUA:
enc.uhex[0] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUB:
enc.uhex[1] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUC:
enc.uhex[2] = byte(c)
+ return nil
case jsonparse.RuneTypeStringEscUD:
c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c))
- err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode))
-
- case jsonparse.RuneTypeError: // EOF explicitly stated by .Close()
- fallthrough
- case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value
- enc.wasNumber = enc.lastNonSpace.IsNumber()
- switch {
- case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0:
- t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one)
- err = enc.out.WriteByte('\n')
- default:
- t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed
- }
+ return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeUnicode, stackSize)
+ case jsonparse.RuneTypeError:
+ panic(fmt.Errorf("should not happen: handleRune called with %#v", t))
default:
- err = enc.out.WriteByte(byte(c))
- }
-
- if t != jsonparse.RuneTypeSpace {
- enc.lastNonSpace = t
- if t != jsonparse.RuneTypeEOF {
- enc.lastNonSpaceNonEOF = t
- }
- }
- return err
-}
-
-func (enc *ReEncoder) emitNlIndent() error {
- if err := enc.out.WriteByte('\n'); err != nil {
- return err
- }
- if enc.cfg.Prefix != "" {
- if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil {
- return err
+ if t > jsonparse.RuneTypeEOF {
+ panic(fmt.Errorf("should not happen: handleRune called with %#v", t))
}
+ return enc.out.HandleRune(c, t, BackslashEscapeNone, stackSize)
}
- for i := 0; i < enc.curIndent; i++ {
- if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil {
- return err
- }
- }
- return nil
}