From d19e2c6884c2d409fcc828c870f1839ee84f38cb Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Tue, 14 Feb 2023 11:44:36 -0700 Subject: reencode: Factor into separate modules --- encode.go | 2 +- reencode.go | 335 ++++++++++--------------------------------- reencode_compactnum.go | 67 +++++++++ reencode_compactws.go | 27 ++++ reencode_compactwsifunder.go | 106 ++++++++++++++ reencode_indent.go | 102 +++++++++++++ reencode_nl.go | 39 +++++ reencode_string.go | 33 +++++ reencode_test.go | 15 +- reencode_write.go | 52 +++++++ 10 files changed, 514 insertions(+), 264 deletions(-) create mode 100644 reencode_compactnum.go create mode 100644 reencode_compactws.go create mode 100644 reencode_compactwsifunder.go create mode 100644 reencode_indent.go create mode 100644 reencode_nl.go create mode 100644 reencode_string.go create mode 100644 reencode_write.go diff --git a/encode.go b/encode.go index 38a2e93..00d3dad 100644 --- a/encode.go +++ b/encode.go @@ -83,7 +83,7 @@ func (enc *Encoder) Encode(obj any) (err error) { if enc.isRoot { enc.w.par.Reset() } - escaper := enc.w.cfg.BackslashEscape + escaper := enc.w.esc if escaper == nil { escaper = EscapeDefault } diff --git a/reencode.go b/reencode.go index d8cdb71..7e9b5ff 100644 --- a/reencode.go +++ b/reencode.go @@ -5,14 +5,12 @@ package lowmemjson import ( - "bytes" "fmt" "io" "unicode/utf8" "git.lukeshu.com/go/lowmemjson/internal/fastio" "git.lukeshu.com/go/lowmemjson/internal/jsonparse" - "git.lukeshu.com/go/lowmemjson/internal/jsonstring" ) // A ReEncoderConfig controls how a ReEncoder should behave. @@ -71,10 +69,57 @@ type ReEncoderConfig struct { // calls are syscalls, then you may want to wrap Out in a // bufio.Writer. 
func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { + var module reEncoderModule + + // Basic + module = &reEncodeWrite{ + out: fastio.NewAllWriter(out), + } + + // Whitespace + if cfg.ForceTrailingNewlines { + module = &reEncodeForceNL{ + out: module, + } + } + switch { + case cfg.Compact: + module = &reEncodeCompactWS{ + out: module, + } + case cfg.Indent != "": + if cfg.CompactIfUnder > 0 { + module = &reEncodeCompactWSIfUnder{ + out: module, + CompactWSIfUnder: cfg.CompactIfUnder, + } + } + module = &reEncodeIndent{ + out: module, + Indent: cfg.Indent, + Prefix: cfg.Prefix, + } + } + + // Numbers + module = &reEncodeCompactNum{ + out: module, + } + + // Strings + escaper := cfg.BackslashEscape + if escaper == nil { + escaper = EscapeDefault + } + module = &reEncodeString{ + out: module, + BackslashEscape: escaper, + } + return &ReEncoder{ - cfg: cfg, - out: fastio.NewAllWriter(out), - specu: new(speculation), + out: module, + esc: escaper, + allowMultipleValues: cfg.AllowMultipleValues, } } @@ -87,8 +132,9 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // // The memory use of a ReEncoder is O(CompactIfUnder+depth). 
type ReEncoder struct { - cfg ReEncoderConfig - out fastio.AllWriter + out reEncoderModule + esc BackslashEscaper + allowMultipleValues bool // state: .Write's/.WriteString's/.WriteRune's utf8-decoding buffer buf [utf8.UTFMax]byte @@ -99,18 +145,11 @@ type ReEncoder struct { par jsonparse.Parser inputPos int64 - // state: .handleRune - lastNonSpace jsonparse.RuneType - lastNonSpaceNonEOF jsonparse.RuneType - wasNumber bool - curIndent int - uhex [3]byte // "\uABCD"-encoded characters in strings - fracZeros int64 - expZero bool - specu *speculation - // state: .pushWriteBarrier and .popWriteBarrier barriers []barrier + + // state: .handleRuneType + uhex [3]byte // "\uABCD"-encoded characters in strings } type barrier struct { @@ -118,26 +157,9 @@ type barrier struct { stackSize int } -type speculation struct { - speculating bool - endWhenStackSize int - fmt ReEncoder - compact bytes.Buffer - buf []inputTuple -} - -func (specu *speculation) Reset() { - specu.speculating = false - specu.endWhenStackSize = 0 - specu.fmt = ReEncoder{} - specu.compact.Reset() - specu.buf = specu.buf[:0] -} - -type inputTuple struct { - c rune - t jsonparse.RuneType - stackSize int +type reEncoderModule interface { + HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error + PopWriteBarrier() } // public API ////////////////////////////////////////////////////////////////// @@ -239,14 +261,14 @@ func (enc *ReEncoder) Close() error { return enc.err } if len(enc.barriers) == 0 { - if err := enc.handleRuneType(0, jsonparse.RuneTypeError, enc.stackSize()); err != nil { + if err := enc.handleRuneType(0, jsonparse.RuneTypeEOF, enc.stackSize()); err != nil { enc.err = &ReEncodeWriteError{ Err: err, Offset: enc.inputPos, } return enc.err } - if enc.cfg.AllowMultipleValues { + if enc.allowMultipleValues { enc.par.Reset() } } @@ -271,7 +293,7 @@ rehandle: return } if t == jsonparse.RuneTypeEOF { - if enc.cfg.AllowMultipleValues && len(enc.barriers) == 0 { + if 
enc.allowMultipleValues && len(enc.barriers) == 0 { enc.par.Reset() goto rehandle } else { @@ -301,7 +323,7 @@ func (enc *ReEncoder) popWriteBarrier() { enc.par.PopBarrier() enc.inputPos += enc.barriers[len(enc.barriers)-1].inputPos enc.barriers = enc.barriers[:len(enc.barriers)-1] - enc.lastNonSpace = enc.lastNonSpaceNonEOF + enc.out.PopWriteBarrier() } // internal //////////////////////////////////////////////////////////////////// @@ -315,189 +337,9 @@ func (enc *ReEncoder) stackSize() int { } func (enc *ReEncoder) handleRuneType(c rune, t jsonparse.RuneType, stackSize int) error { - if enc.cfg.CompactIfUnder == 0 || enc.cfg.Compact || enc.cfg.Indent == "" { - return enc.handleRuneNoSpeculation(c, t) - } - - // main - if !enc.specu.speculating { // not speculating - switch t { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating - if err, _ := enc.handleRunePre(c, t); err != nil { - return err - } - enc.specu.speculating = true - enc.specu.endWhenStackSize = stackSize - 1 - enc.specu.fmt = ReEncoder{ - cfg: enc.cfg, - out: &enc.specu.compact, - } - enc.specu.fmt.cfg.Compact = true - enc.specu.buf = append(enc.specu.buf, inputTuple{ - c: c, - t: t, - stackSize: stackSize, - }) - if err := enc.specu.fmt.handleRuneMain(c, t); err != nil { - return err - } - default: - if err := enc.handleRuneNoSpeculation(c, t); err != nil { - return err - } - } - } else { // speculating - enc.specu.buf = append(enc.specu.buf, inputTuple{ - c: c, - t: t, - stackSize: stackSize, - }) - if err := enc.specu.fmt.handleRuneType(c, t, stackSize); err != nil { - return err - } - switch { - case enc.specu.compact.Len() >= enc.cfg.CompactIfUnder: // stop speculating; use indent - buf := append([]inputTuple(nil), enc.specu.buf...) 
- enc.specu.Reset() - if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { - return err - } - for _, tuple := range buf[1:] { - if err := enc.handleRuneType(tuple.c, tuple.t, tuple.stackSize); err != nil { - return err - } - } - case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact - if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { - return err - } - enc.specu.Reset() - enc.lastNonSpace = t - enc.curIndent-- - } - } - - return nil -} - -func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t jsonparse.RuneType) error { - err, shouldHandle := enc.handleRunePre(c, t) - if err != nil { - return err - } - if !shouldHandle { - return nil - } - return enc.handleRuneMain(c, t) -} - -// handleRunePre handles buffered things that need to happen before -// the new rune itself is handled. -func (enc *ReEncoder) handleRunePre(c rune, t jsonparse.RuneType) (error, bool) { - // emit newlines between top-level values - if enc.lastNonSpace == jsonparse.RuneTypeEOF { - switch { - case enc.wasNumber && t.IsNumber(): - if err := enc.out.WriteByte('\n'); err != nil { - return err, false - } - case enc.cfg.Indent != "" && !enc.cfg.Compact: - if err := enc.out.WriteByte('\n'); err != nil { - return err, false - } - } - } - - // shorten numbers - switch t { // trim trailing '0's from the fraction-part, but don't remove all digits - case jsonparse.RuneTypeNumberFracDot: - enc.fracZeros = 0 - case jsonparse.RuneTypeNumberFracDig: - if c == '0' && enc.lastNonSpace == jsonparse.RuneTypeNumberFracDig { - enc.fracZeros++ - return nil, false - } - fallthrough - default: - for enc.fracZeros > 0 { - if err := enc.out.WriteByte('0'); err != nil { - return err, false - } - enc.fracZeros-- - } - } - switch t { // trim leading '0's from the exponent-part, but don't remove all digits - case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign: - enc.expZero = true - case jsonparse.RuneTypeNumberExpDig: - if c == '0' && enc.expZero { - return 
nil, false - } - enc.expZero = false - default: - if enc.expZero { - if err := enc.out.WriteByte('0'); err != nil { - return err, false - } - enc.expZero = false - } - } - - // whitespace - switch { - case enc.cfg.Compact: - if t == jsonparse.RuneTypeSpace { - return nil, false - } - case enc.cfg.Indent != "": - switch t { - case jsonparse.RuneTypeSpace: - // let us manage whitespace, don't pass it through - return nil, false - case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd: - enc.curIndent-- - switch enc.lastNonSpace { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: - // collapse - default: - if err := enc.emitNlIndent(); err != nil { - return err, false - } - } - default: - switch enc.lastNonSpace { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma: - if err := enc.emitNlIndent(); err != nil { - return err, false - } - case jsonparse.RuneTypeObjectColon: - if err := enc.out.WriteByte(' '); err != nil { - return err, false - } - } - switch t { - case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: - enc.curIndent++ - } - } - } - - return nil, true -} - -// handleRuneMain handles the new rune itself, not buffered things. 
-func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { - escaper := enc.cfg.BackslashEscape - if escaper == nil { - escaper = EscapeDefault - } - var err error switch t { - - case jsonparse.RuneTypeStringChar: - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeNone)) case jsonparse.RuneTypeStringEsc, jsonparse.RuneTypeStringEscU: - // do nothing + return nil case jsonparse.RuneTypeStringEsc1: switch c { case '"', '\\', '/': @@ -515,54 +357,25 @@ func (enc *ReEncoder) handleRuneMain(c rune, t jsonparse.RuneType) error { default: panic(fmt.Errorf("should not happen: rune %q is not a RuneTypeStringEsc1", c)) } - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeShort)) + return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeShort, stackSize) case jsonparse.RuneTypeStringEscUA: enc.uhex[0] = byte(c) + return nil case jsonparse.RuneTypeStringEscUB: enc.uhex[1] = byte(c) + return nil case jsonparse.RuneTypeStringEscUC: enc.uhex[2] = byte(c) + return nil case jsonparse.RuneTypeStringEscUD: c = hexToRune(enc.uhex[0], enc.uhex[1], enc.uhex[2], byte(c)) - err = jsonstring.WriteStringChar(enc.out, c, escaper(c, BackslashEscapeUnicode)) - - case jsonparse.RuneTypeError: // EOF explicitly stated by .Close() - fallthrough - case jsonparse.RuneTypeEOF: // EOF implied by the start of the next top-level value - enc.wasNumber = enc.lastNonSpace.IsNumber() - switch { - case enc.cfg.ForceTrailingNewlines && len(enc.barriers) == 0: - t = jsonparse.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) - err = enc.out.WriteByte('\n') - default: - t = jsonparse.RuneTypeEOF // enc.lastNonSpace : an NL *might* be needed - } + return enc.out.HandleRune(c, jsonparse.RuneTypeStringChar, BackslashEscapeUnicode, stackSize) + case jsonparse.RuneTypeError: + panic(fmt.Errorf("should not happen: handleRune called with %#v", t)) default: - err = enc.out.WriteByte(byte(c)) - } - - if t != 
jsonparse.RuneTypeSpace { - enc.lastNonSpace = t - if t != jsonparse.RuneTypeEOF { - enc.lastNonSpaceNonEOF = t - } - } - return err -} - -func (enc *ReEncoder) emitNlIndent() error { - if err := enc.out.WriteByte('\n'); err != nil { - return err - } - if enc.cfg.Prefix != "" { - if _, err := enc.out.WriteString(enc.cfg.Prefix); err != nil { - return err + if t > jsonparse.RuneTypeEOF { + panic(fmt.Errorf("should not happen: handleRune called with %#v", t)) } + return enc.out.HandleRune(c, t, BackslashEscapeNone, stackSize) } - for i := 0; i < enc.curIndent; i++ { - if _, err := enc.out.WriteString(enc.cfg.Indent); err != nil { - return err - } - } - return nil } diff --git a/reencode_compactnum.go b/reencode_compactnum.go new file mode 100644 index 0000000..5da2c54 --- /dev/null +++ b/reencode_compactnum.go @@ -0,0 +1,67 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeCompactNum struct { + out reEncoderModule + + // state + fracFirst bool + fracZeros int64 + expZero bool +} + +var _ reEncoderModule = (*reEncodeCompactNum)(nil) + +func (enc *reEncodeCompactNum) PopWriteBarrier() { + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeCompactNum) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + // trim trailing '0's from the fraction-part, but don't remove all digits + switch t { + case jsonparse.RuneTypeNumberFracDot: + enc.fracFirst = true + enc.fracZeros = 0 + case jsonparse.RuneTypeNumberFracDig: + if c == '0' && !enc.fracFirst { + enc.fracZeros++ + return nil + } + fallthrough + default: + for enc.fracZeros > 0 { + if err := enc.out.HandleRune('0', jsonparse.RuneTypeNumberFracDig, escape, stackSize); err != nil { + return err + } + enc.fracZeros-- + } + enc.fracFirst = false + } + + // trim leading '0's from the exponent-part, but don't remove all digits + switch t { 
+ case jsonparse.RuneTypeNumberExpE, jsonparse.RuneTypeNumberExpSign: + enc.expZero = true + case jsonparse.RuneTypeNumberExpDig: + if c == '0' && enc.expZero { + return nil + } + enc.expZero = false + default: + if enc.expZero { + if err := enc.out.HandleRune('0', jsonparse.RuneTypeNumberFracDig, escape, stackSize); err != nil { + return err + } + enc.expZero = false + } + } + + return enc.out.HandleRune(c, t, escape, stackSize) +} diff --git a/reencode_compactws.go b/reencode_compactws.go new file mode 100644 index 0000000..396cf6d --- /dev/null +++ b/reencode_compactws.go @@ -0,0 +1,27 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeCompactWS struct { + out reEncoderModule +} + +var _ reEncoderModule = (*reEncodeCompactWS)(nil) + +func (enc *reEncodeCompactWS) PopWriteBarrier() { + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeCompactWS) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + if t == jsonparse.RuneTypeSpace { + return nil + } + + return enc.out.HandleRune(c, t, escape, stackSize) +} diff --git a/reencode_compactwsifunder.go b/reencode_compactwsifunder.go new file mode 100644 index 0000000..2349104 --- /dev/null +++ b/reencode_compactwsifunder.go @@ -0,0 +1,106 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "bytes" + + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeCompactWSIfUnder struct { + out reEncoderModule + + // CompactWSIfUnder compacts whitespace (as reEncodeCompactWS does) for + // individual elements if doing so would cause that element to + // be under this number of bytes. + // + // This has O(2^min(CompactWSIfUnder, depth)) time overhead, + // so set with caution. 
+ CompactWSIfUnder int + + // state + compactor reEncodeWrite + compacted bytes.Buffer + full []handleRuneCall + endWhenStackSize int +} + +var _ reEncoderModule = (*reEncodeCompactWSIfUnder)(nil) + +type handleRuneCall struct { + c rune + t jsonparse.RuneType + escape BackslashEscapeMode + stackSize int +} + +func (enc *reEncodeCompactWSIfUnder) reset() { + enc.compactor = reEncodeWrite{} + enc.compacted.Reset() + enc.full = enc.full[:0] + enc.endWhenStackSize = 0 +} + +func (enc *reEncodeCompactWSIfUnder) PopWriteBarrier() { + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeCompactWSIfUnder) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + if enc.compactor.out == nil { // not speculating + switch t { + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: // start speculating + enc.endWhenStackSize = stackSize - 1 + enc.compactor = reEncodeWrite{ + out: &enc.compacted, + } + enc.full = append(enc.full, handleRuneCall{ + c: c, + t: t, + escape: escape, + stackSize: stackSize, + }) + return enc.compactor.HandleRune(c, t, escape, stackSize) + default: + return enc.out.HandleRune(c, t, escape, stackSize) + } + } else { // speculating + enc.full = append(enc.full, handleRuneCall{ + c: c, + t: t, + escape: escape, + stackSize: stackSize, + }) + if t != jsonparse.RuneTypeSpace { + if err := enc.compactor.HandleRune(c, t, escape, stackSize); err != nil { + return err + } + } + switch { + case enc.compacted.Len() >= enc.CompactWSIfUnder: // stop speculating; use indent + buf := append([]handleRuneCall(nil), enc.full...) 
+ enc.reset() + if err := enc.out.HandleRune(buf[0].c, buf[0].t, buf[0].escape, buf[0].stackSize); err != nil { + return err + } + for _, tuple := range buf[1:] { + if err := enc.HandleRune(tuple.c, tuple.t, tuple.escape, tuple.stackSize); err != nil { + return err + } + } + case stackSize == enc.endWhenStackSize: // stop speculating; use compact + for _, tuple := range enc.full { + if tuple.t == jsonparse.RuneTypeSpace { + continue + } + if err := enc.out.HandleRune(tuple.c, tuple.t, tuple.escape, tuple.stackSize); err != nil { + return err + } + } + enc.reset() + } + return nil + } +} diff --git a/reencode_indent.go b/reencode_indent.go new file mode 100644 index 0000000..90b35db --- /dev/null +++ b/reencode_indent.go @@ -0,0 +1,102 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeIndent struct { + out reEncoderModule + + // String to use to indent. + // + // Newlines are emitted *between* top-level values; a newline + // is not emitted after the *last* top-level value. + Indent string + + // String to put before indents. 
+ Prefix string + + // state + lastNonSpace jsonparse.RuneType + lastNonSpaceNonEOF jsonparse.RuneType + curIndent int +} + +var _ reEncoderModule = (*reEncodeIndent)(nil) + +func (enc *reEncodeIndent) PopWriteBarrier() { + enc.lastNonSpace = enc.lastNonSpaceNonEOF + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeIndent) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + // emit newlines between top-level values + if enc.lastNonSpace == jsonparse.RuneTypeEOF && t != jsonparse.RuneTypeSpace { + if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, 0); err != nil { + return err + } + } + + // indent + switch t { + case jsonparse.RuneTypeSpace: + // let us manage whitespace, don't pass it through + return nil + case jsonparse.RuneTypeObjectEnd, jsonparse.RuneTypeArrayEnd: + enc.curIndent-- + switch enc.lastNonSpace { + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: + // collapse + default: + if err := enc.emitNlIndent(stackSize + 1); err != nil { + return err + } + } + default: + switch enc.lastNonSpace { + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeObjectComma, jsonparse.RuneTypeArrayBeg, jsonparse.RuneTypeArrayComma: + if err := enc.emitNlIndent(stackSize); err != nil { + return err + } + case jsonparse.RuneTypeObjectColon: + if err := enc.out.HandleRune(' ', jsonparse.RuneTypeSpace, 0, stackSize); err != nil { + return err + } + } + switch t { + case jsonparse.RuneTypeObjectBeg, jsonparse.RuneTypeArrayBeg: + enc.curIndent++ + } + } + + if t != jsonparse.RuneTypeSpace { + enc.lastNonSpace = t + if t != jsonparse.RuneTypeEOF { + enc.lastNonSpaceNonEOF = t + } + } + return enc.out.HandleRune(c, t, escape, stackSize) +} + +func (enc *reEncodeIndent) emitNlIndent(stackSize int) error { + if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, stackSize); err != nil { + return err + } + for _, c := range enc.Prefix { + if err := enc.out.HandleRune(c, jsonparse.RuneTypeSpace, 0, 
stackSize); err != nil { + return err + } + } + for i := 0; i < enc.curIndent; i++ { + for _, c := range enc.Indent { + if err := enc.out.HandleRune(c, jsonparse.RuneTypeSpace, 0, stackSize); err != nil { + return err + } + } + } + return nil +} diff --git a/reencode_nl.go b/reencode_nl.go new file mode 100644 index 0000000..b7a3cd9 --- /dev/null +++ b/reencode_nl.go @@ -0,0 +1,39 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeForceNL struct { + out reEncoderModule + + // state + skipNL bool +} + +var _ reEncoderModule = (*reEncodeForceNL)(nil) + +func (enc *reEncodeForceNL) PopWriteBarrier() { + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeForceNL) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + switch { + case t == jsonparse.RuneTypeEOF: + if err := enc.out.HandleRune('\n', jsonparse.RuneTypeSpace, 0, stackSize); err != nil { + return err + } + enc.skipNL = true + return enc.out.HandleRune(c, t, escape, stackSize) + case c == '\n' && t == jsonparse.RuneTypeSpace && enc.skipNL: + enc.skipNL = false + return nil + default: + enc.skipNL = false + return enc.out.HandleRune(c, t, escape, stackSize) + } +} diff --git a/reencode_string.go b/reencode_string.go new file mode 100644 index 0000000..ab148d6 --- /dev/null +++ b/reencode_string.go @@ -0,0 +1,33 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" +) + +type reEncodeString struct { + out reEncoderModule + + // BackslashEscape returns whether a given character in a + // string should be backslash-escaped. The bool argument is + // whether it was \u-escaped in the input. This does not + // affect characters that must or must-not be escaped to be + // valid JSON. 
+ BackslashEscape BackslashEscaper +} + +var _ reEncoderModule = (*reEncodeString)(nil) + +func (enc *reEncodeString) PopWriteBarrier() { + enc.out.PopWriteBarrier() +} + +func (enc *reEncodeString) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, stackSize int) error { + if t == jsonparse.RuneTypeStringChar { + escape = enc.BackslashEscape(c, escape) + } + return enc.out.HandleRune(c, t, escape, stackSize) +} diff --git a/reencode_test.go b/reencode_test.go index 38f3f8f..83660ef 100644 --- a/reencode_test.go +++ b/reencode_test.go @@ -66,12 +66,13 @@ func TestReEncode(t *testing.T) { "arrays2": { enc: ReEncoderConfig{ Indent: "\t", - CompactIfUnder: 10, + CompactIfUnder: 15, ForceTrailingNewlines: true, }, in: []any{ map[string]any{ "a": 1, + "b": 2, }, map[string]any{ "generation": 123456, @@ -81,7 +82,7 @@ func TestReEncode(t *testing.T) { }, }, exp: `[ - {"a":1}, + {"a":1,"b":2}, { "generation": 123456 }, @@ -120,6 +121,16 @@ func TestReEncode(t *testing.T) { ] `, }, + "indent-unicode": { + enc: ReEncoderConfig{ + Prefix: "—", + Indent: "»", + }, + in: []int{9}, + exp: `[ +—»9 +—]`, + }, } for tcName, tc := range testcases { tc := tc diff --git a/reencode_write.go b/reencode_write.go new file mode 100644 index 0000000..dffbb21 --- /dev/null +++ b/reencode_write.go @@ -0,0 +1,52 @@ +// Copyright (C) 2022-2023 Luke Shumaker +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package lowmemjson + +import ( + "git.lukeshu.com/go/lowmemjson/internal/fastio" + "git.lukeshu.com/go/lowmemjson/internal/jsonparse" + "git.lukeshu.com/go/lowmemjson/internal/jsonstring" +) + +type reEncodeWrite struct { + out fastio.AllWriter + + // state + last jsonparse.RuneType + lastNonEOF jsonparse.RuneType + wasNumber bool +} + +var _ reEncoderModule = (*reEncodeWrite)(nil) + +func (enc *reEncodeWrite) PopWriteBarrier() { + enc.last = enc.lastNonEOF +} + +func (enc *reEncodeWrite) HandleRune(c rune, t jsonparse.RuneType, escape BackslashEscapeMode, _ int) error { 
+ // emit newlines between top-level values, if necessary + if enc.last == jsonparse.RuneTypeEOF && enc.wasNumber && t.IsNumber() { + if _, err := enc.out.WriteRune('\n'); err != nil { + return err + } + } + if t == jsonparse.RuneTypeEOF { + enc.wasNumber = enc.last.IsNumber() + } else { + enc.lastNonEOF = t + } + enc.last = t + + // emit the rune + switch t { + case jsonparse.RuneTypeEOF: + return nil + case jsonparse.RuneTypeStringChar: + return jsonstring.WriteStringChar(enc.out, c, escape) + default: + _, err := enc.out.WriteRune(c) + return err + } +} -- cgit v1.2.3