diff options
-rw-r--r-- | ReleaseNotes.md | 11 | ||||
-rw-r--r-- | decode.go | 104 | ||||
-rw-r--r-- | decode_scan.go | 180 | ||||
-rw-r--r-- | decode_scan_test.go | 352 | ||||
-rw-r--r-- | encode.go | 326 | ||||
-rw-r--r-- | encode_string.go | 46 | ||||
-rw-r--r-- | go.mod | 5 | ||||
-rw-r--r-- | go.sum | 2 | ||||
-rw-r--r-- | internal/allwriter.go | 174 | ||||
-rw-r--r-- | internal/base64.go | 9 | ||||
-rw-r--r-- | internal/parse.go | 124 | ||||
-rw-r--r-- | ioutil.go | 31 | ||||
-rw-r--r-- | reencode.go | 286 | ||||
-rw-r--r-- | struct.go | 14 |
14 files changed, 950 insertions, 714 deletions
diff --git a/ReleaseNotes.md b/ReleaseNotes.md index f1fccfb..bb366f3 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -1,3 +1,14 @@ +# v0.3.1 (TBD) + + Theme: Performance + + This release does a bunch of performance tuning and optimizations, + with no user-visible changes other than memory consumption and CPU + time. Based on benchmarks with a real-world use-case, it is now + roughly an order of magnitude faster, with much lower memory + consumption (the big-O of memory consumption was always pretty low, + but there were some big constant factors before). + # v0.3.0 (2023-01-30) Theme: Breaking changes @@ -104,12 +104,10 @@ const maxNestingDepth = 10000 // an io.Reader. func NewDecoder(r io.RuneScanner) *Decoder { return &Decoder{ - io: &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: r, - parser: internal.Parser{ - MaxDepth: maxNestingDepth, - }, + io: runeTypeScanner{ + inner: r, + parser: internal.Parser{ + MaxDepth: maxNestingDepth, }, }, } @@ -247,6 +245,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } dec.io.Reset() + dec.io.PushReadBarrier() defer func() { if r := recover(); r != nil { if de, ok := r.(decodeError); ok { @@ -259,6 +258,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } }() dec.decode(ptrVal.Elem(), false) + dec.io.PopReadBarrier() return nil } @@ -321,12 +321,21 @@ func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Typ } } -type decRuneTypeScanner struct { +type decRuneScanner struct { dec *Decoder + eof bool } -func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { +func (sc *decRuneScanner) ReadRune() (rune, int, error) { + if sc.eof { + return 0, 0, io.EOF + } c, s, t, e := sc.dec.io.ReadRuneType() + if t == internal.RuneTypeEOF { + sc.eof = true + sc.dec.io.PopReadBarrier() + return 0, 0, io.EOF + } if e != nil { panic(decodeError{ Field: sc.dec.structStackStr(), @@ -335,28 +344,17 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, 
internal.RuneType, erro Err: e, }) } - return c, s, t, nil + return c, s, nil } -func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, _ := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - default: - return r, s, nil - } +func (sc *decRuneScanner) UnreadRune() error { + return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) UnreadRune() error { return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) InputOffset() int64 { return sc.dec.InputOffset() } -func (sc *decRuneTypeScanner) Reset() { sc.dec.io.Reset() } - -func (dec *Decoder) limitingScanner() runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &decRuneTypeScanner{ - dec: dec, - }, +func (dec *Decoder) limitingScanner() io.RuneScanner { + dec.io.PushReadBarrier() + return &decRuneScanner{ + dec: dec, } } @@ -565,7 +563,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { if dec.disallowUnknownFields { dec.panicType("", typ, fmt.Errorf("json: unknown field %q", name)) } - dec.scan(io.Discard) + dec.scan(internal.Discard) return } field := index.byPos[idx] @@ -749,7 +747,7 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { dec.decode(mValPtr.Elem(), false) val.Index(i).Set(mValPtr.Elem()) } else { - dec.scan(io.Discard) + dec.scan(internal.Discard) } i++ }) @@ -773,18 +771,18 @@ func (dec *Decoder) decode(val reflect.Value, nullOK bool) { } } -func (dec *Decoder) scan(out io.Writer) { +func (dec *Decoder) scan(out internal.RuneWriter) { limiter := dec.limitingScanner() for { c, _, err := limiter.ReadRune() if err == io.EOF { return } - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) } } -func (dec *Decoder) scanNumber(gTyp reflect.Type, out io.Writer) { +func (dec *Decoder) scanNumber(gTyp reflect.Type, out internal.RuneWriter) { if t := dec.peekRuneType(); !t.IsNumber() { dec.panicType(t.JSONType(), gTyp, nil) } @@ -869,7 +867,12 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal 
func(io.RuneScanner) er } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeObject(nil, @@ -949,7 +952,12 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeArray(nil, func() { @@ -991,34 +999,34 @@ func (dec *Decoder) decodeArray(gTyp reflect.Type, decodeMember func()) { } } -func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { +func (dec *Decoder) decodeString(gTyp reflect.Type, out internal.RuneWriter) { dec.expectRuneType('"', internal.RuneTypeStringBeg, gTyp) var uhex [4]byte for { c, t := dec.readRune() switch t { case internal.RuneTypeStringChar: - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: // do nothing case internal.RuneTypeStringEsc1: switch c { case '"': - _, _ = writeRune(out, '"') + _, _ = out.WriteRune('"') case '\\': - _, _ = writeRune(out, '\\') + _, _ = out.WriteRune('\\') case '/': - _, _ = writeRune(out, '/') + _, _ = out.WriteRune('/') case 'b': - _, _ = writeRune(out, '\b') + _, _ = out.WriteRune('\b') case 'f': - _, _ = writeRune(out, '\f') + _, _ = out.WriteRune('\f') case 'n': - _, _ = writeRune(out, '\n') + _, _ = out.WriteRune('\n') case 'r': - _, _ = writeRune(out, '\r') + _, _ = out.WriteRune('\r') case 't': - _, _ = writeRune(out, '\t') + _, _ = out.WriteRune('\t') default: panic("should not happen") } @@ -1038,12 +1046,12 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { handleUnicode: if utf16.IsSurrogate(c) { if dec.peekRuneType() != internal.RuneTypeStringEsc { - _, _ = writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) break } 
dec.expectRune('\\', internal.RuneTypeStringEsc) if dec.peekRuneType() != internal.RuneTypeStringEscU { - _, _ = writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) break } dec.expectRune('u', internal.RuneTypeStringEscU) @@ -1063,13 +1071,13 @@ func (dec *Decoder) decodeString(gTyp reflect.Type, out io.Writer) { rune(uhex[3])<<0 d := utf16.DecodeRune(c, c2) if d == utf8.RuneError { - _, _ = writeRune(out, utf8.RuneError) + _, _ = out.WriteRune(utf8.RuneError) c = c2 goto handleUnicode } - _, _ = writeRune(out, d) + _, _ = out.WriteRune(d) } else { - _, _ = writeRune(out, c) + _, _ = out.WriteRune(c) } case internal.RuneTypeStringEnd: return diff --git a/decode_scan.go b/decode_scan.go index 387fcea..e233caf 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -5,31 +5,12 @@ package lowmemjson import ( - "errors" "io" "git.lukeshu.com/go/lowmemjson/internal" ) -type runeTypeScanner interface { - // The returned error is a *ReadError, a *SyntaxError, or nil. - // An EOF condition is represented as one of: - // - // end of value but not file: (_, >0, RuneTypeEOF, nil) - // end of both value and file: (_, 0, RuneTypeEOF, nil) - // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) - // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) - ReadRuneType() (rune, int, internal.RuneType, error) - // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. - ReadRune() (rune, int, error) - UnreadRune() error - Reset() - InputOffset() int64 -} - -// runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// - -type runeTypeScannerImpl struct { +type runeTypeScanner struct { // everything that is not "initialized by constructor" starts // out as the zero value. 
@@ -47,9 +28,7 @@ type runeTypeScannerImpl struct { rErr error } -var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) - -func (sc *runeTypeScannerImpl) Reset() { +func (sc *runeTypeScanner) Reset() { sc.parser.Reset() if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { sc.repeat = false @@ -69,7 +48,14 @@ func (sc *runeTypeScannerImpl) Reset() { } } -func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { +// The returned error is a *ReadError, a *SyntaxError, or nil. +// An EOF condition is represented as one of: +// +// end of value but not file: (_, >0, RuneTypeEOF, nil) +// end of both value and file: (_, 0, RuneTypeEOF, nil) +// end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) +// end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) +func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { switch { case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): // do nothing @@ -77,6 +63,7 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err _, _, _ = sc.inner.ReadRune() default: sc.initialized = true + again: var err error sc.rRune, sc.rSize, err = sc.inner.ReadRune() sc.offset += int64(sc.rSize) @@ -91,6 +78,9 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err } else { sc.rErr = nil } + if sc.rType == internal.RuneTypeSpace { + goto again + } case io.EOF: sc.rType, err = sc.parser.HandleEOF() if err != nil { @@ -113,24 +103,12 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err return sc.rRune, sc.rSize, sc.rType, sc.rErr } -func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - 
return r, s, nil - } -} - // UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). // // If the last call to .ReadRune() or .ReadRuneType() has already been // unread, or if that call returned a rune with size 0, then // ErrInvalidUnreadRune is returned. Otherwise, nil is returned. -func (sc *runeTypeScannerImpl) UnreadRune() error { +func (sc *runeTypeScanner) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } @@ -139,7 +117,7 @@ func (sc *runeTypeScannerImpl) UnreadRune() error { return nil } -func (sc *runeTypeScannerImpl) InputOffset() int64 { +func (sc *runeTypeScanner) InputOffset() int64 { ret := sc.offset if sc.repeat { ret -= int64(sc.rSize) @@ -147,109 +125,37 @@ func (sc *runeTypeScannerImpl) InputOffset() int64 { return ret } -// noWSRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// - -type noWSRuneTypeScanner struct { - inner runeTypeScanner -} - -var _ runeTypeScanner = (*noWSRuneTypeScanner)(nil) - -func (sc *noWSRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { -again: - r, s, t, e := sc.inner.ReadRuneType() - if t == internal.RuneTypeSpace { - goto again - } - return r, s, t, e -} - -func (sc *noWSRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } +func (sc *runeTypeScanner) PushReadBarrier() { + sc.parser.PushReadBarrier() } -func (sc *noWSRuneTypeScanner) UnreadRune() error { return sc.inner.UnreadRune() } -func (sc *noWSRuneTypeScanner) Reset() { sc.inner.Reset() } -func (sc *noWSRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } - -// elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// - -type elemRuneTypeScanner struct { - inner runeTypeScanner - - parser internal.Parser - repeat bool - stuck bool - 
rType internal.RuneType - rErr error -} - -var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) - -func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { - // Read it, run it through the parent's parser. - r, s, t, e := sc.inner.ReadRuneType() - - // Run it through our child parser. - if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) { - if sc.repeat || sc.stuck { - sc.repeat = false - } else { - var err error - if s > 0 { - sc.rType, err = sc.parser.HandleRune(r) - } else { - sc.rType, err = sc.parser.HandleEOF() - } - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.inner.InputOffset(), - Err: err, - } - } else { - sc.rErr = nil +func (sc *runeTypeScanner) PopReadBarrier() { + sc.parser.PopBarrier() + if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, } + } else { + sc.rErr = nil } - sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError - t, e = sc.rType, sc.rErr - } - - // Check if we need to truncate the result. 
- if t == internal.RuneTypeEOF { - if s > 0 { - _ = sc.inner.UnreadRune() + // tell it to use that rType and rErr + _ = sc.UnreadRune() // we set it up to always succeed + } else if sc.rType == internal.RuneTypeEOF { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleEOF() + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset, + Err: err, + } + } else { + sc.rErr = nil } - return 0, 0, internal.RuneTypeEOF, nil } - - return r, s, t, e } - -func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - -func (sc *elemRuneTypeScanner) UnreadRune() error { - ret := sc.inner.UnreadRune() - sc.repeat = true - return ret -} - -func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } -func (sc *elemRuneTypeScanner) Reset() {} diff --git a/decode_scan_test.go b/decode_scan_test.go index 6a430ab..d0725e5 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -11,7 +11,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "git.lukeshu.com/go/lowmemjson/internal" ) @@ -23,8 +22,26 @@ type ReadRuneTypeResult struct { e error } +const ( + unreadRune = -1 + pushReadBarrier = -2 + popReadBarrier = -3 + reset = -4 +) + func (r ReadRuneTypeResult) String() string { - return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + switch r.s { + case unreadRune: + return fmt.Sprintf("{%q, unreadRune, %#v, %v}", r.r, r.t, r.e) + case pushReadBarrier: + return fmt.Sprintf("{%q, pushReadBarrier, %#v, %v}", r.r, r.t, r.e) + case popReadBarrier: + return fmt.Sprintf("{%q, popReadBarrier, %#v, %v}", r.r, r.t, r.e) + case reset: + return fmt.Sprintf("{%q, reset, %#v, %v}", r.r, r.t, r.e) + default: + return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + } } type 
runeTypeScannerTestcase struct { @@ -33,31 +50,6 @@ type runeTypeScannerTestcase struct { Exp []ReadRuneTypeResult } -func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneScanner) runeTypeScanner) { - for tcName, tc := range testcases { - tc := tc - t.Run(tcName, func(t *testing.T) { - t.Parallel() - reader := strings.NewReader(tc.Input) - sc := factory(reader) - var exp, act []string - for _, iExp := range tc.Exp { - var iAct ReadRuneTypeResult - if iExp.s < 0 { - iAct.s = iExp.s - iAct.e = sc.UnreadRune() - } else { - iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() - } - exp = append(exp, iExp.String()) - act = append(act, iAct.String()) - } - assert.Equal(t, exp, act) - assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) - }) - } -} - func TestRuneTypeScanner(t *testing.T) { t.Parallel() testcases := map[string]runeTypeScannerTestcase{ @@ -69,7 +61,6 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -86,9 +77,8 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, + {0, unreadRune, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -105,10 +95,9 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 
1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, nil}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -125,14 +114,29 @@ func TestRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, - {' ', 1, internal.RuneTypeSpace, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, {'0', 1, internal.RuneTypeNumberFracDig, nil}, {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, + {0, 0, internal.RuneTypeEOF, nil}, + {0, 0, internal.RuneTypeEOF, nil}, + }}, + "tail-ws": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, @@ -151,63 +155,30 @@ func TestRuneTypeScanner(t *testing.T) { {'{', 1, internal.RuneTypeEOF, nil}, {'{', 1, internal.RuneTypeEOF, nil}, }}, - "early-eof": {`{`, ``, []ReadRuneTypeResult{ + "early-eof": {` {`, ``, []ReadRuneTypeResult{ {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, 
Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, + {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, }}, "empty": {``, ``, []ReadRuneTypeResult{ {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, }}, - } - testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &runeTypeScannerImpl{ - inner: reader, - } - }) -} - -func TestNoWSRuneTypeScanner(t *testing.T) { - t.Parallel() - testcases := map[string]runeTypeScannerTestcase{ - "basic": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, + "basic2": {`1`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, + {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "unread": {`{"foo": 12.0}`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, 
internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, + "fragment": {`1,`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, }}, - "tail": {`{"foo": 12.0} `, ``, []ReadRuneTypeResult{ + "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ {'{', 1, internal.RuneTypeObjectBeg, nil}, {'"', 1, internal.RuneTypeStringBeg, nil}, {'f', 1, internal.RuneTypeStringChar, nil}, @@ -215,178 +186,83 @@ func TestNoWSRuneTypeScanner(t *testing.T) { {'o', 1, internal.RuneTypeStringChar, nil}, {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, + {0, pushReadBarrier, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "multi-value": {`1{}`, `}`, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - {'{', 1, internal.RuneTypeEOF, nil}, - }}, - "early-eof": {` {`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: 
io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 2, Err: io.ErrUnexpectedEOF}}, - }}, - } - testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, - } - }) -} - -func TestElemRuneTypeScanner(t *testing.T) { - t.Parallel() - toplevelTestcases := map[string]runeTypeScannerTestcase{ - "basic": {`1`, ``, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - }}, - "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'0', 1, internal.RuneTypeNumberIntZero, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - }}, - "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - }}, - "fragment": {`1,`, `,`, []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - }}, - "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: 
io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - }}, } - - childTestcases := make(map[string]runeTypeScannerTestcase, len(toplevelTestcases)) - for tcName, tc := range toplevelTestcases { - tc.Input = `[` + tc.Input - tc.Exp = append([]ReadRuneTypeResult(nil), tc.Exp...) // copy - for i, res := range tc.Exp { - if se, ok := res.e.(*DecodeSyntaxError); ok { - seCopy := *se - seCopy.Offset++ - tc.Exp[i].e = &seCopy + func() { + childTestcases := make(map[string]runeTypeScannerTestcase) + for tcName, tc := range testcases { + canChild := true + for _, res := range tc.Exp { + if res.s == pushReadBarrier { + canChild = false + break + } } - } - childTestcases[tcName] = tc - } - - t.Run("top-level", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, toplevelTestcases, func(reader io.RuneScanner) runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, - }, + if !canChild { + continue } - }) - }) - t.Run("child", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, childTestcases, func(reader io.RuneScanner) runeTypeScanner { - inner := &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + tc.Input = `[1,` + tc.Input + tc.Exp = append([]ReadRuneTypeResult{ + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + }, tc.Exp...) 
+ for i := 2; i < len(tc.Exp); i++ { + if se, ok := tc.Exp[i].e.(*DecodeSyntaxError); ok { + seCopy := *se + seCopy.Offset += 3 + tc.Exp[i].e = &seCopy + } } - var res ReadRuneTypeResult - res.r, res.s, res.t, res.e = inner.ReadRuneType() - require.Equal(t, - ReadRuneTypeResult{'[', 1, internal.RuneTypeArrayBeg, nil}.String(), - res.String()) - - return &elemRuneTypeScanner{ - inner: inner, + childTestcases["child-"+tcName] = tc + } + for tcName, tc := range childTestcases { + testcases[tcName] = tc + } + }() + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + t.Logf("input=%q", tc.Input) + reader := strings.NewReader(tc.Input) + sc := &runeTypeScanner{inner: reader} + var exp, act []string + for _, iExp := range tc.Exp { + var iAct ReadRuneTypeResult + switch iExp.s { + case unreadRune: + iAct.s = iExp.s + iAct.e = sc.UnreadRune() + case pushReadBarrier: + sc.PushReadBarrier() + iAct.s = iExp.s + case popReadBarrier: + sc.PopReadBarrier() + iAct.s = iExp.s + case reset: + sc.Reset() + iAct.s = iExp.s + default: + iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() + } + exp = append(exp, iExp.String()) + act = append(act, iAct.String()) } + assert.Equal(t, exp, act) + assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) }) - }) -} - -func TestElemRuneTypeScanner2(t *testing.T) { - t.Parallel() - parent := &noWSRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: strings.NewReader(` { "foo" : 12.0 } `), - }, - } - exp := []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - } - expStr := make([]string, 0, len(exp)) - actStr := make([]string, 0, len(exp)) - for _, iExp := range exp { - var iAct 
ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - child := &elemRuneTypeScanner{ - inner: parent, - } - exp = []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = child.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - exp = []ReadRuneTypeResult{ - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) } } @@ -9,10 +9,8 @@ import ( "encoding" "encoding/base64" "encoding/json" - "errors" "fmt" "io" - iofs "io/fs" "reflect" "sort" "strconv" @@ -30,22 +28,6 @@ type Encodable interface { EncodeJSON(w io.Writer) error } -type encodeError struct { - Err error -} - -func encodeWriteByte(w io.Writer, b byte) { - if err := writeByte(w, b); err != nil { - panic(encodeError{err}) - } -} - -func encodeWriteString(w io.Writer, str string) { - if _, err := io.WriteString(w, str); err != nil { - panic(encodeError{err}) - } -} - // An Encoder encodes and writes values to a stream of JSON elements. 
// // Encoder is analogous to, and has a similar API to the standar @@ -91,22 +73,19 @@ func NewEncoder(w io.Writer) *Encoder { // // [documentation for encoding/json.Marshal]: https://pkg.go.dev/encoding/json@go1.18#Marshal func (enc *Encoder) Encode(obj any) (err error) { - defer func() { - if r := recover(); r != nil { - if e, ok := r.(encodeError); ok { - err = e.Err - } else { - panic(r) - } - } - }() - encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}) + if err := encode(enc.w, reflect.ValueOf(obj), enc.w.BackslashEscape, false, 0, map[any]struct{}{}); err != nil { + return err + } if enc.closeAfterEncode { return enc.w.Close() } return nil } +func discardInt(_ int, err error) error { + return err +} + var ( encodableType = reflect.TypeOf((*Encodable)(nil)).Elem() jsonMarshalerType = reflect.TypeOf((*json.Marshaler)(nil)).Elem() @@ -115,10 +94,9 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) { +func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { if !val.IsValid() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } switch { @@ -127,129 +105,150 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool fallthrough case val.Type().Implements(encodableType): if val.Kind() == reflect.Pointer && val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(Encodable) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } - // Use a sub-ReEncoder to check that it's a full element. 
- validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) - if err := obj.EncodeJSON(validator); err != nil { - panic(encodeError{&EncodeMethodError{ + w.pushWriteBarrier() + if err := obj.EncodeJSON(w); err != nil { + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, - }}) + } } - if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) { - panic(encodeError{&EncodeMethodError{ + if err := w.Close(); err != nil { + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, - }}) + } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType): val = val.Addr() fallthrough case val.Type().Implements(jsonMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(json.Marshaler) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } dat, err := obj.MarshalJSON() if err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } - // Use a sub-ReEncoder to check that it's a full element. 
- validator := NewReEncoder(w, ReEncoderConfig{BackslashEscape: escaper}) - if _, err := validator.Write(dat); err != nil { - panic(encodeError{&EncodeMethodError{ + w.pushWriteBarrier() + if _, err := w.Write(dat); err != nil { + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } - if err := validator.Close(); err != nil { - panic(encodeError{&EncodeMethodError{ + if err := w.Close(); err != nil { + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, - }}) + } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType): val = val.Addr() fallthrough case val.Type().Implements(textMarshalerType): if val.Kind() == reflect.Pointer && val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } obj, ok := val.Interface().(encoding.TextMarshaler) if !ok { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } text, err := obj.MarshalText() if err != nil { - panic(encodeError{&EncodeMethodError{ + return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalText", Err: err, - }}) + } + } + if err := encodeStringFromBytes(w, escaper, text); err != nil { + return err } - encodeStringFromBytes(w, escaper, text) - default: switch val.Kind() { case reflect.Bool: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } if val.Bool() { - encodeWriteString(w, "true") + if _, err := w.WriteString("true"); err != nil { + return err + } } else { - encodeWriteString(w, "false") + if _, err := w.WriteString("false"); err != nil { + return err + } } if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + 
if _, err := w.WriteString(strconv.FormatInt(val.Int(), 10)); err != nil { + return err } - encodeWriteString(w, strconv.FormatInt(val.Int(), 10)) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if _, err := w.WriteString(strconv.FormatUint(val.Uint(), 10)); err != nil { + return err } - encodeWriteString(w, strconv.FormatUint(val.Uint(), 10)) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.Float32, reflect.Float64: if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if err := encodeTODO(w, val); err != nil { + return err } - encodeTODO(w, val) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } case reflect.String: if val.Type() == numberType { @@ -258,29 +257,47 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool numStr = "0" } if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + } + if _, err := w.WriteString(numStr); err != nil { + return err } - encodeWriteString(w, numStr) if quote { - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } } } else { if quote { var buf bytes.Buffer - encodeStringFromString(&buf, escaper, val.String()) - encodeStringFromBytes(w, escaper, buf.Bytes()) + if err := encodeStringFromString(&buf, escaper, val.String()); err != nil { + return err + } + if err := encodeStringFromBytes(w, escaper, buf.Bytes()); err != nil { + return err + } } else { - encodeStringFromString(w, escaper, val.String()) + if err := encodeStringFromString(w, escaper, val.String()); err != nil { + return err + } } } case reflect.Interface: if 
val.IsNil() { - encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } } else { - encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen) + if err := encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen); err != nil { + return err + } } case reflect.Struct: - encodeWriteByte(w, '{') + if err := w.WriteByte('{'); err != nil { + return err + } empty := true for _, field := range indexStruct(val.Type()).byPos { fVal, err := val.FieldByIndexErr(field.Path) @@ -291,35 +308,45 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool continue } if !empty { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } } empty = false - encodeStringFromString(w, escaper, field.Name) - encodeWriteByte(w, ':') - encode(w, fVal, escaper, field.Quote, cycleDepth, cycleSeen) + if err := encodeStringFromString(w, escaper, field.Name); err != nil { + return err + } + if err := w.WriteByte(':'); err != nil { + return err + } + if err := encode(w, fVal, escaper, field.Quote, cycleDepth, cycleSeen); err != nil { + return err + } + } + if err := w.WriteByte('}'); err != nil { + return err } - encodeWriteByte(w, '}') case reflect.Map: if val.IsNil() { - encodeWriteString(w, "null") - return + return discardInt(w.WriteString("null")) } if val.Len() == 0 { - encodeWriteString(w, "{}") - return + return discardInt(w.WriteString("{}")) } if cycleDepth++; cycleDepth > startDetectingCyclesAfter { ptr := val.UnsafePointer() if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encodeWriteByte(w, '{') + if err := w.WriteByte('{'); err != nil { + return err + } type kv struct { K string @@ -330,14 +357,18 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool for i := 0; 
iter.Next(); i++ { // TODO: Avoid buffering the map key var k strings.Builder - encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen) + if err := encode(NewReEncoder(&k, ReEncoderConfig{BackslashEscape: escaper}), iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { + return err + } kStr := k.String() if kStr == "null" { kStr = `""` } if !strings.HasPrefix(kStr, `"`) { k.Reset() - encodeStringFromString(&k, escaper, kStr) + if err := encodeStringFromString(&k, escaper, kStr); err != nil { + return err + } kStr = k.String() } kvs[i].K = kStr @@ -349,17 +380,29 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool for i, kv := range kvs { if i > 0 { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } + } + if _, err := w.WriteString(kv.K); err != nil { + return err + } + if err := w.WriteByte(':'); err != nil { + return err + } + if err := encode(w, kv.V, escaper, false, cycleDepth, cycleSeen); err != nil { + return err } - encodeWriteString(w, kv.K) - encodeWriteByte(w, ':') - encode(w, kv.V, escaper, false, cycleDepth, cycleSeen) } - encodeWriteByte(w, '}') + if err := w.WriteByte('}'); err != nil { + return err + } case reflect.Slice: switch { case val.IsNil(): - encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } case val.Type().Elem().Kind() == reflect.Uint8 && !(false || val.Type().Elem().Implements(encodableType) || reflect.PointerTo(val.Type().Elem()).Implements(encodableType) || @@ -367,11 +410,13 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool reflect.PointerTo(val.Type().Elem()).Implements(jsonMarshalerType) || val.Type().Elem().Implements(textMarshalerType) || reflect.PointerTo(val.Type().Elem()).Implements(textMarshalerType)): - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } enc := base64.NewEncoder(base64.StdEncoding, w) if val.CanConvert(byteSliceType) { 
if _, err := enc.Write(val.Convert(byteSliceType).Interface().([]byte)); err != nil { - panic(encodeError{err}) + return err } } else { // TODO: Surely there's a better way. @@ -379,14 +424,16 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool var buf [1]byte buf[0] = val.Index(i).Convert(byteType).Interface().(byte) if _, err := enc.Write(buf[:]); err != nil { - panic(encodeError{err}) + return err } } } if err := enc.Close(); err != nil { - panic(encodeError{err}) + return err + } + if err := w.WriteByte('"'); err != nil { + return err } - encodeWriteByte(w, '"') default: if cycleDepth++; cycleDepth > startDetectingCyclesAfter { // For slices, val.UnsafePointer() doesn't return a pointer to the slice header @@ -399,61 +446,80 @@ func encode(w io.Writer, val reflect.Value, escaper BackslashEscaper, quote bool len int }{val.UnsafePointer(), val.Len()} if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encodeArray(w, val, escaper, cycleDepth, cycleSeen) + if err := encodeArray(w, val, escaper, cycleDepth, cycleSeen); err != nil { + return err + } } case reflect.Array: - encodeArray(w, val, escaper, cycleDepth, cycleSeen) + if err := encodeArray(w, val, escaper, cycleDepth, cycleSeen); err != nil { + return err + } case reflect.Pointer: if val.IsNil() { - encodeWriteString(w, "null") + if _, err := w.WriteString("null"); err != nil { + return err + } } else { if cycleDepth++; cycleDepth > startDetectingCyclesAfter { ptr := val.UnsafePointer() if _, seen := cycleSeen[ptr]; seen { - panic(encodeError{&EncodeValueError{ + return &EncodeValueError{ Value: val, Str: fmt.Sprintf("encountered a cycle via %s", val.Type()), - }}) + } } cycleSeen[ptr] = struct{}{} defer delete(cycleSeen, ptr) } - encode(w, val.Elem(), escaper, quote, cycleDepth, 
cycleSeen) + if err := encode(w, val.Elem(), escaper, quote, cycleDepth, cycleSeen); err != nil { + return err + } } default: - panic(encodeError{&EncodeTypeError{ + return &EncodeTypeError{ Type: val.Type(), - }}) + } } } + return nil } -func encodeArray(w io.Writer, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) { - encodeWriteByte(w, '[') +func encodeArray(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { + if err := w.WriteByte('['); err != nil { + return err + } n := val.Len() for i := 0; i < n; i++ { if i > 0 { - encodeWriteByte(w, ',') + if err := w.WriteByte(','); err != nil { + return err + } } - encode(w, val.Index(i), escaper, false, cycleDepth, cycleSeen) + if err := encode(w, val.Index(i), escaper, false, cycleDepth, cycleSeen); err != nil { + return err + } + } + if err := w.WriteByte(']'); err != nil { + return err } - encodeWriteByte(w, ']') + return nil } -func encodeTODO(w io.Writer, val reflect.Value) { +func encodeTODO(w io.Writer, val reflect.Value) error { bs, err := json.Marshal(val.Interface()) if err != nil { - panic(encodeError{err}) + return err } if _, err := w.Write(bs); err != nil { - panic(encodeError{err}) + return err } + return nil } diff --git a/encode_string.go b/encode_string.go index c5cb442..12f934e 100644 --- a/encode_string.go +++ b/encode_string.go @@ -45,7 +45,7 @@ func writeStringShortEscape(w io.Writer, c rune) (int, error) { return w.Write(buf[:]) } -func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { +func writeStringChar(w internal.AllWriter, c rune, wasEscaped BackslashEscapeMode, escaper BackslashEscaper) (int, error) { if escaper == nil { escaper = EscapeDefault } @@ -62,19 +62,19 @@ func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escape case c == '"' || c == '\\': // override, gotta escape these return 
writeStringShortEscape(w, c) default: // obey - return writeRune(w, c) + return w.WriteRune(c) } case BackslashEscapeShort: switch c { case '"', '\\', '/', '\b', '\f', '\n', '\r', '\t': // obey return writeStringShortEscape(w, c) default: // override, can't short-escape these - return writeRune(w, c) + return w.WriteRune(c) } case BackslashEscapeUnicode: switch { case c > 0xFFFF: // override, can't escape these (TODO: unless we use UTF-16 surrogates?) - return writeRune(w, c) + return w.WriteRune(c) default: // obey return writeStringUnicodeEscape(w, c) } @@ -83,29 +83,47 @@ func writeStringChar(w io.Writer, c rune, wasEscaped BackslashEscapeMode, escape } } -func encodeStringFromString(w io.Writer, escaper BackslashEscaper, str string) { - encodeWriteByte(w, '"') +func encodeStringFromString(w internal.AllWriter, escaper BackslashEscaper, str string) error { + if err := w.WriteByte('"'); err != nil { + return err + } for _, c := range str { if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) + return err } } - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + return nil } -func encodeStringFromBytes(w io.Writer, escaper BackslashEscaper, str []byte) { - encodeWriteByte(w, '"') +func encodeStringFromBytes(w internal.AllWriter, escaper BackslashEscaper, str []byte) error { + if err := w.WriteByte('"'); err != nil { + return err + } for i := 0; i < len(str); { c, size := utf8.DecodeRune(str[i:]) if _, err := writeStringChar(w, c, BackslashEscapeNone, escaper); err != nil { - panic(encodeError{err}) + return err } i += size } - encodeWriteByte(w, '"') + if err := w.WriteByte('"'); err != nil { + return err + } + return nil } func init() { - internal.EncodeStringFromString = func(w io.Writer, s string) { encodeStringFromString(w, nil, s) } - internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { encodeStringFromBytes(w, nil, s) } + internal.EncodeStringFromString = func(w 
io.Writer, s string) { + if err := encodeStringFromString(internal.NewAllWriter(w), nil, s); err != nil { + panic(err) + } + } + internal.EncodeStringFromBytes = func(w io.Writer, s []byte) { + if err := encodeStringFromBytes(internal.NewAllWriter(w), nil, s); err != nil { + panic(err) + } + } } @@ -2,7 +2,10 @@ module git.lukeshu.com/go/lowmemjson go 1.18 -require github.com/stretchr/testify v1.8.0 +require ( + git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e + github.com/stretchr/testify v1.8.0 +) require ( github.com/davecgh/go-spew v1.1.1 // indirect @@ -1,3 +1,5 @@ +git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e h1:ZAzzElMx7aMgJXC9QXOxIPyoZrWxX00eP2sR4UHYP+4= +git.lukeshu.com/go/typedsync v0.0.0-20230126205501-1e8afc0ceb1e/go.mod h1:EAn7NcfoGeGMv3DWxKQnifcT/rYPAIEqp9Rsz//oYqY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/internal/allwriter.go b/internal/allwriter.go new file mode 100644 index 0000000..187aa8e --- /dev/null +++ b/internal/allwriter.go @@ -0,0 +1,174 @@ +// Copyright (C) 2023 Luke Shumaker <lukeshu@lukeshu.com> +// +// SPDX-License-Identifier: GPL-2.0-or-later + +package internal + +import ( + "io" + "unicode/utf8" +) + +// interfaces ///////////////////////////////////////////////////////////////// + +type RuneWriter interface { + WriteRune(rune) (int, error) +} + +// An AllWriter is the union of several common writer interfaces. 
+type AllWriter interface { + io.Writer + io.ByteWriter + RuneWriter + io.StringWriter +} + +// implementations //////////////////////////////////////////////////////////// + +func WriteByte(w io.Writer, b byte) error { + var buf [1]byte + buf[0] = b + _, err := w.Write(buf[:]) + return err +} + +func WriteRune(w io.Writer, r rune) (int, error) { + var buf [utf8.UTFMax]byte + n := utf8.EncodeRune(buf[:], r) + return w.Write(buf[:n]) +} + +func WriteString(w io.Writer, s string) (int, error) { + return w.Write([]byte(s)) +} + +// wrappers /////////////////////////////////////////////////////////////////// + +// NNN + +type ( + writerNNN interface{ io.Writer } + writerNNNWrapper struct{ writerNNN } +) + +func (w writerNNNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerNNNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NNY + +type ( + writerNNY interface { + io.Writer + io.StringWriter + } + writerNNYWrapper struct{ writerNNY } +) + +func (w writerNNYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// NYN + +type ( + writerNYN interface { + io.Writer + RuneWriter + } + writerNYNWrapper struct{ writerNYN } +) + +func (w writerNYNWrapper) WriteByte(b byte) error { return WriteByte(w, b) } +func (w writerNYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NYY + +type ( + writerNYY interface { + io.Writer + RuneWriter + io.StringWriter + } + writerNYYWrapper struct{ writerNYY } +) + +func (w writerNYYWrapper) WriteByte(b byte) error { return WriteByte(w, b) } + +// YNN + +type ( + writerYNN interface { + io.Writer + io.ByteWriter + } + writerYNNWrapper struct{ writerYNN } +) + +func (w writerYNNWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } +func (w writerYNNWrapper) WriteString(s 
string) (int, error) { return WriteString(w, s) } + +// YNY + +type ( + writerYNY interface { + io.Writer + io.ByteWriter + io.StringWriter + } + writerYNYWrapper struct{ writerYNY } +) + +func (w writerYNYWrapper) WriteRune(r rune) (int, error) { return WriteRune(w, r) } + +// YYN + +type ( + writerYYN interface { + io.Writer + io.ByteWriter + RuneWriter + } + writerYYNWrapper struct{ writerYYN } +) + +func (w writerYYNWrapper) WriteString(s string) (int, error) { return WriteString(w, s) } + +// NewAllWriter wraps an io.Writer turning it in to an AllWriter. If +// the io.Writer already has any of the other write methods, then its +// native version of those methods are used. +func NewAllWriter(inner io.Writer) AllWriter { + switch inner := inner.(type) { + // 3 Y bits + case AllWriter: // YYY: + return inner + // 2 Y bits + case writerNYY: + return writerNYYWrapper{writerNYY: inner} + case writerYNY: + return writerYNYWrapper{writerYNY: inner} + case writerYYN: + return writerYYNWrapper{writerYYN: inner} + // 1 Y bit + case writerNNY: + return writerNNYWrapper{writerNNY: inner} + case writerNYN: + return writerNYNWrapper{writerNYN: inner} + case writerYNN: + return writerYNNWrapper{writerYNN: inner} + // 0 Y bits + default: // NNN: + return writerNNNWrapper{writerNNN: inner} + } +} + +// discard ///////////////////////////////////////////////////////////////////// + +// Discard is like io.Discard, but implements AllWriter. 
+var Discard = discard{} + +type discard struct{} + +func (discard) Write(p []byte) (int, error) { return len(p), nil } +func (discard) WriteByte(b byte) error { return nil } +func (discard) WriteRune(r rune) (int, error) { return 0, nil } +func (discard) WriteString(s string) (int, error) { return len(s), nil } diff --git a/internal/base64.go b/internal/base64.go index 15adbf4..291a229 100644 --- a/internal/base64.go +++ b/internal/base64.go @@ -19,7 +19,10 @@ type base64Decoder struct { bufLen int } -func NewBase64Decoder(w io.Writer) io.WriteCloser { +func NewBase64Decoder(w io.Writer) interface { + io.WriteCloser + RuneWriter +} { return &base64Decoder{ dst: w, } @@ -112,6 +115,10 @@ func (dec *base64Decoder) Write(dat []byte) (int, error) { return len(dat), nil } +func (dec *base64Decoder) WriteRune(r rune) (int, error) { + return WriteRune(dec, r) +} + func (dec *base64Decoder) Close() error { if dec.bufLen == 0 { return nil diff --git a/internal/parse.go b/internal/parse.go index b11aae6..36db4a9 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -313,6 +313,13 @@ type Parser struct { // a ["x","y" // ["x","y"] stack []RuneType + + barriers []barrier +} + +type barrier struct { + closed bool + stack []RuneType } func (par *Parser) init() { @@ -345,8 +352,26 @@ func (par *Parser) stackString() string { return buf.String() } +func (par *Parser) depth() int { + n := len(par.stack) + for _, barrier := range par.barriers { + n += len(barrier.stack) + } + return n +} + func (par *Parser) StackIsEmpty() bool { - return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny) + if len(par.barriers) > 0 { + return false + } + if len(par.stack) == 0 { + return true + } + return len(par.stack) == 1 && par.stack[0] == runeTypeAny +} + +func (par *Parser) StackSize() int { + return len(par.stack) } // Reset all Parser state. 
@@ -356,6 +381,99 @@ func (par *Parser) Reset() { } } +// PushReadBarrier causes the parser to expect EOF once the end of the +// element that is started by the current top-of-stack is reached, +// until this is un-done with PopBarrier. It essentially turns the +// parser in to a sub-parser. +// +// PushReadBarrier may only be called at the beginning of an element, +// whether that be +// +// - runeTypeAny +// - RuneTypeObjectBeg +// - RuneTypeArrayBeg +// - RuneTypeStringBeg +// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig +// - RuneTypeTrueT +// - RuneTypeFalseF +// - RuneTypeNullN +func (par *Parser) PushReadBarrier() { + // Sanity checking. + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushReadBarrier call: empty stack")) + } + curState := par.stack[len(par.stack)-1] + switch curState { + case runeTypeAny, + RuneTypeObjectBeg, + RuneTypeArrayBeg, + RuneTypeStringBeg, + RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, + RuneTypeTrueT, + RuneTypeFalseF, + RuneTypeNullN: + // OK + default: + panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) + } + // Actually push. + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack[:len(par.stack)-1], + }) + par.stack = []RuneType{curState} +} + +// PushWriteBarrier causes the parser to expect EOF once the end of +// the about-to-start element is reached, until this is un-done with +// PopBarrier. It essentially turns the parser in to a sub-parser. +// +// PushWriteBarrier may only be called at the places where an element +// of any type may start: +// +// - runeTypeAny for top-level and object-value elements +// - RuneTypeArrayBeg for array-item elements +// +// PushWriteBarrier signals intent to write an element; if it is +// called in a place where an element is optional (at the beginning of +// an array), it becomes a syntax error to not write the element. 
+func (par *Parser) PushWriteBarrier() { + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushWriteBarrier call: empty stack")) + } + switch par.stack[len(par.stack)-1] { + case runeTypeAny: + par.popState() + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + case RuneTypeArrayBeg: + par.replaceState(RuneTypeArrayComma) + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + default: + panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) + } +} + +// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. +func (par *Parser) PopBarrier() { + if len(par.barriers) == 0 { + panic(errors.New("illegal PopBarrier call: empty barrier stack")) + } + barrier := par.barriers[len(par.barriers)-1] + par.barriers = par.barriers[:len(par.barriers)-1] + par.closed = barrier.closed + par.stack = append(barrier.stack, par.stack...) +} + // HandleEOF feeds EOF to the Parser. The returned RuneType is either // RuneTypeEOF or RuneTypeError. 
// @@ -435,12 +553,12 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '{': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeObjectBeg), nil case '[': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeArrayBeg), nil diff --git a/ioutil.go b/ioutil.go deleted file mode 100644 index a53eac3..0000000 --- a/ioutil.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright (C) 2022-2023 Luke Shumaker <lukeshu@lukeshu.com> -// -// SPDX-License-Identifier: GPL-2.0-or-later - -package lowmemjson - -import ( - "io" - "unicode/utf8" -) - -func writeByte(w io.Writer, c byte) error { - if br, ok := w.(interface{ WriteByte(byte) error }); ok { - return br.WriteByte(c) - } - var buf [1]byte - buf[0] = c - if _, err := w.Write(buf[:]); err != nil { - return err - } - return nil -} - -func writeRune(w io.Writer, c rune) (int, error) { - if rw, ok := w.(interface{ WriteRune(rune) (int, error) }); ok { - return rw.WriteRune(c) - } - var buf [utf8.UTFMax]byte - n := utf8.EncodeRune(buf[:], c) - return w.Write(buf[:n]) -} diff --git a/reencode.go b/reencode.go index 876af62..eae80db 100644 --- a/reencode.go +++ b/reencode.go @@ -35,8 +35,8 @@ type ReEncoderConfig struct { // // Has no affect if Compact is true or Indent is empty. // - // This has O((CompactIfUnder+1)^2) memory overhead, so set - // with caution. + // This has O(2^min(CompactIfUnder, depth)) time overhead, so + // set with caution. CompactIfUnder int // String to use to indent; ignored if Compact is true. 
@@ -71,7 +71,8 @@ type ReEncoderConfig struct { func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { return &ReEncoder{ ReEncoderConfig: cfg, - out: out, + out: internal.NewAllWriter(out), + specu: new(speculation), } } @@ -82,12 +83,12 @@ func NewReEncoder(out io.Writer, cfg ReEncoderConfig) *ReEncoder { // This is useful for prettifying, minifying, sanitizing, and/or // validating JSON. // -// The memory use of a ReEncoder is O( (CompactIfUnder+1)^2 + depth). +// The memory use of a ReEncoder is O(CompactIfUnder+depth). type ReEncoder struct { ReEncoderConfig - out io.Writer + out internal.AllWriter - // state: .Write's utf8-decoding buffer + // state: .Write's and .WriteString's utf8-decoding buffer buf [utf8.UTFMax]byte bufLen int @@ -98,27 +99,47 @@ type ReEncoder struct { inputPos int64 // state: .handleRune - handleRuneState struct { - lastNonSpace internal.RuneType - wasNumber bool - curIndent int - uhex [4]byte // "\uABCD"-encoded characters in strings - fracZeros int64 - expZero bool - - specu *speculation - } + lastNonSpace internal.RuneType + wasNumber bool + curIndent int + uhex [4]byte // "\uABCD"-encoded characters in strings + fracZeros int64 + expZero bool + specu *speculation + + // state: .pushBarrier and .popBarrier + stackInputPos []int64 } type speculation struct { - compactFmt ReEncoder - compactBuf bytes.Buffer - indentFmt ReEncoder - indentBuf bytes.Buffer + speculating bool + endWhenStackSize int + fmt ReEncoder + compact bytes.Buffer + buf []inputTuple +} + +func (specu *speculation) Reset() { + specu.speculating = false + specu.endWhenStackSize = 0 + specu.fmt = ReEncoder{} + specu.compact.Reset() + specu.buf = specu.buf[:0] +} + +type inputTuple struct { + c rune + t internal.RuneType + stackSize int } // public API ////////////////////////////////////////////////////////////////// +var ( + _ internal.AllWriter = (*ReEncoder)(nil) + _ io.Closer = (*ReEncoder)(nil) +) + // Write implements io.Writer; it does what you'd 
expect. // // It is worth noting that Write returns the number of bytes consumed @@ -152,6 +173,38 @@ func (enc *ReEncoder) Write(p []byte) (int, error) { return len(p), nil } +// WriteString implements io.StringWriter; it does what you'd expect, +// but see the notes on the Write method. +func (enc *ReEncoder) WriteString(p string) (int, error) { + if len(p) == 0 { + return 0, nil + } + var n int + if enc.bufLen > 0 { + copy(enc.buf[enc.bufLen:], p) + c, size := utf8.DecodeRune(enc.buf[:]) + n += size - enc.bufLen + enc.bufLen = 0 + if _, err := enc.WriteRune(c); err != nil { + return 0, err + } + } + for utf8.FullRuneInString(p[n:]) { + c, size := utf8.DecodeRuneInString(p[n:]) + if _, err := enc.WriteRune(c); err != nil { + return n, err + } + n += size + } + enc.bufLen = copy(enc.buf[:], p[n:]) + return len(p), nil +} + +// WriteByte implements io.ByteWriter; it does what you'd expect. +func (enc *ReEncoder) WriteByte(b byte) error { + return internal.WriteByte(enc, b) +} + // Close implements io.Closer; it does what you'd expect, mostly. 
// // The *ReEncoder may continue to be written to with new JSON values @@ -170,14 +223,14 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if err := enc.handleRune(0, internal.RuneTypeError); err != nil { + if err := enc.handleRune(0, internal.RuneTypeError, enc.par.StackSize()); err != nil { enc.err = &ReEncodeSyntaxError{ Err: err, Offset: enc.inputPos, } return enc.err } - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() } return nil @@ -212,9 +265,9 @@ rehandle: } return enc.written, enc.err } - enc.err = enc.handleRune(c, t) + enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() goto rehandle } else { @@ -230,32 +283,47 @@ rehandle: return enc.written, enc.err } +// semi-public API ///////////////////////////////////////////////////////////// + +func (enc *ReEncoder) pushWriteBarrier() { + enc.par.PushWriteBarrier() + enc.stackInputPos = append(enc.stackInputPos, enc.inputPos) + enc.inputPos = 0 +} + +func (enc *ReEncoder) popWriteBarrier() { + enc.par.PopBarrier() + enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1] + enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1] +} + // internal //////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { +func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { if enc.CompactIfUnder == 0 || enc.Compact || enc.Indent == "" { return enc.handleRuneNoSpeculation(c, t) } // main - if enc.handleRuneState.specu == nil { // not speculating + if !enc.specu.speculating { // not speculating switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // start speculating if err, _ := enc.handleRunePre(c, t); err != nil { return err } - specu := &speculation{ - 
compactFmt: *enc, - indentFmt: *enc, - } - specu.compactFmt.Compact = true - specu.compactFmt.out = &specu.compactBuf - specu.indentFmt.out = &specu.indentBuf - enc.handleRuneState.specu = specu - if err := specu.compactFmt.handleRuneMain(c, t); err != nil { - return err + enc.specu.speculating = true + enc.specu.endWhenStackSize = stackSize - 1 + enc.specu.fmt = ReEncoder{ + ReEncoderConfig: enc.ReEncoderConfig, + out: &enc.specu.compact, } - if err := specu.indentFmt.handleRuneMain(c, t); err != nil { + enc.specu.fmt.Compact = true + enc.specu.buf = append(enc.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := enc.specu.fmt.handleRuneMain(c, t); err != nil { return err } default: @@ -264,31 +332,33 @@ func (enc *ReEncoder) handleRune(c rune, t internal.RuneType) error { } } } else { // speculating - - // canCompress is whether we're 1-up from the leaf; - // set this *before* the calls to .handleRune. - canCompress := enc.handleRuneState.specu.indentFmt.handleRuneState.specu == nil - - if err := enc.handleRuneState.specu.compactFmt.handleRune(c, t); err != nil { + enc.specu.buf = append(enc.specu.buf, inputTuple{ + c: c, + t: t, + stackSize: stackSize, + }) + if err := enc.specu.fmt.handleRune(c, t, stackSize); err != nil { return err } - if err := enc.handleRuneState.specu.indentFmt.handleRune(c, t); err != nil { - return err - } - switch { - case enc.handleRuneState.specu.compactBuf.Len() >= enc.CompactIfUnder: // stop speculating; use indent - if _, err := enc.handleRuneState.specu.indentBuf.WriteTo(enc.out); err != nil { + case enc.specu.compact.Len() >= enc.CompactIfUnder: // stop speculating; use indent + buf := append([]inputTuple(nil), enc.specu.buf...) 
+ enc.specu.Reset() + if err := enc.handleRuneMain(buf[0].c, buf[0].t); err != nil { return err } - enc.handleRuneState = enc.handleRuneState.specu.indentFmt.handleRuneState - case canCompress && (t == internal.RuneTypeObjectEnd || t == internal.RuneTypeArrayEnd): // stop speculating; use compact - if _, err := enc.handleRuneState.specu.compactBuf.WriteTo(enc.out); err != nil { + for _, tuple := range buf[1:] { + if err := enc.handleRune(tuple.c, tuple.t, tuple.stackSize); err != nil { + return err + } + } + case stackSize == enc.specu.endWhenStackSize: // stop speculating; use compact + if _, err := enc.specu.compact.WriteTo(enc.out); err != nil { return err } - enc.handleRuneState.lastNonSpace = t - enc.handleRuneState.curIndent-- - enc.handleRuneState.specu = nil + enc.specu.Reset() + enc.lastNonSpace = t + enc.curIndent-- } } @@ -310,9 +380,9 @@ func (enc *ReEncoder) handleRuneNoSpeculation(c rune, t internal.RuneType) error // the new rune itself is handled. func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // emit newlines between top-level values - if enc.handleRuneState.lastNonSpace == internal.RuneTypeEOF { + if enc.lastNonSpace == internal.RuneTypeEOF { switch { - case enc.handleRuneState.wasNumber && t.IsNumber(): + case enc.wasNumber && t.IsNumber(): if err := enc.emitByte('\n'); err != nil { return err, false } @@ -326,35 +396,35 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // shorten numbers switch t { // trim trailing '0's from the fraction-part, but don't remove all digits case internal.RuneTypeNumberFracDot: - enc.handleRuneState.fracZeros = 0 + enc.fracZeros = 0 case internal.RuneTypeNumberFracDig: - if c == '0' && enc.handleRuneState.lastNonSpace == internal.RuneTypeNumberFracDig { - enc.handleRuneState.fracZeros++ + if c == '0' && enc.lastNonSpace == internal.RuneTypeNumberFracDig { + enc.fracZeros++ return nil, false } fallthrough default: - for enc.handleRuneState.fracZeros 
> 0 { + for enc.fracZeros > 0 { if err := enc.emitByte('0'); err != nil { return err, false } - enc.handleRuneState.fracZeros-- + enc.fracZeros-- } } switch t { // trim leading '0's from the exponent-part, but don't remove all digits case internal.RuneTypeNumberExpE, internal.RuneTypeNumberExpSign: - enc.handleRuneState.expZero = true + enc.expZero = true case internal.RuneTypeNumberExpDig: - if c == '0' && enc.handleRuneState.expZero { + if c == '0' && enc.expZero { return nil, false } - enc.handleRuneState.expZero = false + enc.expZero = false default: - if enc.handleRuneState.expZero { + if enc.expZero { if err := enc.emitByte('0'); err != nil { return err, false } - enc.handleRuneState.expZero = false + enc.expZero = false } } @@ -370,8 +440,8 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // let us manage whitespace, don't pass it through return nil, false case internal.RuneTypeObjectEnd, internal.RuneTypeArrayEnd: - enc.handleRuneState.curIndent-- - switch enc.handleRuneState.lastNonSpace { + enc.curIndent-- + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: // collapse default: @@ -380,7 +450,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } } default: - switch enc.handleRuneState.lastNonSpace { + switch enc.lastNonSpace { case internal.RuneTypeObjectBeg, internal.RuneTypeObjectComma, internal.RuneTypeArrayBeg, internal.RuneTypeArrayComma: if err := enc.emitNlIndent(); err != nil { return err, false @@ -392,7 +462,7 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { } switch t { case internal.RuneTypeObjectBeg, internal.RuneTypeArrayBeg: - enc.handleRuneState.curIndent++ + enc.curIndent++ } } } @@ -402,76 +472,72 @@ func (enc *ReEncoder) handleRunePre(c rune, t internal.RuneType) (error, bool) { // handleRuneMain handles the new rune itself, not buffered things. 
func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { - defer func() { - if t != internal.RuneTypeSpace { - enc.handleRuneState.lastNonSpace = t - } - }() - + var err error switch t { case internal.RuneTypeStringChar: - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeNone, enc.BackslashEscape)) case internal.RuneTypeStringEsc, internal.RuneTypeStringEscU: - return nil + // do nothing case internal.RuneTypeStringEsc1: switch c { case '"': - return enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '"', BackslashEscapeShort, enc.BackslashEscape)) case '\\': - return enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\\', BackslashEscapeShort, enc.BackslashEscape)) case '/': - return enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '/', BackslashEscapeShort, enc.BackslashEscape)) case 'b': - return enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\b', BackslashEscapeShort, enc.BackslashEscape)) case 'f': - return enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\f', BackslashEscapeShort, enc.BackslashEscape)) case 'n': - return enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\n', BackslashEscapeShort, enc.BackslashEscape)) case 'r': - return enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\r', BackslashEscapeShort, enc.BackslashEscape)) case 't': - return enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, 
enc.BackslashEscape)) + err = enc.emit(writeStringChar(enc.out, '\t', BackslashEscapeShort, enc.BackslashEscape)) default: panic("should not happen") } case internal.RuneTypeStringEscUA: - enc.handleRuneState.uhex[0], _ = internal.HexToInt(c) - return nil + enc.uhex[0], _ = internal.HexToInt(c) case internal.RuneTypeStringEscUB: - enc.handleRuneState.uhex[1], _ = internal.HexToInt(c) - return nil + enc.uhex[1], _ = internal.HexToInt(c) case internal.RuneTypeStringEscUC: - enc.handleRuneState.uhex[2], _ = internal.HexToInt(c) - return nil + enc.uhex[2], _ = internal.HexToInt(c) case internal.RuneTypeStringEscUD: - enc.handleRuneState.uhex[3], _ = internal.HexToInt(c) + enc.uhex[3], _ = internal.HexToInt(c) c := 0 | - rune(enc.handleRuneState.uhex[0])<<12 | - rune(enc.handleRuneState.uhex[1])<<8 | - rune(enc.handleRuneState.uhex[2])<<4 | - rune(enc.handleRuneState.uhex[3])<<0 - return enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) + rune(enc.uhex[0])<<12 | + rune(enc.uhex[1])<<8 | + rune(enc.uhex[2])<<4 | + rune(enc.uhex[3])<<0 + err = enc.emit(writeStringChar(enc.out, c, BackslashEscapeUnicode, enc.BackslashEscape)) case internal.RuneTypeError: // EOF explicitly stated by .Close() fallthrough case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value - enc.handleRuneState.wasNumber = enc.handleRuneState.lastNonSpace.IsNumber() + enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines: - t = internal.RuneTypeError // enc.handleRuneState.lastNonSpace : an NL isn't needed (we already printed one) - return enc.emitByte('\n') + case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0: + t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) + err = enc.emitByte('\n') default: - t = internal.RuneTypeEOF // enc.handleRuneState.lastNonSpace : an NL *might* be needed - return nil + t = internal.RuneTypeEOF // enc.lastNonSpace : an NL *might* be 
needed } default: - return enc.emitByte(byte(c)) + err = enc.emitByte(byte(c)) } + + if t != internal.RuneTypeSpace { + enc.lastNonSpace = t + } + return err } func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.out, c) + err := enc.out.WriteByte(c) if err == nil { enc.written++ } @@ -488,12 +554,12 @@ func (enc *ReEncoder) emitNlIndent() error { return err } if enc.Prefix != "" { - if err := enc.emit(io.WriteString(enc.out, enc.Prefix)); err != nil { + if err := enc.emit(enc.out.WriteString(enc.Prefix)); err != nil { return err } } - for i := 0; i < enc.handleRuneState.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.out, enc.Indent)); err != nil { + for i := 0; i < enc.curIndent; i++ { + if err := enc.emit(enc.out.WriteString(enc.Indent)); err != nil { return err } } @@ -7,6 +7,8 @@ package lowmemjson import ( "reflect" + "git.lukeshu.com/go/typedsync" + "git.lukeshu.com/go/lowmemjson/internal" ) @@ -25,9 +27,19 @@ type structIndex struct { byName map[string]int } +var structIndexCache typedsync.CacheMap[reflect.Type, structIndex] + // indexStruct takes a struct Type, and indexes its fields for use by -// Decoder.Decode() and Encoder.Encode(). +// Decoder.Decode() and Encoder.Encode(). indexStruct caches its +// results. func indexStruct(typ reflect.Type) structIndex { + ret, _ := structIndexCache.LoadOrCompute(typ, indexStructReal) + return ret +} + +// indexStructReal is like indexStruct, but is the real indexer, +// bypassing the cache. +func indexStructReal(typ reflect.Type) structIndex { var byPos []structField byName := make(map[string][]int) |