From b3f4186f2b8e992f56f898784b1cd28bfd7550ca Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 29 Jan 2023 20:59:37 -0700 Subject: Invent "barriers" instead of nesting parsers --- decode.go | 56 ++++++----- decode_scan.go | 143 ++++++++-------------------- decode_scan_test.go | 263 +++++++++++++++++++++------------------------------- encode.go | 26 +++--- internal/parse.go | 120 +++++++++++++++++++++++- reencode.go | 23 ++++- 6 files changed, 324 insertions(+), 307 deletions(-) diff --git a/decode.go b/decode.go index 8fab267..60b530f 100644 --- a/decode.go +++ b/decode.go @@ -104,7 +104,7 @@ const maxNestingDepth = 10000 // an io.Reader. func NewDecoder(r io.RuneScanner) *Decoder { return &Decoder{ - io: &runeTypeScannerImpl{ + io: runeTypeScanner{ inner: r, parser: internal.Parser{ MaxDepth: maxNestingDepth, @@ -245,6 +245,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } dec.io.Reset() + dec.io.PushReadBarrier() defer func() { if r := recover(); r != nil { if de, ok := r.(decodeError); ok { @@ -257,6 +258,7 @@ func (dec *Decoder) Decode(ptr any) (err error) { } }() dec.decode(ptrVal.Elem(), false) + dec.io.PopReadBarrier() return nil } @@ -319,12 +321,21 @@ func (dec *Decoder) expectRuneType(ec rune, et internal.RuneType, gt reflect.Typ } } -type decRuneTypeScanner struct { +type decRuneScanner struct { dec *Decoder + eof bool } -func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { +func (sc *decRuneScanner) ReadRune() (rune, int, error) { + if sc.eof { + return 0, 0, io.EOF + } c, s, t, e := sc.dec.io.ReadRuneType() + if t == internal.RuneTypeEOF { + sc.eof = true + sc.dec.io.PopReadBarrier() + return 0, 0, io.EOF + } if e != nil { panic(decodeError{ Field: sc.dec.structStackStr(), @@ -333,28 +344,17 @@ func (sc *decRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, erro Err: e, }) } - return c, s, t, nil + return c, s, nil } -func (sc *decRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, _ := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - default: - return r, s, nil - } +func (sc *decRuneScanner) UnreadRune() error { + return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) UnreadRune() error { return sc.dec.io.UnreadRune() } -func (sc *decRuneTypeScanner) InputOffset() int64 { return sc.dec.InputOffset() } -func (sc *decRuneTypeScanner) Reset() { sc.dec.io.Reset() } - -func (dec *Decoder) limitingScanner() runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &decRuneTypeScanner{ - dec: dec, - }, +func (dec *Decoder) limitingScanner() io.RuneScanner { + dec.io.PushReadBarrier() + return &decRuneScanner{ + dec: dec, } } @@ -867,7 +867,12 @@ func DecodeObject(r io.RuneScanner, decodeKey, decodeVal func(io.RuneScanner) er } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeObject(nil, @@ -947,7 +952,12 @@ func DecodeArray(r io.RuneScanner, decodeMember func(r io.RuneScanner) error) (e } } }() - dec := NewDecoder(r) + var dec *Decoder + if dr, ok := r.(*decRuneScanner); ok { + dec = dr.dec + } else { + dec = NewDecoder(r) + } dec.posStackPush() defer dec.posStackPop() dec.decodeArray(nil, func() { diff --git a/decode_scan.go b/decode_scan.go index 261aaa6..e233caf 100644 --- a/decode_scan.go +++ b/decode_scan.go @@ -5,31 +5,12 @@ package lowmemjson import ( - "errors" "io" "git.lukeshu.com/go/lowmemjson/internal" ) -type runeTypeScanner interface { - // The returned error is a *ReadError, a *SyntaxError, or nil. - // An EOF condition is represented as one of: - // - // end of value but not file: (_, >0, RuneTypeEOF, nil) - // end of both value and file: (_, 0, RuneTypeEOF, nil) - // end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) - // end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) - ReadRuneType() (rune, int, internal.RuneType, error) - // The returned error is a *DecodeReadError, a *DecodeSyntaxError, io.EOF, or nil. - ReadRune() (rune, int, error) - UnreadRune() error - Reset() - InputOffset() int64 -} - -// runeTypeScannerImpl ///////////////////////////////////////////////////////////////////////////// - -type runeTypeScannerImpl struct { +type runeTypeScanner struct { // everything that is not "initialized by constructor" starts // out as the zero value. @@ -47,9 +28,7 @@ type runeTypeScannerImpl struct { rErr error } -var _ runeTypeScanner = (*runeTypeScannerImpl)(nil) - -func (sc *runeTypeScannerImpl) Reset() { +func (sc *runeTypeScanner) Reset() { sc.parser.Reset() if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { sc.repeat = false @@ -69,7 +48,14 @@ func (sc *runeTypeScannerImpl) Reset() { } } -func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, error) { +// The returned error is a *ReadError, a *SyntaxError, or nil. +// An EOF condition is represented as one of: +// +// end of value but not file: (_, >0, RuneTypeEOF, nil) +// end of both value and file: (_, 0, RuneTypeEOF, nil) +// end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.ErrUnexepctedEOF}) +// end of file at start of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset: Err: io.EOF}) +func (sc *runeTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { switch { case sc.initialized && (sc.rType == internal.RuneTypeError || sc.rType == internal.RuneTypeEOF): // do nothing @@ -117,24 +103,12 @@ func (sc *runeTypeScannerImpl) ReadRuneType() (rune, int, internal.RuneType, err return sc.rRune, sc.rSize, sc.rType, sc.rErr } -func (sc *runeTypeScannerImpl) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - // UnreadRune undoes a call to .ReadRune() or .ReadRuneType(). // // If the last call to .ReadRune() or .ReadRuneType() has already been // unread, or if that call returned a rune with size 0, then // ErrInvalidUnreadRune is returned. Otherwise, nil is returned. -func (sc *runeTypeScannerImpl) UnreadRune() error { +func (sc *runeTypeScanner) UnreadRune() error { if sc.repeat || sc.rSize == 0 { return ErrInvalidUnreadRune } @@ -143,7 +117,7 @@ func (sc *runeTypeScannerImpl) UnreadRune() error { return nil } -func (sc *runeTypeScannerImpl) InputOffset() int64 { +func (sc *runeTypeScanner) InputOffset() int64 { ret := sc.offset if sc.repeat { ret -= int64(sc.rSize) @@ -151,76 +125,37 @@ func (sc *runeTypeScannerImpl) InputOffset() int64 { return ret } -// elemRuneTypeScanner ///////////////////////////////////////////////////////////////////////////// - -type elemRuneTypeScanner struct { - inner runeTypeScanner - - parser internal.Parser - repeat bool - stuck bool - rType internal.RuneType - rErr error +func (sc *runeTypeScanner) PushReadBarrier() { + sc.parser.PushReadBarrier() } -var _ runeTypeScanner = (*elemRuneTypeScanner)(nil) - -func (sc *elemRuneTypeScanner) ReadRuneType() (rune, int, internal.RuneType, error) { - // Read it, run it through the parent's parser. - r, s, t, e := sc.inner.ReadRuneType() - - // Run it through our child parser. - if s > 0 || errors.Is(e, io.ErrUnexpectedEOF) { - if sc.repeat || sc.stuck { - sc.repeat = false - } else { - var err error - if s > 0 { - sc.rType, err = sc.parser.HandleRune(r) - } else { - sc.rType, err = sc.parser.HandleEOF() - } - if err != nil { - sc.rErr = &DecodeSyntaxError{ - Offset: sc.inner.InputOffset(), - Err: err, - } - } else { - sc.rErr = nil +func (sc *runeTypeScanner) PopReadBarrier() { + sc.parser.PopBarrier() + if sc.repeat || (sc.rType == internal.RuneTypeEOF && sc.rSize > 0) { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleRune(sc.rRune) + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset - int64(sc.rSize), + Err: err, } + } else { + sc.rErr = nil } - sc.stuck = sc.rType == internal.RuneTypeEOF || sc.rType == internal.RuneTypeError - t, e = sc.rType, sc.rErr - } - - // Check if we need to truncate the result. - if t == internal.RuneTypeEOF { - if s > 0 { - _ = sc.inner.UnreadRune() + // tell it to use that rType and rErr + _ = sc.UnreadRune() // we set it up to always succeed + } else if sc.rType == internal.RuneTypeEOF { + // re-figure the rType and rErr + var err error + sc.rType, err = sc.parser.HandleEOF() + if err != nil { + sc.rErr = &DecodeSyntaxError{ + Offset: sc.offset, + Err: err, + } + } else { + sc.rErr = nil } - return 0, 0, internal.RuneTypeEOF, nil } - - return r, s, t, e } - -func (sc *elemRuneTypeScanner) ReadRune() (rune, int, error) { - r, s, t, e := sc.ReadRuneType() - switch t { - case internal.RuneTypeEOF: - return 0, 0, io.EOF - case internal.RuneTypeError: - return 0, 0, e - default: - return r, s, nil - } -} - -func (sc *elemRuneTypeScanner) UnreadRune() error { - ret := sc.inner.UnreadRune() - sc.repeat = true - return ret -} - -func (sc *elemRuneTypeScanner) InputOffset() int64 { return sc.inner.InputOffset() } -func (sc *elemRuneTypeScanner) Reset() {} diff --git a/decode_scan_test.go b/decode_scan_test.go index 5bf5e2a..d0725e5 100644 --- a/decode_scan_test.go +++ b/decode_scan_test.go @@ -11,7 +11,6 @@ import ( "testing" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" "git.lukeshu.com/go/lowmemjson/internal" ) @@ -23,8 +22,26 @@ type ReadRuneTypeResult struct { e error } +const ( + unreadRune = -1 + pushReadBarrier = -2 + popReadBarrier = -3 + reset = -4 +) + func (r ReadRuneTypeResult) String() string { - return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + switch r.s { + case unreadRune: + return fmt.Sprintf("{%q, unreadRune, %#v, %v}", r.r, r.t, r.e) + case pushReadBarrier: + return fmt.Sprintf("{%q, pushReadBarrier, %#v, %v}", r.r, r.t, r.e) + case popReadBarrier: + return fmt.Sprintf("{%q, popReadBarrier, %#v, %v}", r.r, r.t, r.e) + case reset: + return fmt.Sprintf("{%q, reset, %#v, %v}", r.r, r.t, r.e) + default: + return fmt.Sprintf("{%q, %d, %#v, %v}", r.r, r.s, r.t, r.e) + } } type runeTypeScannerTestcase struct { @@ -33,31 +50,6 @@ type runeTypeScannerTestcase struct { Exp []ReadRuneTypeResult } -func testRuneTypeScanner(t *testing.T, testcases map[string]runeTypeScannerTestcase, factory func(io.RuneScanner) runeTypeScanner) { - for tcName, tc := range testcases { - tc := tc - t.Run(tcName, func(t *testing.T) { - t.Parallel() - reader := strings.NewReader(tc.Input) - sc := factory(reader) - var exp, act []string - for _, iExp := range tc.Exp { - var iAct ReadRuneTypeResult - if iExp.s < 0 { - iAct.s = iExp.s - iAct.e = sc.UnreadRune() - } else { - iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() - } - exp = append(exp, iExp.String()) - act = append(act, iAct.String()) - } - assert.Equal(t, exp, act) - assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) - }) - } -} - func TestRuneTypeScanner(t *testing.T) { t.Parallel() testcases := map[string]runeTypeScannerTestcase{ @@ -86,7 +78,7 @@ func TestRuneTypeScanner(t *testing.T) { {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, + {0, unreadRune, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -104,8 +96,8 @@ func TestRuneTypeScanner(t *testing.T) { {'"', 1, internal.RuneTypeStringEnd, nil}, {':', 1, internal.RuneTypeObjectColon, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, -1, 0, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, nil}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, {'2', 1, internal.RuneTypeNumberIntDig, nil}, {'.', 1, internal.RuneTypeNumberFracDot, nil}, @@ -128,7 +120,7 @@ func TestRuneTypeScanner(t *testing.T) { {'0', 1, internal.RuneTypeNumberFracDig, nil}, {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, - {0, -1, 0, ErrInvalidUnreadRune}, + {0, unreadRune, 0, ErrInvalidUnreadRune}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, @@ -174,150 +166,103 @@ func TestRuneTypeScanner(t *testing.T) { {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 0, Err: io.EOF}}, }}, - } - testRuneTypeScanner(t, testcases, func(reader io.RuneScanner) runeTypeScanner { - return &runeTypeScannerImpl{ - inner: reader, - } - }) -} - -func TestElemRuneTypeScanner(t *testing.T) { - t.Parallel() - toplevelTestcases := map[string]runeTypeScannerTestcase{ - "basic": {`1`, ``, []ReadRuneTypeResult{ + "basic2": {`1`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "syntax-error": {`[[0,]`, ``, []ReadRuneTypeResult{ - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'[', 1, internal.RuneTypeArrayBeg, nil}, - {'0', 1, internal.RuneTypeNumberIntZero, nil}, - {',', 1, internal.RuneTypeArrayComma, nil}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - {']', 1, internal.RuneTypeError, &DecodeSyntaxError{Offset: 5, Err: fmt.Errorf("invalid character %q looking for beginning of value", ']')}}, - }}, - "multi-value": {`1{}`, `{}`, []ReadRuneTypeResult{ + "fragment": {`1,`, ``, []ReadRuneTypeResult{ {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, + {',', 1, internal.RuneTypeEOF, nil}, }}, - "fragment": {`1,`, `,`, []ReadRuneTypeResult{ + "elem": {` { "foo" : 12.0 } `, ``, []ReadRuneTypeResult{ + {'{', 1, internal.RuneTypeObjectBeg, nil}, + {'"', 1, internal.RuneTypeStringBeg, nil}, + {'f', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'o', 1, internal.RuneTypeStringChar, nil}, + {'"', 1, internal.RuneTypeStringEnd, nil}, + {':', 1, internal.RuneTypeObjectColon, nil}, + {0, pushReadBarrier, 0, nil}, {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, + {'2', 1, internal.RuneTypeNumberIntDig, nil}, + {'.', 1, internal.RuneTypeNumberFracDot, nil}, + {'0', 1, internal.RuneTypeNumberFracDig, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {'}', 1, internal.RuneTypeEOF, nil}, + {0, popReadBarrier, 0, nil}, + {'}', 1, internal.RuneTypeObjectEnd, nil}, {0, 0, internal.RuneTypeEOF, nil}, {0, 0, internal.RuneTypeEOF, nil}, }}, - "early-eof": {`{`, ``, []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - {0, 0, internal.RuneTypeError, &DecodeSyntaxError{Offset: 1, Err: io.ErrUnexpectedEOF}}, - }}, } - - childTestcases := make(map[string]runeTypeScannerTestcase, len(toplevelTestcases)) - for tcName, tc := range toplevelTestcases { - tc.Input = `[` + tc.Input - tc.Exp = append([]ReadRuneTypeResult(nil), tc.Exp...) // copy - for i, res := range tc.Exp { - if se, ok := res.e.(*DecodeSyntaxError); ok { - seCopy := *se - seCopy.Offset++ - tc.Exp[i].e = &seCopy + func() { + childTestcases := make(map[string]runeTypeScannerTestcase) + for tcName, tc := range testcases { + canChild := true + for _, res := range tc.Exp { + if res.s == pushReadBarrier { + canChild = false + break + } } - } - childTestcases[tcName] = tc - } - - t.Run("top-level", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, toplevelTestcases, func(reader io.RuneScanner) runeTypeScanner { - return &elemRuneTypeScanner{ - inner: &runeTypeScannerImpl{ - inner: reader, - }, + if !canChild { + continue } - }) - }) - t.Run("child", func(t *testing.T) { - t.Parallel() - testRuneTypeScanner(t, childTestcases, func(reader io.RuneScanner) runeTypeScanner { - inner := &runeTypeScannerImpl{ - inner: reader, + tc.Input = `[1,` + tc.Input + tc.Exp = append([]ReadRuneTypeResult{ + {'[', 1, internal.RuneTypeArrayBeg, nil}, + {'1', 1, internal.RuneTypeNumberIntDig, nil}, + {',', 1, internal.RuneTypeArrayComma, nil}, + {0, pushReadBarrier, 0, nil}, + }, tc.Exp...) + for i := 2; i < len(tc.Exp); i++ { + if se, ok := tc.Exp[i].e.(*DecodeSyntaxError); ok { + seCopy := *se + seCopy.Offset += 3 + tc.Exp[i].e = &seCopy + } } - var res ReadRuneTypeResult - res.r, res.s, res.t, res.e = inner.ReadRuneType() - require.Equal(t, - ReadRuneTypeResult{'[', 1, internal.RuneTypeArrayBeg, nil}.String(), - res.String()) - - return &elemRuneTypeScanner{ - inner: inner, + childTestcases["child-"+tcName] = tc + } + for tcName, tc := range childTestcases { + testcases[tcName] = tc + } + }() + for tcName, tc := range testcases { + tc := tc + t.Run(tcName, func(t *testing.T) { + t.Parallel() + t.Logf("input=%q", tc.Input) + reader := strings.NewReader(tc.Input) + sc := &runeTypeScanner{inner: reader} + var exp, act []string + for _, iExp := range tc.Exp { + var iAct ReadRuneTypeResult + switch iExp.s { + case unreadRune: + iAct.s = iExp.s + iAct.e = sc.UnreadRune() + case pushReadBarrier: + sc.PushReadBarrier() + iAct.s = iExp.s + case popReadBarrier: + sc.PopReadBarrier() + iAct.s = iExp.s + case reset: + sc.Reset() + iAct.s = iExp.s + default: + iAct.r, iAct.s, iAct.t, iAct.e = sc.ReadRuneType() + } + exp = append(exp, iExp.String()) + act = append(act, iAct.String()) } + assert.Equal(t, exp, act) + assert.Equal(t, tc.ExpRemainder, tc.Input[len(tc.Input)-reader.Len():]) }) - }) -} - -func TestElemRuneTypeScanner2(t *testing.T) { - t.Parallel() - parent := &runeTypeScannerImpl{ - inner: strings.NewReader(` { "foo" : 12.0 } `), - } - exp := []ReadRuneTypeResult{ - {'{', 1, internal.RuneTypeObjectBeg, nil}, - {'"', 1, internal.RuneTypeStringBeg, nil}, - {'f', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'o', 1, internal.RuneTypeStringChar, nil}, - {'"', 1, internal.RuneTypeStringEnd, nil}, - {':', 1, internal.RuneTypeObjectColon, nil}, - } - expStr := make([]string, 0, len(exp)) - actStr := make([]string, 0, len(exp)) - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - child := &elemRuneTypeScanner{ - inner: parent, - } - exp = []ReadRuneTypeResult{ - {'1', 1, internal.RuneTypeNumberIntDig, nil}, - {'2', 1, internal.RuneTypeNumberIntDig, nil}, - {'.', 1, internal.RuneTypeNumberFracDot, nil}, - {'0', 1, internal.RuneTypeNumberFracDig, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = child.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) - } - - exp = []ReadRuneTypeResult{ - {'}', 1, internal.RuneTypeObjectEnd, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - {0, 0, internal.RuneTypeEOF, nil}, - } - expStr, actStr = nil, nil - for _, iExp := range exp { - var iAct ReadRuneTypeResult - iAct.r, iAct.s, iAct.t, iAct.e = parent.ReadRuneType() - expStr = append(expStr, iExp.String()) - actStr = append(actStr, iAct.String()) - require.Equal(t, expStr, actStr) } } diff --git a/encode.go b/encode.go index 5fb4fbf..ca4e060 100644 --- a/encode.go +++ b/encode.go @@ -9,17 +9,13 @@ import ( "encoding" "encoding/base64" "encoding/json" - "errors" "fmt" "io" - iofs "io/fs" "reflect" "sort" "strconv" "strings" "unsafe" - - "git.lukeshu.com/go/lowmemjson/internal" ) // Encodable is the interface implemented by types that can encode @@ -98,7 +94,7 @@ var ( const startDetectingCyclesAfter = 1000 -func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { +func encode(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, quote bool, cycleDepth uint, cycleSeen map[any]struct{}) error { if !val.IsValid() { return discardInt(w.WriteString("null")) } @@ -115,22 +111,22 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q if !ok { return discardInt(w.WriteString("null")) } - // Use a sub-ReEncoder to check that it's a full element. - validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} - if err := obj.EncodeJSON(validator); err != nil { + w.pushWriteBarrier() + if err := obj.EncodeJSON(w); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, } } - if err := validator.Close(); err != nil && !errors.Is(err, iofs.ErrClosed) { + if err := w.Close(); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "EncodeJSON", Err: err, } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(jsonMarshalerType): val = val.Addr() @@ -151,22 +147,22 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q Err: err, } } - // Use a sub-ReEncoder to check that it's a full element. - validator := &ReEncoder{out: w, ReEncoderConfig: ReEncoderConfig{BackslashEscape: EscapePreserve}} - if _, err := validator.Write(dat); err != nil { + w.pushWriteBarrier() + if _, err := w.Write(dat); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, } } - if err := validator.Close(); err != nil { + if err := w.Close(); err != nil { return &EncodeMethodError{ Type: val.Type(), SourceFunc: "MarshalJSON", Err: err, } } + w.popWriteBarrier() case val.Kind() != reflect.Pointer && val.CanAddr() && reflect.PointerTo(val.Type()).Implements(textMarshalerType): val = val.Addr() @@ -361,7 +357,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q for i := 0; iter.Next(); i++ { // TODO: Avoid buffering the map key var k strings.Builder - if err := encode(&k, iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { + if err := encode(NewReEncoder(&k, ReEncoderConfig{BackslashEscape: escaper}), iter.Key(), escaper, false, cycleDepth, cycleSeen); err != nil { return err } kStr := k.String() @@ -496,7 +492,7 @@ func encode(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, q return nil } -func encodeArray(w internal.AllWriter, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { +func encodeArray(w *ReEncoder, val reflect.Value, escaper BackslashEscaper, cycleDepth uint, cycleSeen map[any]struct{}) error { if err := w.WriteByte('['); err != nil { return err } diff --git a/internal/parse.go b/internal/parse.go index 9db57fb..36db4a9 100644 --- a/internal/parse.go +++ b/internal/parse.go @@ -313,6 +313,13 @@ type Parser struct { // a ["x","y" // ["x","y"] stack []RuneType + + barriers []barrier +} + +type barrier struct { + closed bool + stack []RuneType } func (par *Parser) init() { @@ -345,8 +352,22 @@ func (par *Parser) stackString() string { return buf.String() } +func (par *Parser) depth() int { + n := len(par.stack) + for _, barrier := range par.barriers { + n += len(barrier.stack) + } + return n +} + func (par *Parser) StackIsEmpty() bool { - return len(par.stack) == 0 || (len(par.stack) == 1 && par.stack[0] == runeTypeAny) + if len(par.barriers) > 0 { + return false + } + if len(par.stack) == 0 { + return true + } + return len(par.stack) == 1 && par.stack[0] == runeTypeAny } func (par *Parser) StackSize() int { @@ -360,6 +381,99 @@ func (par *Parser) Reset() { } } +// PushReadBarrier causes the parser to expect EOF once the end of the +// element that is started by the current top-of-stack is reached, +// until this is un-done with PopBarrier. It essentially turns the +// parser in to a sub-parser. +// +// PushReadBarrier may only be called at the beginning of an element, +// whether that be +// +// - runeTypeAny +// - RuneTypeObjectBeg +// - RuneTypeArrayBeg +// - RuneTypeStringBeg +// - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig +// - RuneTypeTrueT +// - RuneTypeFalseF +// - RuneTypeNullN +func (par *Parser) PushReadBarrier() { + // Sanity checking. + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushReadBarrier call: empty stack")) + } + curState := par.stack[len(par.stack)-1] + switch curState { + case runeTypeAny, + RuneTypeObjectBeg, + RuneTypeArrayBeg, + RuneTypeStringBeg, + RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, + RuneTypeTrueT, + RuneTypeFalseF, + RuneTypeNullN: + // OK + default: + panic(fmt.Errorf("illegal PushReadBarrier call: %q", curState)) + } + // Actually push. + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack[:len(par.stack)-1], + }) + par.stack = []RuneType{curState} +} + +// PushWriteBarrier causes the parser to expect EOF once the end of +// the about-to-start element is reached, until this is un-done with +// PopBarrier. It essentially turns the parser in to a sub-parser. +// +// PushWriteBarrier may only be called at the places where an element +// of any type may start: +// +// - runeTypeAny for top-level and object-value elements +// - RuneTypeArrayBeg for array-item elements +// +// PushWriteBarrier signals intent to write an element; if it is +// called in a place where an element is optional (at the beginning of +// an array), it becomes a syntax error to not write the element. +func (par *Parser) PushWriteBarrier() { + par.init() + if len(par.stack) == 0 { + panic(errors.New("illegal PushWriteBarrier call: empty stack")) + } + switch par.stack[len(par.stack)-1] { + case runeTypeAny: + par.popState() + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + case RuneTypeArrayBeg: + par.replaceState(RuneTypeArrayComma) + par.barriers = append(par.barriers, barrier{ + closed: par.closed, + stack: par.stack, + }) + par.stack = []RuneType{runeTypeAny} + default: + panic(fmt.Errorf("illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) + } +} + +// PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. +func (par *Parser) PopBarrier() { + if len(par.barriers) == 0 { + panic(errors.New("illegal PopBarrier call: empty barrier stack")) + } + barrier := par.barriers[len(par.barriers)-1] + par.barriers = par.barriers[:len(par.barriers)-1] + par.closed = barrier.closed + par.stack = append(barrier.stack, par.stack...) +} + // HandleEOF feeds EOF to the Parser. The returned RuneType is either // RuneTypeEOF or RuneTypeError. // @@ -439,12 +553,12 @@ func (par *Parser) HandleRune(c rune) (RuneType, error) { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '{': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeObjectBeg), nil case '[': - if par.MaxDepth > 0 && len(par.stack) > par.MaxDepth { + if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeArrayBeg), nil diff --git a/reencode.go b/reencode.go index a5dc3c8..eae80db 100644 --- a/reencode.go +++ b/reencode.go @@ -106,6 +106,9 @@ type ReEncoder struct { fracZeros int64 expZero bool specu *speculation + + // state: .pushBarrier and .popBarrier + stackInputPos []int64 } type speculation struct { @@ -227,7 +230,7 @@ func (enc *ReEncoder) Close() error { } return enc.err } - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() } return nil @@ -264,7 +267,7 @@ rehandle: } enc.err = enc.handleRune(c, t, enc.par.StackSize()) if enc.err == nil && t == internal.RuneTypeEOF { - if enc.AllowMultipleValues { + if enc.AllowMultipleValues && len(enc.stackInputPos) == 0 { enc.par.Reset() goto rehandle } else { @@ -280,6 +283,20 @@ rehandle: return enc.written, enc.err } +// semi-public API ///////////////////////////////////////////////////////////// + +func (enc *ReEncoder) pushWriteBarrier() { + enc.par.PushWriteBarrier() + enc.stackInputPos = append(enc.stackInputPos, enc.inputPos) + enc.inputPos = 0 +} + +func (enc *ReEncoder) popWriteBarrier() { + enc.par.PopBarrier() + enc.inputPos += enc.stackInputPos[len(enc.stackInputPos)-1] + enc.stackInputPos = enc.stackInputPos[:len(enc.stackInputPos)-1] +} + // internal //////////////////////////////////////////////////////////////////// func (enc *ReEncoder) handleRune(c rune, t internal.RuneType, stackSize int) error { @@ -503,7 +520,7 @@ func (enc *ReEncoder) handleRuneMain(c rune, t internal.RuneType) error { case internal.RuneTypeEOF: // EOF implied by the start of the next top-level value enc.wasNumber = enc.lastNonSpace.IsNumber() switch { - case enc.ForceTrailingNewlines: + case enc.ForceTrailingNewlines && len(enc.stackInputPos) == 0: t = internal.RuneTypeError // enc.lastNonSpace : an NL isn't needed (we already printed one) err = enc.emitByte('\n') default: -- cgit v1.2.3-54-g00ecf