From b379bd7c2fba1e7d2c9429b3ffb93afdabd88cbd Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sat, 13 Aug 2022 18:20:30 -0600 Subject: parse: Add a general-purpose parser based on the reencoder --- parse.go | 700 +++++++++++++++++++++++++-------------------------------------- 1 file changed, 281 insertions(+), 419 deletions(-) (limited to 'parse.go') diff --git a/parse.go b/parse.go index 50c8ba3..e09b85a 100644 --- a/parse.go +++ b/parse.go @@ -5,403 +5,287 @@ package lowmemjson import ( - "errors" "fmt" "io" - "unicode/utf8" + iofs "io/fs" ) -type reencodeState func(rune) error - -type ReEncoder struct { - Out io.Writer - - // Whether to minify the JSON. - Compact bool - // String to use to indent; ignored if Compact is true. - Indent string - // String to put before indents, for testing-compat with - // encoding/json only. - prefix string - // Returns whether a given character in a string should be - // "\uXXXX" escaped. The bool argument is whether it was - // \u-escaped in the input. This does not affect characters - // that must or must-not be \u-escaped to be valid JSON. - // - // If not set, then EscapeUnicodeDefault is used. - UnicodeEscape func(rune, bool) bool - - bailAfterCurrent bool - - // state: .Write's utf8-decoding buffer - buf [utf8.UTFMax]byte - bufLen int - - // state: .WriteRune - err error - inputPos int64 - written int - stack []reencodeState - stack0IsNumber bool - curIndent int - - // state: reencodeState-specific - stateBuf []byte -} - -// public API ////////////////////////////////////////////////////////////////// - -func (enc *ReEncoder) Write(p []byte) (int, error) { - if len(p) == 0 { - return 0, nil - } - var n int - if enc.bufLen > 0 { - copy(enc.buf[enc.bufLen:], p) - c, size := utf8.DecodeRune(enc.buf[:]) - n += size - enc.bufLen - enc.bufLen = 0 - if _, err := enc.WriteRune(c); err != nil { - return 0, err - } - } - for utf8.FullRune(p[n:]) { - c, size := utf8.DecodeRune(p[n:]) - if _, err := enc.WriteRune(c); err != nil { - return n, err - } - n += size - } - enc.bufLen = copy(enc.buf[:], p[n:]) - return len(p), nil -} - -func (enc *ReEncoder) Flush() error { - if enc.bufLen > 0 { - return &SyntaxError{fmt.Sprintf("EOF: unflushed unicode garbage: %q", enc.buf[:enc.bufLen]), enc.inputPos} - } - switch len(enc.stack) { - case 0: - return nil - case 1: - if enc.stack0IsNumber { - enc.Compact = true - return enc.state('\n') - } - fallthrough - default: - return &SyntaxError{fmt.Sprintf("EOF: in the middle of a value"), enc.inputPos} - } -} +type RuneType uint8 + +const ( + RuneTypeError = RuneType(iota) + + RuneTypeSpace // whitespace + + RuneTypeObjectBeg // '{' + RuneTypeObjectColon // ':' + RuneTypeObjectComma // ',' + RuneTypeObjectEnd // '}' + + RuneTypeArrayBeg // '[' + RuneTypeArrayComma // ',' + RuneTypeArrayEnd // ']' + + RuneTypeStringBeg // opening '"' + RuneTypeStringChar // normal character + RuneTypeStringEsc // backslash + RuneTypeStringEsc1 // single-char after a backslash + RuneTypeStringEscU // \uABCD : u + RuneTypeStringEscUA // \uABCD : A + RuneTypeStringEscUB // \uABCD : B + RuneTypeStringEscUC // \uABCD : C + RuneTypeStringEscUD // \uABCD : D + RuneTypeStringEnd // closing '"' + + RuneTypeNumberInt // 0|[1-9][0-9]* + RuneTypeNumberFrac // \.[0-9]* + RuneTypeNumberExp // [eE][-+]?[0-9] + + RuneTypeTrueT + RuneTypeTrueR + RuneTypeTrueU + RuneTypeTrueE + + RuneTypeFalseF + RuneTypeFalseA + RuneTypeFalseL + RuneTypeFalseS + RuneTypeFalseE + + RuneTypeNullN + RuneTypeNullU + RuneTypeNullL1 + RuneTypeNullL2 +) -func (enc *ReEncoder) WriteRune(c rune) (n int, err error) { - if enc.err != nil { - return 0, enc.err - } - if enc.bufLen != 0 { - enc.err = errors.New("lowmemjson.ReEncoder: cannot .WriteRune() when there is a partial rune that has been .Write()n") - return 0, enc.err - } - enc.written = 0 - enc.err = enc.state(c) - enc.inputPos += int64(utf8.RuneLen(c)) - return enc.written, enc.err -} +type parseState func(rune) (RuneType, error) -// io helpers ////////////////////////////////////////////////////////////////// +type parser struct { + err error + closed bool -func (enc *ReEncoder) emitByte(c byte) error { - err := writeByte(enc.Out, c) - if err == nil { - enc.written++ - } - return err + stack []parseState + stack0IsNumber bool // whether stack[0] is a number-state; affects how EOF is handled } -func (enc *ReEncoder) emit(n int, err error) error { - enc.written += n - return err -} +// "public" API //////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) nlIndent() error { - if enc.Compact || enc.Indent == "" { - return nil +func (par *parser) HandleRune(c rune) (RuneType, error) { + if par.closed { + return RuneTypeError, iofs.ErrClosed } - if err := enc.emitByte('\n'); err != nil { - return err + if par.err != nil { + return RuneTypeError, par.err } - if enc.prefix != "" { - if err := enc.emit(io.WriteString(enc.Out, enc.prefix)); err != nil { - return err - } - } - for i := 0; i < enc.curIndent; i++ { - if err := enc.emit(io.WriteString(enc.Out, enc.Indent)); err != nil { - return err + return par.state(c) +} + +func (par *parser) HandleEOF() error { + if par.closed { + return iofs.ErrClosed + } + if par.err == nil { + switch len(par.stack) { + case 0: + par.err = nil + case 1: + if par.stack0IsNumber { + _, par.err = par.state('\n') + } + fallthrough + default: + par.err = io.ErrUnexpectedEOF } } - return nil + par.closed = true + return par.err } -// state helpers /////////////////////////////////////////////////////////////// +// state helpers /////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) pushState(state reencodeState, isNumber bool) { - if len(enc.stack) == 0 { - enc.stack0IsNumber = isNumber +func (par *parser) pushState(state parseState, isNumber bool) { + if len(par.stack) == 0 { + par.stack0IsNumber = isNumber } - enc.stack = append(enc.stack, state) + par.stack = append(par.stack, state) } -func (enc *ReEncoder) replaceState(state reencodeState, isNumber bool) { - if len(enc.stack) == 1 { - enc.stack0IsNumber = isNumber +func (par *parser) replaceState(state parseState, isNumber bool) { + if len(par.stack) == 1 { + par.stack0IsNumber = isNumber } - enc.stack[len(enc.stack)-1] = state + par.stack[len(par.stack)-1] = state } -func (enc *ReEncoder) popState() { - if len(enc.stack) == 1 { - enc.stack0IsNumber = false +func (par *parser) popState() { + if len(par.stack) == 1 { + par.stack0IsNumber = false } - enc.stack = enc.stack[:len(enc.stack)-1] + par.stack = par.stack[:len(par.stack)-1] } -var errBailedAfterCurrent = errors.New("bailed after current") - -func (enc *ReEncoder) state(c rune) error { - if len(enc.stack) == 0 { - if enc.bailAfterCurrent { - return errBailedAfterCurrent - } - enc.pushState(enc.stateAny, false) +func (par *parser) state(c rune) (RuneType, error) { + if len(par.stack) == 0 { + par.pushState(par.stateAny, false) } - return enc.stack[len(enc.stack)-1](c) + return par.stack[len(par.stack)-1](c) } -// any ///////////////////////////////////////////////////////////////////////////////////////////// +// state: any ////////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateAny(c rune) error { +func (par *parser) stateAny(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } + return RuneTypeSpace, nil case '{': - enc.replaceState(enc.stateInEmptyObject, false) - enc.curIndent++ + par.replaceState(par.stateInObject, false) + return RuneTypeObjectBeg, nil case '[': - enc.replaceState(enc.stateInEmptyArray, false) - enc.curIndent++ + par.replaceState(par.stateInArray, false) + return RuneTypeArrayBeg, nil case '"': - enc.replaceState(enc.stateInString, false) + par.replaceState(par.stateInString, false) + return RuneTypeStringBeg, nil case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.replaceState(enc.stateNumberA, true) - return enc.state(c) + par.replaceState(par.stateNumberA, true) + return par.state(c) case 't': - enc.replaceState(enc.stateInTrue, false) - enc.stateBuf = append(enc.stateBuf[:0], 't') + par.replaceState(par.stateTrueT, false) + return RuneTypeTrueT, nil case 'f': - enc.replaceState(enc.stateInFalse, false) - enc.stateBuf = append(enc.stateBuf[:0], 'f') + par.replaceState(par.stateFalseF, false) + return RuneTypeFalseF, nil case 'n': - enc.replaceState(enc.stateInNull, false) - enc.stateBuf = append(enc.stateBuf[:0], 'n') + par.replaceState(par.stateNullN, false) + return RuneTypeNullN, nil default: - return &SyntaxError{fmt.Sprintf("any: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("any: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -// object ////////////////////////////////////////////////////////////////////////////////////////// +// state: object /////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateInEmptyObject(c rune) error { return enc._stateInObject(c, false) } -func (enc *ReEncoder) stateInNonEmptyObject(c rune) error { return enc._stateInObject(c, true) } -func (enc *ReEncoder) _stateInObject(c rune, nonempty bool) error { +func (par *parser) stateInObject(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } + return RuneTypeSpace, nil case '"': - if err := enc.nlIndent(); err != nil { - return err - } - enc.replaceState(enc.stateInKV, false) - enc.pushState(enc.stateInString, false) + par.replaceState(par.stateAfterK, false) + par.pushState(par.stateInString, false) + return RuneTypeStringBeg, nil case '}': - enc.popState() - enc.curIndent-- - if nonempty { - if err := enc.nlIndent(); err != nil { - return err - } - } + par.popState() + return RuneTypeObjectEnd, nil default: - return &SyntaxError{fmt.Sprintf("object: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("object: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -func (enc *ReEncoder) stateInKV(c rune) error { +func (par *parser) stateAfterK(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } - return enc.emitByte(byte(c)) + return RuneTypeSpace, nil case ':': - enc.replaceState(enc.stateAfterV, false) - enc.pushState(enc.stateAny, false) - if err := enc.emitByte(byte(c)); err != nil { - return err - } - if !enc.Compact && enc.Indent != "" { - return enc.emitByte(' ') - } - return nil + par.replaceState(par.stateAfterV, false) + par.pushState(par.stateAny, false) + return RuneTypeObjectColon, nil default: - return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) } } -func (enc *ReEncoder) stateAfterV(c rune) error { +func (par *parser) stateAfterV(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } + return RuneTypeSpace, nil case ',': - enc.replaceState(enc.stateInNonEmptyObject, false) + par.replaceState(par.stateInObject, false) + return RuneTypeObjectComma, nil case '}': - enc.popState() - enc.curIndent-- - if err := enc.nlIndent(); err != nil { - return err - } + par.popState() + return RuneTypeObjectEnd, nil default: - return &SyntaxError{fmt.Sprintf("object member: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("object member: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -// array /////////////////////////////////////////////////////////////////////////////////////////// +// state: array //////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateInEmptyArray(c rune) error { return enc._stateInArray(c, false) } -func (enc *ReEncoder) stateInNonEmptyArray(c rune) error { return enc._stateInArray(c, true) } -func (enc *ReEncoder) _stateInArray(c rune, nonempty bool) error { +func (par *parser) stateInArray(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } + return RuneTypeSpace, nil case ']': - enc.popState() - enc.curIndent-- - if nonempty { - if err := enc.nlIndent(); err != nil { - return err - } - } + par.popState() + return RuneTypeArrayEnd, nil default: - if err := enc.nlIndent(); err != nil { - return err - } - enc.replaceState(enc.stateAfterItem, false) - enc.pushState(enc.stateAny, false) - return enc.state(c) + par.replaceState(par.stateAfterItem, false) + par.pushState(par.stateAny, false) + return par.state(c) } - return enc.emitByte(byte(c)) } -func (enc *ReEncoder) stateAfterItem(c rune) error { +func (par *parser) stateAfterItem(c rune) (RuneType, error) { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: - if enc.Compact || enc.Indent != "" { - return nil - } + return RuneTypeSpace, nil case ',': - enc.replaceState(enc.stateInNonEmptyArray, false) + par.replaceState(par.stateInArray, false) + return RuneTypeArrayComma, nil case ']': - enc.popState() - enc.curIndent-- - if err := enc.nlIndent(); err != nil { - return err - } + par.popState() + return RuneTypeArrayEnd, nil default: - return &SyntaxError{fmt.Sprintf("array: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("array: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -// string ////////////////////////////////////////////////////////////////////////////////////////// +// state: string /////////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateInString(c rune) error { +func (par *parser) stateInString(c rune) (RuneType, error) { switch { case c == '\\': - enc.replaceState(enc.stateInBackslash, false) - return nil + par.replaceState(par.stateInEsc, false) + return RuneTypeStringEsc, nil case c == '"': - enc.popState() - return enc.emitByte(byte(c)) + par.popState() + return RuneTypeStringEnd, nil case 0x0020 <= c && c <= 0x10FFFF: - return enc.emit(writeStringChar(enc.Out, c, false, enc.UnicodeEscape)) + return RuneTypeStringChar, nil default: - return &SyntaxError{fmt.Sprintf("string: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("string: unexpected character: %q", c) } } -func (enc *ReEncoder) stateInBackslash(c rune) error { +func (par *parser) stateInEsc(c rune) (RuneType, error) { switch c { - case '"': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '"', false, enc.UnicodeEscape)) - case '\\': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\\', false, enc.UnicodeEscape)) - case '/': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '/', false, enc.UnicodeEscape)) - case 'b': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\b', false, enc.UnicodeEscape)) - case 'f': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\f', false, enc.UnicodeEscape)) - case 'n': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\n', false, enc.UnicodeEscape)) - case 'r': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\r', false, enc.UnicodeEscape)) - case 't': - enc.replaceState(enc.stateInString, false) - return enc.emit(writeStringChar(enc.Out, '\t', false, enc.UnicodeEscape)) + case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': + par.replaceState(par.stateInString, false) + return RuneTypeStringEsc1, nil case 'u': - enc.replaceState(enc.stateInUnicode, false) - return nil + par.replaceState(par.stateInEscU, false) + return RuneTypeStringEscU, nil default: - return &SyntaxError{fmt.Sprintf("string backslash sequence: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("string backslash sequence: unexpected character: %q", c) } } -func (enc *ReEncoder) stateInUnicode(c rune) error { +func (par *parser) _stateInEscU(c rune, typ RuneType, nxt parseState) (RuneType, error) { switch { - case '0' <= c && c <= '9': - enc.stateBuf = append(enc.stateBuf, byte(c)-'0') - case 'a' <= c && c <= 'f': - enc.stateBuf = append(enc.stateBuf, byte(c)-'a'+10) - case 'A' <= c && c <= 'F': - enc.stateBuf = append(enc.stateBuf, byte(c)-'A'+10) + case ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'): + par.replaceState(nxt, false) + return typ, nil default: - return &SyntaxError{fmt.Sprintf("string unicode sequence: unexpected character: %c", c), enc.inputPos} - } - if len(enc.stateBuf) == 4 { - enc.replaceState(enc.stateInString, false) - c := 0 | - rune(enc.stateBuf[0])<<12 | - rune(enc.stateBuf[1])<<8 | - rune(enc.stateBuf[2])<<4 | - rune(enc.stateBuf[3])<<0 - enc.stateBuf = enc.stateBuf[:0] - return enc.emit(writeStringChar(enc.Out, c, true, enc.UnicodeEscape)) + return RuneTypeError, fmt.Errorf("string unicode sequence: unexpected character: %q", c) } - return nil +} +func (par *parser) stateInEscU(c rune) (RuneType, error) { + return par._stateInEscU(c, RuneTypeStringEscUA, par.stateInEscUA) +} +func (par *parser) stateInEscUA(c rune) (RuneType, error) { + return par._stateInEscU(c, RuneTypeStringEscUB, par.stateInEscUB) +} +func (par *parser) stateInEscUB(c rune) (RuneType, error) { + return par._stateInEscU(c, RuneTypeStringEscUC, par.stateInEscUC) +} +func (par *parser) stateInEscUC(c rune) (RuneType, error) { + return par._stateInEscU(c, RuneTypeStringEscUD, par.stateInString) } -// number ////////////////////////////////////////////////////////////////////////////////////////// +// state: number /////////////////////////////////////////////////////////////////////////////////// // Here's a flattened drawing of the syntax diagram from www.json.org : // @@ -415,7 +299,7 @@ func (enc *ReEncoder) stateInUnicode(c rune) error { // ╰─"+"─╯ // // Now here it is slightly redrawn, and with each distinct state our -// decoder can be in marked with a single-capital-letter: +// parser can be in marked with a single-capital-letter: // // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] // >─A─╮───────╭──╮─"0"─────────C─╭─────────╮──────────────────╭─────────╮──────────────────────────╭─> @@ -427,172 +311,150 @@ func (enc *ReEncoder) stateInUnicode(c rune) error { // ╰─"+"─╯ // // Which state we're at is the 'X' in 'stateNumberX'. -// -// Besides just traversing that, there are a few compressions we want to make: -// -// - trim trailing 0s from fraction the (but don't remove the -// fraction if it's all 0s); do this by making the F state a little -// special. This requires a little more state, because when we -// encounter the 0 we don't yet know if it's trailing. So, store -// the number of maybe-trailing zeros in enc.stateBuf[0]; if that -// reaches 255, then bleed over to enc.stateBuf[1] and so on. -// -// - trim leading 0s from the exponent (but don't remove the exponent -// if it's all 0s); do this by making the H state a little special. -// Record whether we've seen a non-zero digit in enc.stateBuf[0] -// (0=false, 1=true). -// integer-part //////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateNumberA(c rune) error { // start +// number: integer-part //////////////////////////////////////////////////////// +func (par *parser) stateNumberA(c rune) (RuneType, error) { // start switch c { case '-': - enc.replaceState(enc.stateNumberB, true) + par.replaceState(par.stateNumberB, true) + return RuneTypeNumberInt, nil case '0': - enc.replaceState(enc.stateNumberC, true) + par.replaceState(par.stateNumberC, true) + return RuneTypeNumberInt, nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.replaceState(enc.stateNumberD, true) + par.replaceState(par.stateNumberD, true) + return RuneTypeNumberInt, nil default: - return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -func (enc *ReEncoder) stateNumberB(c rune) error { // got a leading "-" +func (par *parser) stateNumberB(c rune) (RuneType, error) { // got a leading "-" switch c { case '0': - enc.replaceState(enc.stateNumberC, true) + par.replaceState(par.stateNumberC, true) + return RuneTypeNumberInt, nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.replaceState(enc.stateNumberD, true) + par.replaceState(par.stateNumberD, true) + return RuneTypeNumberInt, nil default: - return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } - return enc.emitByte(byte(c)) } -func (enc *ReEncoder) stateNumberC(c rune) error { // ready for the fraction or exponent part to start +func (par *parser) stateNumberC(c rune) (RuneType, error) { // ready for the fraction or exponent part to start switch c { case '.': - enc.replaceState(enc.stateNumberE, true) - return enc.emitByte('.') + par.replaceState(par.stateNumberE, true) + return RuneTypeNumberFrac, nil case 'e', 'E': - enc.replaceState(enc.stateNumberG, true) - enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.emitByte('e') + par.replaceState(par.stateNumberG, true) + return RuneTypeNumberExp, nil default: - enc.popState() - return enc.state(c) + par.popState() + return par.state(c) } } -func (enc *ReEncoder) stateNumberD(c rune) error { // in the integer part +func (par *parser) stateNumberD(c rune) (RuneType, error) { // in the integer part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - return enc.emitByte(byte(c)) + return RuneTypeNumberInt, nil case '.': - enc.replaceState(enc.stateNumberE, true) - return enc.emitByte('.') + par.replaceState(par.stateNumberE, true) + return RuneTypeNumberFrac, nil case 'e', 'E': - enc.replaceState(enc.stateNumberG, true) - enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.emitByte('e') + par.replaceState(par.stateNumberG, true) + return RuneTypeNumberExp, nil default: - enc.popState() - return enc.state(c) + par.popState() + return par.state(c) } } -// fraction-part /////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateNumberE(c rune) error { // got a ".", ready to read a number for the fraction part +// number: fraction-part /////////////////////////////////////////////////////// +func (par *parser) stateNumberE(c rune) (RuneType, error) { // got a ".", ready to read a number for the fraction part switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.replaceState(enc.stateNumberF, true) - return enc.emitByte(byte(c)) + par.replaceState(par.stateNumberF, true) + return RuneTypeNumberFrac, nil default: - return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("number: unexpected character: %q", c) } } -func (enc *ReEncoder) stateNumberF(c rune) error { // in the fraction part +func (par *parser) stateNumberF(c rune) (RuneType, error) { // in the fraction part switch c { - case '0': - if len(enc.stateBuf) > 0 && enc.stateBuf[len(enc.stateBuf)-1] < 255 { - enc.stateBuf[len(enc.stateBuf)-1]++ - } else { - enc.stateBuf = append(enc.stateBuf, 1) - } - return nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - for len(enc.stateBuf) > 0 { - if err := enc.emitByte('0'); err != nil { - return err - } - if enc.stateBuf[len(enc.stateBuf)-1] == 1 { - enc.stateBuf = enc.stateBuf[:len(enc.stateBuf)-1] - } else { - enc.stateBuf[len(enc.stateBuf)-1]-- - } - } - return enc.emitByte(byte(c)) + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return RuneTypeNumberFrac, nil case 'e', 'E': - enc.replaceState(enc.stateNumberG, true) - enc.stateBuf = append(enc.stateBuf[:0], 0) - return enc.emitByte('e') + par.replaceState(par.stateNumberG, true) + return RuneTypeNumberExp, nil default: - enc.stateBuf = enc.stateBuf[:0] - enc.popState() - return enc.state(c) + par.popState() + return par.state(c) } } -// exponent-part /////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateNumberG(c rune) error { // got a leading "e" +// number: exponent-part /////////////////////////////////////////////////////// +func (par *parser) stateNumberG(c rune) (RuneType, error) { // got a leading "e" switch c { - case '-', '+': - enc.replaceState(enc.stateNumberH, true) - return enc.emitByte(byte(c)) - case '0': - enc.replaceState(enc.stateNumberH, true) - return nil - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.replaceState(enc.stateNumberH, true) - enc.stateBuf[0] = 1 - return enc.emitByte(byte(c)) + case '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + par.replaceState(par.stateNumberH, true) + return RuneTypeNumberExp, nil default: - enc.stateBuf = enc.stateBuf[:0] - return &SyntaxError{fmt.Sprintf("number: unexpected character: %c", c), enc.inputPos} + return RuneTypeError, fmt.Errorf("number: unexpected character: %c", c) } } -func (enc *ReEncoder) stateNumberH(c rune) error { // in the exponent's number part +func (par *parser) stateNumberH(c rune) (RuneType, error) { // in the exponent's number part switch c { - case '0': - if enc.stateBuf[0] == 0 { - return nil - } - return enc.emitByte('0') - case '1', '2', '3', '4', '5', '6', '7', '8', '9': - enc.stateBuf[0] = 1 - return enc.emitByte(byte(c)) + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + return RuneTypeNumberExp, nil default: - if enc.stateBuf[0] == 0 { - if err := enc.emitByte('0'); err != nil { - return err - } - } - enc.stateBuf = enc.stateBuf[:0] - enc.popState() - return enc.state(c) + par.popState() + return par.state(c) } } -// literals //////////////////////////////////////////////////////////////////////////////////////// +// state: literals ///////////////////////////////////////////////////////////////////////////////// -func (enc *ReEncoder) stateInTrue(c rune) error { return enc._stateInLiteral(c, "true") } -func (enc *ReEncoder) stateInFalse(c rune) error { return enc._stateInLiteral(c, "false") } -func (enc *ReEncoder) stateInNull(c rune) error { return enc._stateInLiteral(c, "null") } -func (enc *ReEncoder) _stateInLiteral(c rune, full string) error { - if c != rune(full[len(enc.stateBuf)]) { - return &SyntaxError{fmt.Sprintf("%s: unexpected character: %c", full, c), enc.inputPos} +func (par *parser) l(c rune, full string, exp rune, typ RuneType, nxt parseState) (RuneType, error) { + if c != exp { + return RuneTypeError, fmt.Errorf("%s: unexpected character: %q", full, c) } - enc.stateBuf = append(enc.stateBuf, byte(c)) - if len(enc.stateBuf) == len(full) { - enc.stateBuf = enc.stateBuf[:0] - enc.popState() + if nxt == nil { + par.popState() + } else { + par.replaceState(nxt, false) } - return enc.emitByte(byte(c)) + return typ, nil +} + +func (par *parser) stateTrueT(c rune) (RuneType, error) { + return par.l(c, "true", 'r', RuneTypeTrueR, par.stateTrueR) +} +func (par *parser) stateTrueR(c rune) (RuneType, error) { + return par.l(c, "true", 'u', RuneTypeTrueU, par.stateTrueU) +} +func (par *parser) stateTrueU(c rune) (RuneType, error) { + return par.l(c, "true", 'e', RuneTypeTrueR, nil) +} + +func (par *parser) stateFalseF(c rune) (RuneType, error) { + return par.l(c, "false", 'a', RuneTypeFalseA, par.stateFalseA) +} +func (par *parser) stateFalseA(c rune) (RuneType, error) { + return par.l(c, "false", 'l', RuneTypeFalseL, par.stateFalseL) +} +func (par *parser) stateFalseL(c rune) (RuneType, error) { + return par.l(c, "false", 's', RuneTypeFalseS, par.stateFalseS) +} +func (par *parser) stateFalseS(c rune) (RuneType, error) { + return par.l(c, "false", 'e', RuneTypeFalseE, nil) +} + +func (par *parser) stateNullN(c rune) (RuneType, error) { + return par.l(c, "null", 'u', RuneTypeNullU, par.stateNullU) +} +func (par *parser) stateNullU(c rune) (RuneType, error) { + return par.l(c, "null", 'l', RuneTypeNullL1, par.stateNullL) +} +func (par *parser) stateNullL(c rune) (RuneType, error) { + return par.l(c, "null", 'l', RuneTypeNullL2, nil) } -- cgit v1.2.3-54-g00ecf