// Copyright (C) 2022-2023 Luke Shumaker // // SPDX-License-Identifier: GPL-2.0-or-later package jsonparse import ( "errors" "fmt" "io" iofs "io/fs" "strings" ) var ErrParserExceededMaxDepth = errors.New("exceeded max depth") type InvalidCharacterError struct { Char rune IsRune bool Where string } func (e *InvalidCharacterError) Error() string { if e.IsRune { return fmt.Sprintf("invalid character %q %s", e.Char, e.Where) } else { return fmt.Sprintf("invalid character '\\x%02x' %s", e.Char, e.Where) } } func isHex(c rune) bool { return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F') } // RuneType is the classification of a rune when parsing JSON input. // A Parser, rather than grouping runes into tokens and classifying // tokens, classifies runes directly. type RuneType uint8 const ( RuneTypeError RuneType = iota RuneTypeSpace // whitespace RuneTypeObjectBeg // '{' RuneTypeObjectColon // ':' RuneTypeObjectComma // ',' RuneTypeObjectEnd // '}' RuneTypeArrayBeg // '[' RuneTypeArrayComma // ',' RuneTypeArrayEnd // ']' RuneTypeStringBeg // opening '"' RuneTypeStringChar // normal character RuneTypeStringEsc // backslash RuneTypeStringEsc1 // single-char after a backslash RuneTypeStringEscU // \uABCD : u RuneTypeStringEscUA // \uABCD : A RuneTypeStringEscUB // \uABCD : B RuneTypeStringEscUC // \uABCD : C RuneTypeStringEscUD // \uABCD : D RuneTypeStringEnd // closing '"' RuneTypeNumberIntNeg RuneTypeNumberIntZero // leading zero only; non-leading zeros are IntDig, not IntZero RuneTypeNumberIntDig RuneTypeNumberFracDot RuneTypeNumberFracDig RuneTypeNumberExpE RuneTypeNumberExpSign RuneTypeNumberExpDig RuneTypeTrueT RuneTypeTrueR RuneTypeTrueU RuneTypeTrueE RuneTypeFalseF RuneTypeFalseA RuneTypeFalseL RuneTypeFalseS RuneTypeFalseE RuneTypeNullN RuneTypeNullU RuneTypeNullL1 RuneTypeNullL2 RuneTypeEOF // Not a real rune type, but used as a stack state. runeTypeAny ) // GoString implements fmt.GoStringer. // //nolint:dupl // False positive due to similarly shaped AST. func (t RuneType) GoString() string { str, ok := map[RuneType]string{ RuneTypeError: "RuneTypeError", RuneTypeSpace: "RuneTypeSpace", RuneTypeObjectBeg: "RuneTypeObjectBeg", RuneTypeObjectColon: "RuneTypeObjectColon", RuneTypeObjectComma: "RuneTypeObjectComma", RuneTypeObjectEnd: "RuneTypeObjectEnd", RuneTypeArrayBeg: "RuneTypeArrayBeg", RuneTypeArrayComma: "RuneTypeArrayComma", RuneTypeArrayEnd: "RuneTypeArrayEnd", RuneTypeStringBeg: "RuneTypeStringBeg", RuneTypeStringChar: "RuneTypeStringChar", RuneTypeStringEsc: "RuneTypeStringEsc", RuneTypeStringEsc1: "RuneTypeStringEsc1", RuneTypeStringEscU: "RuneTypeStringEscU", RuneTypeStringEscUA: "RuneTypeStringEscUA", RuneTypeStringEscUB: "RuneTypeStringEscUB", RuneTypeStringEscUC: "RuneTypeStringEscUC", RuneTypeStringEscUD: "RuneTypeStringEscUD", RuneTypeStringEnd: "RuneTypeStringEnd", RuneTypeNumberIntNeg: "RuneTypeNumberIntNeg", RuneTypeNumberIntZero: "RuneTypeNumberIntZero", RuneTypeNumberIntDig: "RuneTypeNumberIntDig", RuneTypeNumberFracDot: "RuneTypeNumberFracDot", RuneTypeNumberFracDig: "RuneTypeNumberFracDig", RuneTypeNumberExpE: "RuneTypeNumberExpE", RuneTypeNumberExpSign: "RuneTypeNumberExpSign", RuneTypeNumberExpDig: "RuneTypeNumberExpDig", RuneTypeTrueT: "RuneTypeTrueT", RuneTypeTrueR: "RuneTypeTrueR", RuneTypeTrueU: "RuneTypeTrueU", RuneTypeTrueE: "RuneTypeTrueE", RuneTypeFalseF: "RuneTypeFalseF", RuneTypeFalseA: "RuneTypeFalseA", RuneTypeFalseL: "RuneTypeFalseL", RuneTypeFalseS: "RuneTypeFalseS", RuneTypeFalseE: "RuneTypeFalseE", RuneTypeNullN: "RuneTypeNullN", RuneTypeNullU: "RuneTypeNullU", RuneTypeNullL1: "RuneTypeNullL1", RuneTypeNullL2: "RuneTypeNullL2", RuneTypeEOF: "RuneTypeEOF", runeTypeAny: "runeTypeAny", }[t] if ok { return str } return fmt.Sprintf("RuneType(%d)", t) } // String implements fmt.Stringer. // //nolint:dupl // False positive due to similarly shaped AST. func (t RuneType) String() string { str, ok := map[RuneType]string{ RuneTypeError: "x", RuneTypeSpace: " ", RuneTypeObjectBeg: "{", RuneTypeObjectColon: ":", RuneTypeObjectComma: "o", RuneTypeObjectEnd: "}", RuneTypeArrayBeg: "[", RuneTypeArrayComma: "a", RuneTypeArrayEnd: "]", RuneTypeStringBeg: "\"", RuneTypeStringChar: "c", RuneTypeStringEsc: "\\", RuneTypeStringEsc1: "b", RuneTypeStringEscU: "u", RuneTypeStringEscUA: "A", RuneTypeStringEscUB: "B", RuneTypeStringEscUC: "C", RuneTypeStringEscUD: "D", RuneTypeStringEnd: "ยป", RuneTypeNumberIntNeg: "-", RuneTypeNumberIntZero: "0", RuneTypeNumberIntDig: "1", RuneTypeNumberFracDot: ".", RuneTypeNumberFracDig: "2", RuneTypeNumberExpE: "e", RuneTypeNumberExpSign: "+", RuneTypeNumberExpDig: "3", RuneTypeTrueT: "๐•ฅ", // double-struck RuneTypeTrueR: "๐•ฃ", RuneTypeTrueU: "๐•ฆ", RuneTypeTrueE: "๐•–", RuneTypeFalseF: "๐”ฃ", // fraktur RuneTypeFalseA: "๐”ž", RuneTypeFalseL: "๐”ฉ", RuneTypeFalseS: "๐”ฐ", RuneTypeFalseE: "๐”ข", RuneTypeNullN: "โ“", // circled RuneTypeNullU: "โ“ค", RuneTypeNullL1: "โ“›", RuneTypeNullL2: "โ“", // +uppercase RuneTypeEOF: "$", runeTypeAny: "?", }[t] if ok { return str } return fmt.Sprintf("<%d>", t) } func (t RuneType) JSONType() string { return map[RuneType]string{ RuneTypeObjectBeg: "object", RuneTypeArrayBeg: "array", RuneTypeStringBeg: "string", RuneTypeNumberIntNeg: "number", RuneTypeNumberIntZero: "number", RuneTypeNumberIntDig: "number", RuneTypeTrueT: "true", RuneTypeFalseF: "false", RuneTypeNullN: "null", RuneTypeEOF: "eof", }[t] } // IsNumber returns whether the RuneType is one of the // RuneTypeNumberXXX values. func (t RuneType) IsNumber() bool { return RuneTypeNumberIntNeg <= t && t <= RuneTypeNumberExpDig } // Parser is the low-level JSON parser that powers both *Decoder and // *ReEncoder. type Parser struct { // Setting MaxError to a value greater than 0 causes // HandleRune to return ErrParserExceededMaxDepth if // objects/arrays become nested more deeply than this. MaxDepth int initialized bool err error closed bool // We reuse RuneTypes to store the stack. The base idea is: // stack items are "the most recently read stack-relevant // RuneType". // // The stack starts out with the special pseudo-RuneType // `runeTypeAny` that means we're willing to accept any // element type; an empty stack means that we have reached the // end of the top-level element and should accept no more // input except for whitespace. // // The "normal" stack-relevant RuneTypes are: // // "\uABC for strings // -01.2e+3 for numbers // ๐•ฅ๐•ฃ๐•ฆ for "true" // ๐”ฃ๐”ž๐”ฉ๐”ฐ for "false" // โ“โ“คโ“› for "null" // // Objects and arrays break the "most recently read RuneType" // rule; they need some special assignments: // // { object: waiting for key to start or '}' // ยป object: reading key / waiting for colon // o object: reading value / waiting for ',' or '}' // // [ array: waiting for item to start or ']' // a array: reading item / waiting for ',' or ']' // // Within each element type, the stack item is replaced, not pushed. // // (Keep each of these examples in-sync with parse_test.go.) // // For example, given the input string // // {"x":"y","a":"b"} // // The stack would be // // stack processed // ? // { { // ยป" {" // ยป" {"x // ยป {"x" // o? {"x": // o" {"x":" // o" {"x":"y // o {"x":"y" // { {"x":"y", // ยป" {"x":"y"," // ยป" {"x":"y","a // ยป {"x":"y","a" // o? {"x":"y","a": // o" {"x":"y","a":" // o" {"x":"y","a":"b // o {"x":"y","a":"b" // {"x":"y","a":"b"} // // Or, given the input string // // ["x","y"] // // The stack would be // // stack processed // ? // [ [ // a" [" // a" ["x // a ["x" // a? ["x", // a" ["x"," // a" ["x","y // a ["x","y" // ["x","y"] stack []RuneType barriers []barrier } type barrier struct { allowWS bool stack []RuneType } func (par *Parser) init() { if !par.initialized { par.initialized = true par.pushState(runeTypeAny) } } func (par *Parser) pushState(state RuneType) RuneType { par.stack = append(par.stack, state) return state } func (par *Parser) replaceState(state RuneType) RuneType { par.stack[len(par.stack)-1] = state return state } func (par *Parser) popState() { par.stack = par.stack[:len(par.stack)-1] } func (par *Parser) stackString() string { par.init() var buf strings.Builder for _, s := range par.stack { buf.WriteString(s.String()) } return buf.String() } func (par *Parser) depth() int { n := len(par.stack) for _, barrier := range par.barriers { n += len(barrier.stack) } return n } func (par *Parser) StackIsEmpty() bool { if len(par.barriers) > 0 { return false } if len(par.stack) == 0 { return true } return len(par.stack) == 1 && par.stack[0] == runeTypeAny } func (par *Parser) StackSize() int { return len(par.stack) } // Reset all Parser state. func (par *Parser) Reset() { *par = Parser{ MaxDepth: par.MaxDepth, } } // PushReadBarrier causes the parser to emit EOF once the end of the // element that is started by the current top-of-stack is reached // (which means that it will reject whitespace between the end of the // element and EOF), until this is un-done with PopBarrier. It // essentially turns the parser in to a sub-parser. // // PushReadBarrier may only be called at the beginning of an element, // whether that be // // - runeTypeAny // - RuneTypeObjectBeg // - RuneTypeArrayBeg // - RuneTypeStringBeg // - RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig // - RuneTypeTrueT // - RuneTypeFalseF // - RuneTypeNullN func (par *Parser) PushReadBarrier() { // Sanity checking. par.init() if len(par.stack) == 0 { panic(errors.New("should not happen: illegal PushReadBarrier call: empty stack")) } curState := par.stack[len(par.stack)-1] switch curState { case runeTypeAny, RuneTypeObjectBeg, RuneTypeArrayBeg, RuneTypeStringBeg, RuneTypeNumberIntNeg, RuneTypeNumberIntZero, RuneTypeNumberIntDig, RuneTypeTrueT, RuneTypeFalseF, RuneTypeNullN: // OK default: panic(fmt.Errorf("should not happen: illegal PushReadBarrier call: %q", curState)) } // Actually push. par.barriers = append(par.barriers, barrier{ allowWS: false, stack: par.stack[:len(par.stack)-1], }) par.stack = []RuneType{curState} } // PushWriteBarrier causes the parser to emit EOF once the end of the // about-to-start element is reached and any trailing whitespace has // been exhausted, until this is un-done with PopBarrier. It // essentially turns the parser in to a sub-parser. // // PushWriteBarrier may only be called at the places where an element // of any type may start: // // - runeTypeAny for top-level and object-value elements // - RuneTypeArrayBeg for array-item elements // // PushWriteBarrier signals intent to write an element; if it is // called in a place where an element is optional (at the beginning of // an array), it becomes a syntax error to not write the element. func (par *Parser) PushWriteBarrier() { par.init() if len(par.stack) == 0 { panic(errors.New("should not happen: illegal PushWriteBarrier call: empty stack")) } switch par.stack[len(par.stack)-1] { case runeTypeAny: par.popState() par.barriers = append(par.barriers, barrier{ allowWS: true, stack: par.stack, }) par.stack = []RuneType{runeTypeAny} case RuneTypeArrayBeg: par.replaceState(RuneTypeArrayComma) par.barriers = append(par.barriers, barrier{ allowWS: true, stack: par.stack, }) par.stack = []RuneType{runeTypeAny} default: panic(fmt.Errorf("should not happen: illegal PushWriteBarrier call: %q", par.stack[len(par.stack)-1])) } } // PopBarrier reverses a call to PushReadBarrier or PushWriteBarrier. func (par *Parser) PopBarrier() { if len(par.barriers) == 0 { panic(errors.New("should not happen: illegal PopBarrier call: empty barrier stack")) } barrier := par.barriers[len(par.barriers)-1] par.barriers = par.barriers[:len(par.barriers)-1] par.closed = false par.stack = append(barrier.stack, par.stack...) } // HandleEOF feeds EOF to the Parser. The returned RuneType is either // RuneTypeEOF or RuneTypeError. // // An error is returned if and only if the RuneType is RuneTypeError. // Returns io/fs.ErrClosed if .HandleEOF() has previously been called // (and .Reset() has not been called since). // // Once RuneTypeError or RuneTypeEOF has been returned, it will keep // being returned from both .HandleRune(c) and .HandleEOF() until // .Reset() is called. // // RuneTypeEOF indicates that a complete JSON document has been read. func (par *Parser) HandleEOF() (RuneType, error) { if par.closed { return RuneTypeError, iofs.ErrClosed } defer func() { par.closed = true }() if par.err != nil { return RuneTypeError, par.err } par.init() switch len(par.stack) { case 0: return RuneTypeEOF, nil case 1: switch { case par.stack[0].IsNumber(): if _, err := par.HandleRune('\n', true); err == nil { return RuneTypeEOF, nil } case par.stack[0] == runeTypeAny: par.err = io.EOF return RuneTypeError, par.err } fallthrough default: par.err = io.ErrUnexpectedEOF return RuneTypeError, par.err } } // IsAtBarrier returns whether a read-barrier has been reached and the // next HandleRune call would definitely return RuneTypeEOF. func (par *Parser) IsAtBarrier() bool { return par.initialized && // HandleRune wouldn't return early with an error. !par.closed && par.err == nil && // The current (sub-)parser has reached its end, and len(par.stack) == 0 && // there is a barrier, and len(par.barriers) > 0 && // that barrier would definitely return RuneTypeEOF. !par.barriers[len(par.barriers)-1].allowWS } // HandleRune feeds a Unicode rune to the Parser. // // An error is returned if and only if the RuneType is RuneTypeError. // Returns io/fs.ErrClosed if .HandleEOF() has previously been called // (and .Reset() has not been called since). // // Once RuneTypeError or RuneTypeEOF has been returned, it will keep // being returned from both .HandleRune(c) and .HandleEOF() until // .Reset() is called. // // RuneTypeEOF indicates that the rune cannot be appended to the JSON // document; a new JSON document must be started in order to process // that rune. func (par *Parser) HandleRune(c rune, isRune bool) (RuneType, error) { if par.closed { return RuneTypeError, iofs.ErrClosed } if par.err != nil { return RuneTypeError, par.err } par.init() if len(par.stack) == 0 { if len(par.barriers) == 0 || par.barriers[len(par.barriers)-1].allowWS { switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil } } if len(par.barriers) > 0 { return RuneTypeEOF, nil } else { return RuneTypeError, &InvalidCharacterError{c, isRune, "after top-level value"} } } switch par.stack[len(par.stack)-1] { // any ///////////////////////////////////////////////////////////////////////////////////// case runeTypeAny: switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '{': if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeObjectBeg), nil case '[': if par.MaxDepth > 0 && par.depth() > par.MaxDepth { return RuneTypeError, ErrParserExceededMaxDepth } return par.replaceState(RuneTypeArrayBeg), nil case '"': return par.replaceState(RuneTypeStringBeg), nil case '-': return par.replaceState(RuneTypeNumberIntNeg), nil case '0': return par.replaceState(RuneTypeNumberIntZero), nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberIntDig), nil case 't': return par.replaceState(RuneTypeTrueT), nil case 'f': return par.replaceState(RuneTypeFalseF), nil case 'n': return par.replaceState(RuneTypeNullN), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of value"} } // object ////////////////////////////////////////////////////////////////////////////////// case RuneTypeObjectBeg: // waiting for key to start or '}' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case '"': par.replaceState(RuneTypeStringEnd) return par.pushState(RuneTypeStringBeg), nil case '}': par.popState() return RuneTypeObjectEnd, nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "looking for beginning of object key string"} } case RuneTypeStringEnd: // waiting for ':' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ':': par.replaceState(RuneTypeObjectComma) par.pushState(runeTypeAny) return RuneTypeObjectColon, nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key"} } case RuneTypeObjectComma: // waiting for ',' or '}' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ',': par.replaceState(RuneTypeObjectBeg) return RuneTypeObjectComma, nil case '}': par.popState() return RuneTypeObjectEnd, nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "after object key:value pair"} } // array /////////////////////////////////////////////////////////////////////////////////// case RuneTypeArrayBeg: // waiting for item to start or ']' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ']': par.popState() return RuneTypeArrayEnd, nil default: par.replaceState(RuneTypeArrayComma) par.pushState(runeTypeAny) return par.HandleRune(c, isRune) } case RuneTypeArrayComma: // waiting for ',' or ']' switch c { case 0x0020, 0x000A, 0x000D, 0x0009: return RuneTypeSpace, nil case ',': par.pushState(runeTypeAny) return RuneTypeArrayComma, nil case ']': par.popState() return RuneTypeArrayEnd, nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "after array element"} } // string ////////////////////////////////////////////////////////////////////////////////// case RuneTypeStringBeg: // waiting for char or '"' switch { case c == '\\': return par.replaceState(RuneTypeStringEsc), nil case c == '"': par.popState() return RuneTypeStringEnd, nil case 0x0020 <= c && c <= 0x10FFFF: return RuneTypeStringChar, nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "in string literal"} } case RuneTypeStringEsc: // waiting for escape char switch c { case '"', '\\', '/', 'b', 'f', 'n', 'r', 't': par.replaceState(RuneTypeStringBeg) return RuneTypeStringEsc1, nil case 'u': return par.replaceState(RuneTypeStringEscU), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "in string escape code"} } case RuneTypeStringEscU: if !isHex(c) { return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) } return par.replaceState(RuneTypeStringEscUA), nil case RuneTypeStringEscUA: if !isHex(c) { return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) } return par.replaceState(RuneTypeStringEscUB), nil case RuneTypeStringEscUB: if !isHex(c) { return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) } return par.replaceState(RuneTypeStringEscUC), nil case RuneTypeStringEscUC: if !isHex(c) { return RuneTypeError, fmt.Errorf(`invalid character %q in \u hexadecimal character escape`, c) } par.replaceState(RuneTypeStringBeg) return RuneTypeStringEscUD, nil // number ////////////////////////////////////////////////////////////////////////////////// // // Here's a flattened drawing of the syntax diagram from www.json.org : // // [------------ integer ----------][-- fraction ---][-------- exponent -------] // >โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ // โ•ฐโ”€"-"โ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ฏโ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"."โ”€โ•ญdigitโ•ฎโ”€โ•ฏ โ•ฐโ”€"e"โ”€โ•ญโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ•ฎโ”€โ•ฏ // โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ โ”‚ โ•ฐโ”€โ”€<โ”€โ”€โ•ฏ // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ // โ”‚ โ”‚ // โ•ฐโ”€"+"โ”€โ•ฏ // // Now here it is slightly redrawn, and with each distinct state our // parser can be in marked with a single-capital-letter: // // [-------------- integer ------------][--------- fraction --------][--------- exponent ---------] // >โ”€Aโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ•ฎโ”€"0"โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€Cโ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ•ญโ”€> // โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ โ”‚ // โ•ฐโ”€"-"โ”€Bโ”€โ•ฏ โ•ฐโ”€digit 1-9โ”€โ•ญโ”€Dโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"."โ”€Eโ”€digitโ”€โ”€โ•ญโ”€Fโ”€โ•ฏโ”€digitโ•ฎ โ•ฐโ”€"e"โ”€โ•ญโ”€Gโ”€โ•ฎโ”€โ”€โ”€โ”€โ”€โ•ญโ”€โ•ญdigitโ”€Iโ”€โ•ฏ // โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ”€โ”€โ•ฏ โ”‚ โ”‚ โ”‚ H โ•ฐโ”€โ”€โ”€โ”€<โ”€โ”€โ”€โ•ฏ // โ•ฐโ”€"E"โ”€โ•ฏ โ•ฐโ”€"-"โ”€โ•ฏ // โ”‚ โ”‚ // โ•ฐโ”€"+"โ”€โ•ฏ // // You may notice that each of these states may be uniquely identified // by the last-read RuneType: // // A = (nothing yet) // B = IntNeg // C = IntZero // D = IntDig // E = FracDot // F = FracDig // G = ExpE // H = ExpSign // I = ExpDig // // The 'A' state is part of the runeTypeAny case above, and // the remainder follow: case RuneTypeNumberIntNeg: // B switch c { case '0': return par.replaceState(RuneTypeNumberIntZero), nil case '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberIntDig), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "in numeric literal"} } case RuneTypeNumberIntZero: // C switch c { case '.': return par.replaceState(RuneTypeNumberFracDot), nil case 'e', 'E': return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() return par.HandleRune(c, isRune) } case RuneTypeNumberIntDig: // D switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberIntDig), nil case '.': return par.replaceState(RuneTypeNumberFracDot), nil case 'e', 'E': return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() return par.HandleRune(c, isRune) } case RuneTypeNumberFracDot: // E switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberFracDig), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "after decimal point in numeric literal"} } case RuneTypeNumberFracDig: // F switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberFracDig), nil case 'e', 'E': return par.replaceState(RuneTypeNumberExpE), nil default: par.popState() return par.HandleRune(c, isRune) } case RuneTypeNumberExpE: // G switch c { case '-', '+': return par.replaceState(RuneTypeNumberExpSign), nil case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberExpDig), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} } case RuneTypeNumberExpSign: // H switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberExpDig), nil default: return RuneTypeError, &InvalidCharacterError{c, isRune, "in exponent of numeric literal"} } case RuneTypeNumberExpDig: // I switch c { case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': return par.replaceState(RuneTypeNumberExpDig), nil default: par.popState() return par.HandleRune(c, isRune) } // literals //////////////////////////////////////////////////////////////////////////////// // true case RuneTypeTrueT: return par.expectRune(c, isRune, 'r', RuneTypeTrueR, "true", false) case RuneTypeTrueR: return par.expectRune(c, isRune, 'u', RuneTypeTrueU, "true", false) case RuneTypeTrueU: return par.expectRune(c, isRune, 'e', RuneTypeTrueE, "true", true) // false case RuneTypeFalseF: return par.expectRune(c, isRune, 'a', RuneTypeFalseA, "false", false) case RuneTypeFalseA: return par.expectRune(c, isRune, 'l', RuneTypeFalseL, "false", false) case RuneTypeFalseL: return par.expectRune(c, isRune, 's', RuneTypeFalseS, "false", false) case RuneTypeFalseS: return par.expectRune(c, isRune, 'e', RuneTypeFalseE, "false", true) // null case RuneTypeNullN: return par.expectRune(c, isRune, 'u', RuneTypeNullU, "null", false) case RuneTypeNullU: return par.expectRune(c, isRune, 'l', RuneTypeNullL1, "null", false) case RuneTypeNullL1: return par.expectRune(c, isRune, 'l', RuneTypeNullL2, "null", true) default: panic(fmt.Errorf(`should not happen: invalid stack: "%s"`, par.stackString())) } } func (par *Parser) expectRune(c rune, isRune bool, exp rune, typ RuneType, context string, pop bool) (RuneType, error) { if c != exp { return RuneTypeError, &InvalidCharacterError{c, isRune, fmt.Sprintf("in literal %s (expecting %q)", context, exp)} } if pop { par.popState() return typ, nil } else { return par.replaceState(typ), nil } }