// Copyright (C) 2022-2023 Luke Shumaker
//
// SPDX-License-Identifier: GPL-2.0-or-later

package lowmemjson

import (
	"io"
	"unicode/utf8"

	"git.lukeshu.com/go/lowmemjson/internal/jsonparse"
)

// runeTypeScanner reads runes from an io.RuneScanner, feeds each one
// through a jsonparse.Parser, and reports the rune together with the
// parser's classification of it.  It tracks the input offset, caches
// the most recent result so that it can be un-read and replayed, and
// makes error and EOF results sticky.
type runeTypeScanner struct {
	// everything that is not "initialized by constructor" starts
	// out as the zero value.

	inner  io.RuneScanner   // initialized by constructor
	parser jsonparse.Parser // initialized by constructor

	// offset is the running total of rune sizes read from inner,
	// less anything that has been un-read; see InputOffset.
	offset int64

	// rTypeOK is true once rType/rErr hold a result produced by
	// ReadRuneType; it is cleared by Reset, and by PopReadBarrier
	// when the cached result is EOF, to drop sticky results.
	rTypeOK bool
	// repeat is true when the cached result has been un-read and
	// should be replayed by the next ReadRuneType.
	repeat bool

	// The cached result of the most recent read.
	rRune   rune               // the rune, or the raw byte if !rIsRune
	rSize   int                // size in bytes; 0 for EOF-like results
	rIsRune bool               // false if rRune is a raw byte of invalid UTF-8
	rType   jsonparse.RuneType // the parser's classification of rRune
	rErr    error              // the error to return alongside rType, or nil
}

// ReadRuneType reads the next non-whitespace rune and returns it, its
// size in bytes, the parser's classification of it, and an error.
//
// Once a result with type RuneTypeError or RuneTypeEOF has been
// produced, that same result is returned again by subsequent calls
// until it is cleared by Reset or (for EOF) by PopReadBarrier.
//
// If the input byte is not valid UTF-8 and the raw byte could be
// recovered, the parser is given the raw byte, but the rune returned
// to the caller is utf8.RuneError.
//
// The returned error is a *DecodeReadError, a *DecodeSyntaxError, or nil.
// An EOF condition is represented as one of:
//
//	end of value:                   (_, 0, RuneTypeEOF, nil)
//	end of file in middle of value: (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.ErrUnexpectedEOF})
//	end of file at start of value:  (_, 0, RuneTypeError, &DecodeSyntaxError{Offset: offset, Err: io.EOF})
func (sc *runeTypeScanner) ReadRuneType() (rune, int, jsonparse.RuneType, error) {
	switch {
	case sc.rTypeOK && (sc.rType == jsonparse.RuneTypeError || sc.rType == jsonparse.RuneTypeEOF):
		// do nothing
		//
		// The cached Error/EOF result is sticky; it is returned
		// again without reading from inner.
		//
		// NOTE(review): this case is checked before sc.repeat, so
		// an un-read rune whose type was Error is returned without
		// re-advancing offset or inner -- confirm that is intended.
	case sc.repeat:
		// Replay the rune that was un-read: consume it from inner
		// again and re-advance the offset, but keep the cached
		// r* fields rather than re-running the parser.
		sc.offset += int64(sc.rSize)
		_, _, _ = sc.inner.ReadRune()
	case sc.parser.IsAtBarrier():
		// The parser reports that it is at a barrier; synthesize a
		// zero-size EOF without reading from inner.
		sc.rTypeOK = true
		sc.rType = jsonparse.RuneTypeEOF
		sc.rRune = 0
		sc.rSize = 0
		sc.rErr = nil
	default:
		sc.rTypeOK = true
	again:
		var err error
		sc.rRune, sc.rSize, err = sc.inner.ReadRune()
		sc.offset += int64(sc.rSize)
		switch err {
		case nil:
			sc.rIsRune = true
			// RuneError with a size of 1 is treated as an invalid
			// byte rather than a literal U+FFFD; if inner can scan
			// bytes, fetch the raw byte so that the parser sees what
			// was actually in the input.
			if sc.rRune == utf8.RuneError && sc.rSize == 1 {
				if bs, ok := sc.inner.(io.ByteScanner); ok {
					_ = bs.UnreadByte() // UnreadRune doesn't back up the ReadByte-pos
					b, _ := bs.ReadByte()
					_ = bs.UnreadByte()
					// Consume the byte again via ReadRune; presumably so
					// that the last operation on inner is a ReadRune and a
					// later inner.UnreadRune is permitted -- confirm.
					_, _, _ = sc.inner.ReadRune()
					sc.rRune = rune(b)
					sc.rIsRune = false
				}
			}
			sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
			if err != nil {
				sc.rErr = &DecodeSyntaxError{
					// offset has already been advanced past the rune;
					// report the position where the rune starts.
					Offset: sc.offset - int64(sc.rSize),
					Err:    err,
				}
			} else {
				sc.rErr = nil
			}
			switch sc.rType {
			case jsonparse.RuneTypeSpace:
				// Whitespace is not returned to the caller; read on.
				goto again
			case jsonparse.RuneTypeEOF:
				// The parser classified this rune as EOF: put the
				// rune back into inner and report a zero-size EOF.
				sc.offset -= int64(sc.rSize)
				sc.rRune = 0
				sc.rSize = 0
				_ = sc.inner.UnreadRune()
			}
		case io.EOF:
			// The input is exhausted; let the parser decide whether
			// that is acceptable at this point.
			sc.rType, err = sc.parser.HandleEOF()
			if err != nil {
				sc.rErr = &DecodeSyntaxError{
					Offset: sc.offset,
					Err:    err,
				}
			} else {
				sc.rErr = nil
			}
		default:
			// Any other error from inner is reported as a read error.
			// NOTE(review): 0 is presumably jsonparse.RuneTypeError
			// (which would make this result sticky) -- confirm.
			sc.rType = 0
			sc.rErr = &DecodeReadError{
				Offset: sc.offset,
				Err:    err,
			}
		}
	}
	sc.repeat = false
	// If the parser was given a raw invalid byte, the caller still
	// sees utf8.RuneError instead of that byte.
	if sc.rSize > 0 && !sc.rIsRune {
		return utf8.RuneError, sc.rSize, sc.rType, sc.rErr
	}
	return sc.rRune, sc.rSize, sc.rType, sc.rErr
}

// UnreadRune undoes a call to .ReadRuneType().
//
// If the last call to .ReadRuneType() has already been unread, or if
// that call returned a rune with size 0, then ErrInvalidUnreadRune is
// returned.  Otherwise, nil is returned.
//
// The cached result is kept, so the next call to .ReadRuneType()
// returns it again.
func (sc *runeTypeScanner) UnreadRune() error {
	if sc.repeat || sc.rSize == 0 {
		return ErrInvalidUnreadRune
	}
	sc.repeat = true
	sc.offset -= int64(sc.rSize)
	// Any error from the underlying scanner is discarded.
	_ = sc.inner.UnreadRune()
	return nil
}

// InputOffset returns the scanner's current offset: the total size of
// the runes read so far, not counting a rune that has been un-read.
func (sc *runeTypeScanner) InputOffset() int64 {
	return sc.offset
}

// PushReadBarrier pushes a read barrier onto the parser.  While the
// parser reports that it is at a barrier, .ReadRuneType() returns EOF
// without reading from the underlying scanner.
func (sc *runeTypeScanner) PushReadBarrier() {
	sc.parser.PushReadBarrier()
}

// PopReadBarrier pops a barrier from the parser, and then brings the
// cached read result back in line with the parser: an un-read rune is
// fed through the parser again, and a sticky EOF is forgotten so that
// reading can resume.
func (sc *runeTypeScanner) PopReadBarrier() {
	sc.parser.PopBarrier()
	switch {
	case sc.repeat:
		// re-figure the rType and rErr
		var err error
		sc.rType, err = sc.parser.HandleRune(sc.rRune, sc.rIsRune)
		if err != nil {
			sc.rErr = &DecodeSyntaxError{
				// offset was already rewound when the rune was un-read.
				// NOTE(review): subtracting rSize again yields a position
				// before the start of the rune -- confirm this is intended.
				Offset: sc.offset - int64(sc.rSize),
				Err:    err,
			}
		} else {
			sc.rErr = nil
		}
	case sc.rTypeOK && sc.rType == jsonparse.RuneTypeEOF:
		sc.rTypeOK = false // forget the sticky EOF
	}
}

// Reset resets the parser and discards the cached read state, so that
// the next call to .ReadRuneType() reads from the underlying scanner
// afresh.  The input offset is left unchanged.
func (sc *runeTypeScanner) Reset() {
	sc.parser.Reset()
	sc.rTypeOK = false // forget any sticky errors/EOF
	sc.repeat = false  // feed the rune (if any) through the parser again
}