summary | refs | log | tree | commit | diff
path: root/reencode.go
diff options
context:
space:
mode:
Diffstat (limited to 'reencode.go')
-rw-r--r-- reencode.go 34
1 files changed, 26 insertions, 8 deletions
diff --git a/reencode.go b/reencode.go
index 7439bf0..c19e296 100644
--- a/reencode.go
+++ b/reencode.go
@@ -243,10 +243,13 @@ func (enc *ReEncoder) getRuneFromString(str string, pos int) (c rune, size int,
// but *ReEncoder does because it transforms the data written to it,
// and the number of bytes written may be wildly different than the
// number of bytes handled.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .WriteString().
func (enc *ReEncoder) Write(str []byte) (int, error) {
if len(str) == 0 {
return 0, nil
}
+ origBufLen := enc.bufLen
var n int
for {
c, size, full, isRune := enc.getRuneFromBytes(str, n)
@@ -261,14 +264,14 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
return len(str), nil
}
if enc.utf == InvalidUTF8Error && !isRune {
- return n, &ReEncodeSyntaxError{
+ return n - origBufLen, &ReEncodeSyntaxError{
Offset: enc.inputPos,
Err: fmt.Errorf("invalid UTF-8: %#02x", c),
}
}
enc.handleRune(c, size, isRune)
if enc.err != nil {
- return n, enc.err
+ return n - origBufLen, enc.err
}
n += size
}
@@ -276,10 +279,13 @@ func (enc *ReEncoder) Write(str []byte) (int, error) {
// WriteString implements io.StringWriter; it does what you'd expect,
// but see the notes on the Write method.
+//
+//nolint:dupl // Yes, this is mostly a duplicate of .Write().
func (enc *ReEncoder) WriteString(str string) (int, error) {
if len(str) == 0 {
return 0, nil
}
+ origBufLen := enc.bufLen
var n int
for {
c, size, full, isRune := enc.getRuneFromString(str, n)
@@ -294,14 +300,14 @@ func (enc *ReEncoder) WriteString(str string) (int, error) {
return len(str), nil
}
if enc.utf == InvalidUTF8Error && !isRune {
- return n, &ReEncodeSyntaxError{
+ return n - origBufLen, &ReEncodeSyntaxError{
Offset: enc.inputPos,
Err: fmt.Errorf("invalid UTF-8: %#02x", c),
}
}
enc.handleRune(c, size, isRune)
if enc.err != nil {
- return n, enc.err
+ return n - origBufLen, enc.err
}
n += size
}
@@ -323,9 +329,21 @@ func (enc *ReEncoder) WriteRune(c rune) (n int, err error) {
// if enc.AllowMultipleValues is set.
func (enc *ReEncoder) Close() error {
if enc.bufLen > 0 {
- return &ReEncodeSyntaxError{
- Offset: enc.inputPos,
- Err: fmt.Errorf("%w: unflushed unicode garbage: %q", io.ErrUnexpectedEOF, enc.buf[:enc.bufLen]),
+ if enc.utf == InvalidUTF8Error {
+ return &ReEncodeSyntaxError{
+ Offset: enc.inputPos,
+ Err: fmt.Errorf("truncated UTF-8: %q", enc.buf[:enc.bufLen]),
+ }
+ }
+ for i := 0; i < enc.bufLen; i++ {
+ if enc.utf == InvalidUTF8Replace {
+ enc.handleRune(utf8.RuneError, 1, true)
+ } else {
+ enc.handleRune(rune(enc.buf[i]), 1, false)
+ }
+ if enc.err != nil {
+ return enc.err
+ }
}
}
if _, err := enc.par.HandleEOF(); err != nil {
@@ -352,7 +370,7 @@ func (enc *ReEncoder) Close() error {
// isRune=false indicates that 'c' is a raw byte from invalid UTF-8.
func (enc *ReEncoder) handleRune(c rune, size int, isRune bool) {
- t, err := enc.par.HandleRune(c)
+ t, err := enc.par.HandleRune(c, isRune)
if err != nil {
enc.err = &ReEncodeSyntaxError{
Err: err,