From eb4c53216a3fac23bdca417f6d899c164fcef61a Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Thu, 20 Jul 2006 23:02:04 -0400 Subject: http://web.archive.org/web/20060720230204/http:/www.unicode.org:80/Public/BETA/CVTUTF-1-4/ --- ConvertUTF.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'ConvertUTF.c') diff --git a/ConvertUTF.c b/ConvertUTF.c index 9b3deeb..67ab49f 100644 --- a/ConvertUTF.c +++ b/ConvertUTF.c @@ -33,6 +33,7 @@ July 2003: slight mods to back out aggressive FFFE detection. Jan 2004: updated switches in from-UTF8 conversions. Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. + May 2006: updated isLegalUTF8Sequence. See the header file "ConvertUTF.h" for complete documentation. @@ -305,7 +306,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) { switch (*source) { /* no fall-through in this inner switch */ case 0xE0: if (a < 0xA0) return false; break; - case 0xED: if (a > 0x9F) return false; break; + case 0xED: if ((a < 0x80) || (a > 0x9F)) return false; break; case 0xF0: if (a < 0x90) return false; break; case 0xF4: if (a > 0x8F) return false; break; default: if (a < 0x80) return false; @@ -323,12 +324,25 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) { * Exported function to return whether a UTF-8 sequence is legal or not. * This is not used here; it's just exported. */ + Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { - int length = trailingBytesForUTF8[*source]+1; - if (source+length > sourceEnd) { - return false; + int length; + if (source == sourceEnd) { + return true; + } + while (true) { + length = trailingBytesForUTF8[*source]+1; + if (source+length > sourceEnd) { + return false; + } + if (!isLegalUTF8(source, length)) { + return false; + } + source += length; + if (source >= sourceEnd) { + return true; + } } - return isLegalUTF8(source, length); } /* --------------------------------------------------------------------- */ -- cgit v1.2.3