From eb4c53216a3fac23bdca417f6d899c164fcef61a Mon Sep 17 00:00:00 2001
From: Luke Shumaker <lukeshu@lukeshu.com>
Date: Thu, 20 Jul 2006 23:02:04 -0400
Subject: 
 http://web.archive.org/web/20060720230204/http:/www.unicode.org:80/Public/BETA/CVTUTF-1-4/

---
 ConvertUTF.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'ConvertUTF.c')

diff --git a/ConvertUTF.c b/ConvertUTF.c
index 9b3deeb..67ab49f 100644
--- a/ConvertUTF.c
+++ b/ConvertUTF.c
@@ -33,6 +33,7 @@
     July 2003: slight mods to back out aggressive FFFE detection.
     Jan 2004: updated switches in from-UTF8 conversions.
     Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+    May 2006: updated isLegalUTF8Sequence.
 
     See the header file "ConvertUTF.h" for complete documentation.
 
@@ -305,7 +306,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
 	switch (*source) {
 	    /* no fall-through in this inner switch */
 	    case 0xE0: if (a < 0xA0) return false; break;
-	    case 0xED: if (a > 0x9F) return false; break;
+	    case 0xED: if ((a < 0x80) || (a > 0x9F)) return false; break;
 	    case 0xF0: if (a < 0x90) return false; break;
 	    case 0xF4: if (a > 0x8F) return false; break;
 	    default:   if (a < 0x80) return false;
@@ -323,12 +324,25 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
  * Exported function to return whether a UTF-8 sequence is legal or not.
  * This is not used here; it's just exported.
  */
+
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
-    int length = trailingBytesForUTF8[*source]+1;
-    if (source+length > sourceEnd) {
-	return false;
+    int length; /* byte count of the sequence starting at source */
+    if (source == sourceEnd) {
+        return true; /* empty range: nothing to reject */
+    }
+    while (true) { /* check every sequence in [source, sourceEnd) */
+        length = trailingBytesForUTF8[*source]+1; /* table entry for the first byte, plus that byte */
+        if (length > sourceEnd - source) { /* truncated; compare lengths so no pointer past the end is formed */
+            return false;
+        }
+        if (!isLegalUTF8(source, length)) {
+            return false;
+        }
+        source += length; /* step to the start of the next sequence */
+        if (source >= sourceEnd) {
+            return true; /* whole range consumed without finding an illegal sequence */
+        }
     }
-    return isLegalUTF8(source, length);
 }
 
 /* --------------------------------------------------------------------- */
-- 
cgit v1.2.3