1 files changed, 61 insertions, 11 deletions
diff --git a/harness.c b/harness.c
index b3dd500..25b3e9e 100644
--- a/harness.c
+++ b/harness.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001 Unicode, Inc.
+ * Copyright 2001-2004 Unicode, Inc.
  * 
  * Disclaimer
  * 
@@ -31,6 +31,11 @@
  *		$	gcc -g harness.c -o harness
  *
  * Rev History: Rick McGowan, new file April 2001.
+ * Sept 19, 2002: Corrected error on line 234:  utf16_buf[2] becomes utf16_result[2]
+ * 	per report from Iain Murray.
+ * July 3, 2003: Updated printout message.
+ * Oct 19, 2004: Updated isLegalUTF8 test data and corrected switch statements to catch
+ *	illegal surrogate use in UTF-8, per report from Frank Tang.
  *
  */
 
@@ -51,7 +56,9 @@
 	00-7F				  0000-  007F
 	C2-DF	80-BF			  0080-  07FF
 	E0	A0-BF	80-BF		  0800-  0FFF
-	E1-EF	80-BF	80-BF		  1000-  FFFF
+	E1-EC   80-BF   80-BF             1000-  CFFF
+	ED      80-9F   80-BF             D000-  D7FF
+	EE-EF   80-BF   80-BF             E000-  FFFF
 	F0	90-BF	80-BF	80-BF	 10000- 3FFFF
 	F1-F3	80-BF	80-BF	80-BF	 40000- FFFFF
 	F4	80-8F	80-BF	80-BF	100000-10FFFF
@@ -85,9 +92,16 @@ struct utf8_test utf8_testData[] = {
     { 0,	2,	{ 0xC0, 0xAF, 0x00, 0x00, 0x00 }},	/* 15 */
     { 0,	3,	{ 0xE0, 0x9F, 0x80, 0x00, 0x00 }},	/* 16 */
     { 0,	4,	{ 0xF0, 0x93, 0xB2, 0xC1, 0x00 }},	/* 17 */
-/* for all > 17 use "short" buffer lengths to detect over-run */
+
+    { 1,	3,	{ 0xED, 0x9F, 0xBF, 0x00, 0x00 }},	/* 18 */
+    { 1,	3,	{ 0xEE, 0x80, 0x80, 0x00, 0x00 }},	/* 19 */
+    { 0,	3,	{ 0xED, 0xA0, 0x80, 0x00, 0x00 }},	/* 20 */
+    { 0,	3,	{ 0xED, 0xBF, 0xBF, 0x00, 0x00 }},	/* 21 */
+
+/* for all > 21 use "short" buffer lengths to detect over-run */
     { 0,	4,	{ 0xF0, 0x93, 0xB2, 0xC3, 0x00 }},	/* 18 use short buflen */
-    { 0,	0,	{ 0x00, 0x00, 0x00, 0x00, 0x00 }}
+    { 0,	0,	{ 0x00, 0x00, 0x00, 0x00, 0x00 }},
+
 };
 
 int test01() {
@@ -100,8 +114,8 @@ int test01() {
 	for (i = 0; utf8_testData[i].utf8_len; i++) {
 		wantVal1 = wantVal2 = utf8_testData[i].utf8_legal;
 		gotVal1 = isLegalUTF8(&(utf8_testData[i].utf8_seq[0]), utf8_testData[i].utf8_len);
-		/* use truncated length for tests over 17 */
-		if (i <= 17) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; }
+		/* use truncated length for tests over 21 */
+		if (i <= 21) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; }
 		gotVal2 = isLegalUTF8Sequence(&(utf8_testData[i].utf8_seq[0]), &(utf8_testData[i].utf8_seq[0])+len2);
 		if ((gotVal1 != wantVal1) || (gotVal2 != wantVal2)) {
 			printf("Test01 error: seq %d is %d & %d (should be %d & %d) for bytes (%x,%x,%x,%x,%x,) & len %d\n",
@@ -195,7 +209,6 @@ int test02() {
 		case sourceIllegal: printf("sourceIllegal\t"); break;
 		}
 		if (result != conversionOK) {
-
 			printf("Test02B for %d (0x%x), input %04x,%04x; output %s; result %d\n",
 				i, utf32_buf[0], utf16_buf[0], utf16_buf[1], utf8_buf, result);
 			if ((i != UNI_SUR_LOW_START) && (i != UNI_SUR_HIGH_START)) {
@@ -230,7 +243,7 @@ int test02() {
 		/*
 		 * Test UTF8 -> UTF16, with legality check on.
 		 */
-		result = ConvertUTF8toUTF16((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf16TargetStart, &(utf16_buf[2]), strictConversion);
+		result = ConvertUTF8toUTF16((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf16TargetStart, &(utf16_result[2]), strictConversion);
 		switch (result) {
 		default: fprintf(stderr, "Test02C fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
 		case conversionOK: break;
@@ -281,8 +294,6 @@ int test02() {
 			printf("Test02E for %d: utf32 input %08x; trip output %08x (utf_16buf is %04x,%04x)\n", i, utf32_buf[0], utf32_result[0], utf16_buf[0], utf16_buf[1]);
 			return 0;
 		}
-
-
 	}
 	return 1;
 }
@@ -380,11 +391,48 @@ int test03() {
 	return 1;
 }
 
+/* ---------------------------------------------------------------------
+	test04 - Test an illegal UTF-32 value > 10FFFF conversion to UTF-8.
+	Expect it will be turned into UNI_REPLACEMENT_CHAR (not verified here);
+	the only check made is that strict conversion reports sourceIllegal.
+	Returns 1 on success; any other result is fatal and calls exit(1).
+   --------------------------------------------------------------------- */
+
+int test04() {
+	int i, n;
+	ConversionResult result;
+	UTF32 utf32_buf[2];	/* [0] holds the illegal input, [1] is set to 0 */
+	UTF8 utf8_buf[8];	/* conversion output, zeroed before the call */
+	UTF32 *utf32SourceStart, *utf32TargetStart;	/* utf32TargetStart is never used in this test */
+	UTF8 *utf8SourceStart, *utf8TargetStart;	/* utf8SourceStart is never used in this test */
+
+	printf("Begin Test04\n"); fflush(stdout);
+
+	i = 0x10FFFF + 21; /* an arbitrary value > legal */
+
+	utf32_buf[0] = i; utf32_buf[1] = 0;
+	for (n = 0; n < 8; n++) utf8_buf[n] = 0;	/* clear the whole output buffer */
+
+	utf32SourceStart = utf32_buf;
+	utf8TargetStart = utf8_buf;
+
+	/*
+	 * Test UTF32 -> UTF8, with legality check on. The source range is the single
+	 * value utf32_buf[0] (end pointer &utf32_buf[1]); the target end pointer is &utf8_buf[7].
+	 */
+	result = ConvertUTF32toUTF8((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), & utf8TargetStart, &(utf8_buf[7]), strictConversion);
+	if (result != sourceIllegal) {	/* anything other than sourceIllegal is a test failure */
+		fprintf(stderr, "Test04A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
+	}
+
+	return 1;	/* this function never returns 0; failure exits the process above */
+}
+
 /* --------------------------------------------------------------------- */
 
 main() {
 	printf("Three tests of round-trip conversions will be performed.\n");
-	printf("Two illegal result messages are expected; one in test 02A; one in test 03A .\n\n");
+	printf("One test of illegal UTF-32 will be performed.\n");
+	printf("Two illegal result messages are expected; one in test 02A; one in test 03A.\n");
+	printf("These are for tests of Surrogate conversion.\n\n");
 	fflush(stdout);
 	if (test01()) {	printf("******** Test01 succeeded without error. ********\n\n"); }
 	else { printf("-------- Test01 failed. --------\n\n"); }
@@ -392,4 +440,6 @@ main() {
 	else { printf("-------- Test02 failed. --------\n\n"); }
 	if (test03()) { printf("******** Test03 succeeded without error. ********\n\n"); }
 	else { printf("-------- Test03 failed. --------\n\n"); }
+	if (test04()) { printf("******** Test04 succeeded without error. ********\n\n"); }
+	else { printf("-------- Test04 failed. --------\n\n"); }
 }