diff options
Diffstat (limited to 'harness.c')
-rw-r--r-- | harness.c | 72 |
1 file changed, 61 insertions, 11 deletions
@@ -1,5 +1,5 @@ /* - * Copyright 2001 Unicode, Inc. + * Copyright 2001-2004 Unicode, Inc. * * Disclaimer * @@ -31,6 +31,11 @@ * $ gcc -g harness.c -o harness * * Rev History: Rick McGowan, new file April 2001. + * Sept 19, 2002: Corrected error on line 234: utf16_buf[2] becomes utf16_result[2] + * per report from Iain Murray. + * July 3, 2003: Updated printout message. + * Oct 19, 2004: Updated isLegalUTF8 test data and corrected switch statements to catch + * illegal surrogate use in UTF-8, per report from Frank Tang. * */ @@ -51,7 +56,9 @@ 00-7F 0000- 007F C2-DF 80-BF 0080- 07FF E0 A0-BF 80-BF 0800- 0FFF - E1-EF 80-BF 80-BF 1000- FFFF + E1-EC 80-BF 80-BF 1000- CFFF + ED 80-9F 80-BF D000- D7FF + EE-EF 80-BF 80-BF E000- FFFF F0 90-BF 80-BF 80-BF 10000- 3FFFF F1-F3 80-BF 80-BF 80-BF 40000- FFFFF F4 80-8F 80-BF 80-BF 100000-10FFFF @@ -85,9 +92,16 @@ struct utf8_test utf8_testData[] = { { 0, 2, { 0xC0, 0xAF, 0x00, 0x00, 0x00 }}, /* 15 */ { 0, 3, { 0xE0, 0x9F, 0x80, 0x00, 0x00 }}, /* 16 */ { 0, 4, { 0xF0, 0x93, 0xB2, 0xC1, 0x00 }}, /* 17 */ -/* for all > 17 use "short" buffer lengths to detect over-run */ + + { 1, 3, { 0xED, 0x9F, 0xBF, 0x00, 0x00 }}, /* 18 */ + { 1, 3, { 0xEE, 0x80, 0x80, 0x00, 0x00 }}, /* 19 */ + { 0, 3, { 0xED, 0xA0, 0x80, 0x00, 0x00 }}, /* 20 */ + { 0, 3, { 0xED, 0xBF, 0xBF, 0x00, 0x00 }}, /* 21 */ + +/* for all > 21 use "short" buffer lengths to detect over-run */ { 0, 4, { 0xF0, 0x93, 0xB2, 0xC3, 0x00 }}, /* 18 use short buflen */ - { 0, 0, { 0x00, 0x00, 0x00, 0x00, 0x00 }} + { 0, 0, { 0x00, 0x00, 0x00, 0x00, 0x00 }}, + }; int test01() { @@ -100,8 +114,8 @@ int test01() { for (i = 0; utf8_testData[i].utf8_len; i++) { wantVal1 = wantVal2 = utf8_testData[i].utf8_legal; gotVal1 = isLegalUTF8(&(utf8_testData[i].utf8_seq[0]), utf8_testData[i].utf8_len); - /* use truncated length for tests over 17 */ - if (i <= 17) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; } + /* use truncated length for tests over 21 */ + if (i <= 21) { 
len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; } gotVal2 = isLegalUTF8Sequence(&(utf8_testData[i].utf8_seq[0]), &(utf8_testData[i].utf8_seq[0])+len2); if ((gotVal1 != wantVal1) || (gotVal2 != wantVal2)) { printf("Test01 error: seq %d is %d & %d (should be %d & %d) for bytes (%x,%x,%x,%x,%x,) & len %d\n", @@ -195,7 +209,6 @@ int test02() { case sourceIllegal: printf("sourceIllegal\t"); break; } if (result != conversionOK) { - printf("Test02B for %d (0x%x), input %04x,%04x; output %s; result %d\n", i, utf32_buf[0], utf16_buf[0], utf16_buf[1], utf8_buf, result); if ((i != UNI_SUR_LOW_START) && (i != UNI_SUR_HIGH_START)) { @@ -230,7 +243,7 @@ int test02() { /* * Test UTF8 -> UTF16, with legality check on. */ - result = ConvertUTF8toUTF16((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf16TargetStart, &(utf16_buf[2]), strictConversion); + result = ConvertUTF8toUTF16((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf16TargetStart, &(utf16_result[2]), strictConversion); switch (result) { default: fprintf(stderr, "Test02C fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1); case conversionOK: break; @@ -281,8 +294,6 @@ int test02() { printf("Test02E for %d: utf32 input %08x; trip output %08x (utf_16buf is %04x,%04x)\n", i, utf32_buf[0], utf32_result[0], utf16_buf[0], utf16_buf[1]); return 0; } - - } return 1; } @@ -380,11 +391,48 @@ int test03() { return 1; } +/* --------------------------------------------------------------------- + test04 - Test an illegal UTF-32 value > 10FFFF conversion to UTF-8. + Expect it will be turned into UNI_REPLACEMENT_CHAR. 
+ + --------------------------------------------------------------------- */ + +int test04() { + int i, n; + ConversionResult result; + UTF32 utf32_buf[2]; + UTF8 utf8_buf[8]; + UTF32 *utf32SourceStart, *utf32TargetStart; + UTF8 *utf8SourceStart, *utf8TargetStart; + + printf("Begin Test04\n"); fflush(stdout); + + i = 0x10FFFF + 21; /* an arbitrary value > legal */ + + utf32_buf[0] = i; utf32_buf[1] = 0; + for (n = 0; n < 8; n++) utf8_buf[n] = 0; + + utf32SourceStart = utf32_buf; + utf8TargetStart = utf8_buf; + + /* + * Test UTF32 -> UTF8, with legality check on. + */ + result = ConvertUTF32toUTF8((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), & utf8TargetStart, &(utf8_buf[7]), strictConversion); + if (result != sourceIllegal) { + fprintf(stderr, "Test04A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1); + } + + return 1; +} + /* --------------------------------------------------------------------- */ main() { printf("Three tests of round-trip conversions will be performed.\n"); - printf("Two illegal result messages are expected; one in test 02A; one in test 03A .\n\n"); + printf("One test of illegal UTF-32 will be performed.\n"); + printf("Two illegal result messages are expected; one in test 02A; one in test 03A.\n"); + printf("These are for tests of Surrogate conversion.\n\n"); fflush(stdout); if (test01()) { printf("******** Test01 succeeded without error. ********\n\n"); } else { printf("-------- Test01 failed. --------\n\n"); } @@ -392,4 +440,6 @@ main() { else { printf("-------- Test02 failed. --------\n\n"); } if (test03()) { printf("******** Test03 succeeded without error. ********\n\n"); } else { printf("-------- Test03 failed. --------\n\n"); } + if (test04()) { printf("******** Test04 succeeded without error. ********\n\n"); } + else { printf("-------- Test04 failed. --------\n\n"); } } |