From 766942acf8f0c0d9ef6c16ffbdedefdfda0af4b2 Mon Sep 17 00:00:00 2001 From: Luke Shumaker Date: Sun, 4 Apr 2004 06:02:31 -0500 Subject: http://web.archive.org/web/20040404060231/http:/www.unicode.org:80/Public/BETA/CVTUTF-1-2/ --- .metadata.txt | 7 +++++++ ConvertUTF.c | 7 ++++++- readme.txt | 21 +++++++++++++++------ 3 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 .metadata.txt diff --git a/.metadata.txt b/.metadata.txt new file mode 100644 index 0000000..4a86b4a --- /dev/null +++ b/.metadata.txt @@ -0,0 +1,7 @@ +CVTUTF7.C 2004-01-06 17:42 +CVTUTF7.H 2004-01-06 17:42 +ConvertUTF.c 2004-01-06 17:42 +ConvertUTF.h 2004-01-06 17:42 +ExpectedOutput.txt 2004-01-06 17:42 +harness.c 2004-01-06 17:42 +readme.txt 2004-01-06 17:42 diff --git a/ConvertUTF.c b/ConvertUTF.c index 0a18518..649fbc8 100644 --- a/ConvertUTF.c +++ b/ConvertUTF.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2003 Unicode, Inc. + * Copyright 2001-2004 Unicode, Inc. * * Disclaimer * @@ -31,6 +31,7 @@ source sequences, enhanced error detection, added casts to eliminate compiler warnings. July 2003: slight mods to back out aggressive FFFE detection. + Jan 2004: updated switches in from-UTF8 conversions. See the header file "ConvertUTF.h" for complete documentation. @@ -345,6 +346,8 @@ ConversionResult ConvertUTF8toUTF16 ( * The cases all fall through. See "Note A" below. */ switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; case 3: ch += *source++; ch <<= 6; case 2: ch += *source++; ch <<= 6; case 1: ch += *source++; ch <<= 6; @@ -465,6 +468,8 @@ ConversionResult ConvertUTF8toUTF32 ( * The cases all fall through. See "Note A" below. 
*/ switch (extraBytesToRead) { + case 5: ch += *source++; ch <<= 6; + case 4: ch += *source++; ch <<= 6; case 3: ch += *source++; ch <<= 6; case 2: ch += *source++; ch <<= 6; case 1: ch += *source++; ch <<= 6; diff --git a/readme.txt b/readme.txt index 7be1443..722c6f4 100644 --- a/readme.txt +++ b/readme.txt @@ -13,18 +13,27 @@ formats of Unicode characters. The following conversions are supported: In addition, there is a test harness which runs various tests. The files "CVTUTF7.C" and "CVTUTF7.H" are for archival and historical purposes -only. They have not been updated to Unicode 3.0 and should be considered -obsolescent. "CVTUTF7.C" contains two functions that can convert between -UCS2 (i.e., the BMP characters only) and UTF-7. Surrogates are not supported, -the code has not been tested, and should be considered unsuitable for general -purpose use. +only. They have not been updated to Unicode 3.0 or later and should be +considered obsolescent. "CVTUTF7.C" contains two functions that can convert +between UCS2 (i.e., the BMP characters only) and UTF-7. Surrogates are +not supported, the code has not been tested, and should be considered +unsuitable for general purpose use. Please submit any bug reports about these programs here: http://www.unicode.org/unicode/reporting.html Version 1.0: initial version. + Version 1.1: corrected some minor problems; added stricter checks. -Last update: July 3, 2003 +Version 1.2: corrected switch statements associated with "extraBytesToRead" + in 4 & 5 byte cases, in functions for conversion from UTF8. + Note: formally, the 4 & 5 byte cases are illegal in the latest + UTF8, but the table and this code have always catered for those + cases, since at one time they were legal. + + +Last update: January 6, 2004 + -- cgit v1.2.3