summary | refs | log | tree | commit | diff
path: root/ConvertUTF.c
diff options
context:
space:
mode:
Diffstat (limited to 'ConvertUTF.c')
-rw-r--r-- ConvertUTF.c 67
1 files changed, 36 insertions, 31 deletions
diff --git a/ConvertUTF.c b/ConvertUTF.c
index 491fa14..23834c4 100644
--- a/ConvertUTF.c
+++ b/ConvertUTF.c
@@ -25,6 +25,8 @@
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
Author: Mark E. Davis, 1994.
Rev History: Rick McGowan, fixes & updates May 2001.
+ Sept 2001: fixed const & error conditions per
+ mods suggested by S. Parent & A. Lillich.
See the header file "ConvertUTF.h" for complete documentation.
@@ -51,10 +53,10 @@ static const UTF32 halfMask = 0x3FFUL;
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF16 (
- UTF32** sourceStart, const UTF32* sourceEnd,
- UTF16** targetStart, const UTF16* targetEnd, const ConversionFlags flags) {
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF32* source = *sourceStart;
+ const UTF32* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
@@ -79,6 +81,7 @@ ConversionResult ConvertUTF32toUTF16 (
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
+ --source; /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
@@ -94,13 +97,14 @@ ConversionResult ConvertUTF32toUTF16 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF32 (
- UTF16** sourceStart, UTF16* sourceEnd,
- UTF32** targetStart, const UTF32* targetEnd, const ConversionFlags flags) {
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF16* source = *sourceStart;
+ const UTF16* source = *sourceStart;
UTF32* target = *targetStart;
UTF32 ch, ch2;
while (source < sourceEnd) {
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && source < sourceEnd) {
ch2 = *source;
@@ -120,6 +124,7 @@ ConversionResult ConvertUTF16toUTF32 (
break;
}
if (target >= targetEnd) {
+ source = oldSource; /* Back up source pointer! */
result = targetExhausted; break;
}
*target++ = ch;
@@ -181,16 +186,17 @@ static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF16toUTF8 (
- UTF16** sourceStart, const UTF16* sourceEnd,
- UTF8** targetStart, const UTF8* targetEnd, ConversionFlags flags) {
+ const UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF16* source = *sourceStart;
+ const UTF16* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
unsigned short bytesToWrite = 0;
const UTF32 byteMask = 0xBF;
const UTF32 byteMark = 0x80;
+ const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
ch = *source++;
/* If we have a surrogate pair, convert to UTF32 first. */
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END && source < sourceEnd) {
@@ -220,6 +226,7 @@ ConversionResult ConvertUTF16toUTF8 (
target += bytesToWrite;
if (target > targetEnd) {
+ source = oldSource; /* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /* note: everything falls through. */
@@ -248,9 +255,9 @@ ConversionResult ConvertUTF16toUTF8 (
* definition of UTF-8 goes up to 4-byte sequences.
*/
-static Boolean isLegalUTF8(UTF8 *source, int length) {
+static Boolean isLegalUTF8(const UTF8 *source, int length) {
UTF8 a;
- UTF8 *srcptr = source+length;
+ const UTF8 *srcptr = source+length;
switch (length) {
default: return false;
/* Everything else falls through when "true"... */
@@ -276,7 +283,7 @@ static Boolean isLegalUTF8(UTF8 *source, int length) {
* Exported function to return whether a UTF-8 sequence is legal or not.
* This is not used here; it's just exported.
*/
-Boolean isLegalUTF8Sequence(UTF8 *source, UTF8 *sourceEnd) {
+Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
int length = trailingBytesForUTF8[*source]+1;
if (source+length > sourceEnd) {
return false;
@@ -287,10 +294,10 @@ Boolean isLegalUTF8Sequence(UTF8 *source, UTF8 *sourceEnd) {
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF16 (
- UTF8** sourceStart, UTF8* sourceEnd,
- UTF16** targetStart, const UTF16* targetEnd, const ConversionFlags flags) {
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF8* source = *sourceStart;
+ const UTF8* source = *sourceStart;
UTF16* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
@@ -315,11 +322,12 @@ ConversionResult ConvertUTF8toUTF16 (
ch -= offsetsFromUTF8[extraBytesToRead];
if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
if ((flags == strictConversion) && (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)) {
- --source; /* return to the illegal value itself */
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
result = sourceIllegal;
break;
} else {
@@ -328,13 +336,15 @@ ConversionResult ConvertUTF8toUTF16 (
} else if (ch > UNI_MAX_UTF16) {
if (flags == strictConversion) {
result = sourceIllegal;
- source -= extraBytesToRead; /* return to the start */
+ source -= (extraBytesToRead+1); /* return to the start */
+ break; /* Bail out; shouldn't continue */
} else {
*target++ = UNI_REPLACEMENT_CHAR;
}
} else {
/* target is a character in range 0xFFFF - 0x10FFFF. */
if (target + 1 >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up source pointer! */
result = targetExhausted; break;
}
ch -= halfBase;
@@ -350,10 +360,10 @@ ConversionResult ConvertUTF8toUTF16 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF32toUTF8 (
- UTF32** sourceStart, const UTF32* sourceEnd,
- UTF8** targetStart, const UTF8* targetEnd, ConversionFlags flags) {
+ const UTF32** sourceStart, const UTF32* sourceEnd,
+ UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF32* source = *sourceStart;
+ const UTF32* source = *sourceStart;
UTF8* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch;
@@ -380,6 +390,7 @@ ConversionResult ConvertUTF32toUTF8 (
target += bytesToWrite;
if (target > targetEnd) {
+ --source; /* Back up source pointer! */
target -= bytesToWrite; result = targetExhausted; break;
}
switch (bytesToWrite) { /* note: everything falls through. */
@@ -398,10 +409,10 @@ ConversionResult ConvertUTF32toUTF8 (
/* --------------------------------------------------------------------- */
ConversionResult ConvertUTF8toUTF32 (
- UTF8** sourceStart, UTF8* sourceEnd,
- UTF32** targetStart, const UTF32* targetEnd, ConversionFlags flags) {
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
ConversionResult result = conversionOK;
- UTF8* source = *sourceStart;
+ const UTF8* source = *sourceStart;
UTF32* target = *targetStart;
while (source < sourceEnd) {
UTF32 ch = 0;
@@ -426,19 +437,13 @@ ConversionResult ConvertUTF8toUTF32 (
ch -= offsetsFromUTF8[extraBytesToRead];
if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up the source pointer! */
result = targetExhausted; break;
}
if (ch <= UNI_MAX_UTF32) {
*target++ = ch;
- } else if (ch > UNI_MAX_UTF32) {
+ } else { /* i.e., ch > UNI_MAX_UTF32 */
*target++ = UNI_REPLACEMENT_CHAR;
- } else {
- if (target + 1 >= targetEnd) {
- result = targetExhausted; break;
- }
- ch -= halfBase;
- *target++ = (ch >> halfShift) + UNI_SUR_HIGH_START;
- *target++ = (ch & halfMask) + UNI_SUR_LOW_START;
}
}
*sourceStart = source;