diff options
Diffstat (limited to 'CVTUTF.H')
-rw-r--r-- | CVTUTF.H | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/CVTUTF.H b/CVTUTF.H new file mode 100644 index 0000000..85fd8ef --- /dev/null +++ b/CVTUTF.H @@ -0,0 +1,106 @@ +/* ================================================================ */ +/* +File: ConvertUTF.h +Author: Mark E. Davis +Copyright (C) 1994 Taligent, Inc. All rights reserved. + +This code is copyrighted. Under the copyright laws, this code may not +be copied, in whole or part, without prior written consent of Taligent. + +Taligent grants the right to use or reprint this code as long as this +ENTIRE copyright notice is reproduced in the code or reproduction. +The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, +EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN +NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, +WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS +INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY +LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN +IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. +BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF +LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE +LIMITATION MAY NOT APPLY TO YOU. + +RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the +government is subject to restrictions as set forth in subparagraph +(c)(l)(ii) of the Rights in Technical Data and Computer Software +clause at DFARS 252.227-7013 and FAR 52.227-19. + +This code may be protected by one or more U.S. and International +Patents. + +TRADEMARKS: Taligent and the Taligent Design Mark are registered +trademarks of Taligent, Inc. 
/* ================================================================ */

#ifndef CVTUTF_H
#define CVTUTF_H

#include <stdio.h>
#include <stdlib.h>
/* #include <types.h>  -- left disabled, exactly as in the original header */
#include <string.h>

/* ================================================================ */
/*
 * Code-unit types.
 *
 * These four definitions are compiler-specific.  wchar_t is not used for
 * UCS2/UTF16 because the C standard does not guarantee that it is at least
 * 16 bits wide, so it offers no portability advantage over unsigned short.
 *
 * NOTE(review): unsigned long is only guaranteed to be at least 32 bits; on
 * LP64 platforms it is 64 bits, so a UCS4 buffer may use 8 bytes per
 * element.  The type is left unchanged because altering it would change the
 * layout callers already depend on.
 */
typedef unsigned long  UCS4;
typedef unsigned short UCS2;
typedef unsigned short UTF16;
typedef unsigned char  UTF8;

/*
 * Boolean type with the enumerators false (0) and true (1).
 *
 * If <stdbool.h> has already been included, or the compiler is in C23 mode,
 * `false` and `true` are macros or keywords and cannot be declared as
 * enumerators.  In that case only the Boolean type name is supplied and the
 * existing false/true are used.
 */
#if defined(__bool_true_false_are_defined) || \
    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
typedef int Boolean;
#else
typedef enum {false, true} Boolean;
#endif

/*
 * Limits and special values.
 *
 * Declared `static` so that each translation unit including this header
 * gets a private copy.  Without internal linkage, every includer would
 * emit an external definition of the same object and the program would
 * fail to link with duplicate symbols.
 */
static const UCS4 kReplacementCharacter = 0x0000FFFDUL; /* U+FFFD REPLACEMENT CHARACTER */
static const UCS4 kMaximumUCS2          = 0x0000FFFFUL; /* largest 16-bit value */
static const UCS4 kMaximumUTF16         = 0x0010FFFFUL; /* highest code point reachable with surrogate pairs */
static const UCS4 kMaximumUCS4          = 0x7FFFFFFFUL; /* largest 31-bit value */

/* ================================================================ */
/*
 * Conversion routines.
 *
 * Every routine converts the text lying between *sourceStart and sourceEnd
 * and stores the result in the buffer lying between *targetStart and
 * targetEnd.  Both end pointers are one-past-the-end: *(sourceEnd - 1) is
 * the last source item.
 *
 * The return value reports whether the conversion succeeded and, if it did
 * not, whether the source or the target buffer was the problem.
 *
 * On return, *sourceStart and *targetStart have both been advanced to the
 * end of the last text that was successfully converted in their respective
 * buffers.
 */

typedef enum {
    ok,               /* conversion successful */
    sourceExhausted,  /* partial character in source, but hit end */
    targetExhausted   /* insufficient room in target for conversion */
} ConversionResult;

/*
 * NOTE(review): the routines that read UTF-16 or UTF-8 input declare
 * sourceEnd without const, unlike the UCS-4 readers and ConvertUTF16toUTF8.
 * The prototypes are reproduced unchanged because the matching definitions
 * are not visible from this header -- confirm before harmonising.
 */

/* UCS-4 -> UTF-16 */
ConversionResult ConvertUCS4toUTF16(
    UCS4 **sourceStart, const UCS4 *sourceEnd,
    UTF16 **targetStart, const UTF16 *targetEnd);

/* UTF-16 -> UCS-4 */
ConversionResult ConvertUTF16toUCS4(
    UTF16 **sourceStart, UTF16 *sourceEnd,
    UCS4 **targetStart, const UCS4 *targetEnd);

/* UTF-16 -> UTF-8 */
ConversionResult ConvertUTF16toUTF8(
    UTF16 **sourceStart, const UTF16 *sourceEnd,
    UTF8 **targetStart, const UTF8 *targetEnd);

/* UTF-8 -> UTF-16 */
ConversionResult ConvertUTF8toUTF16(
    UTF8 **sourceStart, UTF8 *sourceEnd,
    UTF16 **targetStart, const UTF16 *targetEnd);

/* UCS-4 -> UTF-8 */
ConversionResult ConvertUCS4toUTF8(
    UCS4 **sourceStart, const UCS4 *sourceEnd,
    UTF8 **targetStart, const UTF8 *targetEnd);

/* UTF-8 -> UCS-4 */
ConversionResult ConvertUTF8toUCS4(
    UTF8 **sourceStart, UTF8 *sourceEnd,
    UCS4 **targetStart, const UCS4 *targetEnd);

/* ================================================================ */

#endif /* CVTUTF_H */