summaryrefslogtreecommitdiff
path: root/CVTUTF.H
diff options
context:
space:
mode:
Diffstat (limited to 'CVTUTF.H')
-rw-r--r--CVTUTF.H106
1 files changed, 106 insertions, 0 deletions
diff --git a/CVTUTF.H b/CVTUTF.H
new file mode 100644
index 0000000..85fd8ef
--- /dev/null
+++ b/CVTUTF.H
@@ -0,0 +1,106 @@
+/* ================================================================ */
+/*
+File: ConvertUTF.h
+Author: Mark E. Davis
+Copyright (C) 1994 Taligent, Inc. All rights reserved.
+
+This code is copyrighted. Under the copyright laws, this code may not
+be copied, in whole or part, without prior written consent of Taligent.
+
+Taligent grants the right to use or reprint this code as long as this
+ENTIRE copyright notice is reproduced in the code or reproduction.
+The code is provided AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES,
+EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN
+NO EVENT WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
+WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
+INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
+LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
+IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
+LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
+LIMITATION MAY NOT APPLY TO YOU.
+
+RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
+government is subject to restrictions as set forth in subparagraph
+(c)(l)(ii) of the Rights in Technical Data and Computer Software
+clause at DFARS 252.227-7013 and FAR 52.227-19.
+
+This code may be protected by one or more U.S. and International
+Patents.
+
+TRADEMARKS: Taligent and the Taligent Design Mark are registered
+trademarks of Taligent, Inc.
+*/
+/* ================================================================ */
+
+#include <stdio.h>
+#include <stdlib.h>
+// #include <types.h>
+#include <string.h>
+
+/* ================================================================ */
+/* The following 4 definitions are compiler-specific.
+ I would use wchar_t for UCS2/UTF16, except that the C standard
+ does not guarantee that it has at least 16 bits, so wchar_t is
+ no less portable than unsigned short!
+*/
+
+typedef unsigned long UCS4;
+typedef unsigned short UCS2;
+typedef unsigned short UTF16;
+typedef unsigned char UTF8;
+
+typedef enum {false, true} Boolean;
+
+
+const UCS4 kReplacementCharacter = 0x0000FFFDUL;
+const UCS4 kMaximumUCS2 = 0x0000FFFFUL;
+const UCS4 kMaximumUTF16 = 0x0010FFFFUL;
+const UCS4 kMaximumUCS4 = 0x7FFFFFFFUL;
+
+/* ================================================================ */
+/* Each of these routines converts the text between *sourceStart and
+sourceEnd, putting the result into the buffer between *targetStart and
+targetEnd. Note: the end pointers are *after* the last item: e.g.
+*(sourceEnd - 1) is the last item.
+
+ The return result indicates whether the conversion was successful,
+and if not, whether the problem was in the source or target buffers.
+
+ After the conversion, *sourceStart and *targetStart are both
+updated to point to the end of last text successfully converted in
+the respective buffers.
+*/
+
+typedef enum {
+ ok, /* conversion successful */
+ sourceExhausted, /* partial character in source, but hit end */
+ targetExhausted /* insuff. room in target for conversion */
+} ConversionResult;
+
+ConversionResult ConvertUCS4toUTF16 (
+ UCS4** sourceStart, const UCS4* sourceEnd,
+ UTF16** targetStart, const UTF16* targetEnd);
+
+ConversionResult ConvertUTF16toUCS4 (
+ UTF16** sourceStart, UTF16* sourceEnd,
+ UCS4** targetStart, const UCS4* targetEnd);
+
+ConversionResult ConvertUTF16toUTF8 (
+ UTF16** sourceStart, const UTF16* sourceEnd,
+ UTF8** targetStart, const UTF8* targetEnd);
+
+ConversionResult ConvertUTF8toUTF16 (
+ UTF8** sourceStart, UTF8* sourceEnd,
+ UTF16** targetStart, const UTF16* targetEnd);
+
+ConversionResult ConvertUCS4toUTF8 (
+ UCS4** sourceStart, const UCS4* sourceEnd,
+ UTF8** targetStart, const UTF8* targetEnd);
+
+ConversionResult ConvertUTF8toUCS4 (
+ UTF8** sourceStart, UTF8* sourceEnd,
+ UCS4** targetStart, const UCS4* targetEnd);
+
+/* ================================================================ */