summaryrefslogtreecommitdiff
path: root/includes/zhtable/printutf8.c
blob: b6ccf17cd337ab0aa65cf25fe999729907113044 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
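/*
 Code point ranges and the UTF-8 byte layout used for each range.
 Spaces inside a bit pattern mark hex-digit (nibble) boundaries of the
 code point, not byte boundaries.

 Unicode                   UTF8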
0x00000000 - 0x0000007F: 0xxxxxxx
0x00000080 - 0x000007FF: 110xxx xx 10xx xxxx
0x00000800 - 0x0000FFFF: 1110xxxx  10xxxx xx 10xx xxxx
0x00010000 - 0x001FFFFF: 11110x xx 10xx xxxx 10xxxx xx 10xx xxxx
0x00200000 - 0x03FFFFFF: 111110xx  10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx
0x04000000 - 0x7FFFFFFF: 1111110x  10xx xxxx 10xxxx xx 10xx xxxx 10xxxx xx 10xx xxxx

 4-bit binary pattern -> hex digit:
0000 0      1001 9
0001 1      1010 A
0010 2      1011 B
0011 3      1100 C
0100 4      1101 D 
0101 5      1110 E
0110 6      1111 F
0111 7
1000 8
*/
/*
 * Write the UTF-8 encoding of code point `u` to stdout.
 *
 * The byte layout follows the range table in the comment at the top of this
 * file, including the legacy 5- and 6-byte forms for values >= 0x200000.
 *
 *   u < 0x80 (this includes negative values): the single byte
 *       (unsigned char)u is written.
 *   u >= 0x4000000: the 6-byte form is used; only bits 0..30 of `u` are
 *       encoded, higher bits are ignored.
 *
 * No validation is performed (surrogates and out-of-range values are
 * encoded as-is). Nothing is returned; output errors are not reported.
 */
void printUTF8(long long u) {
  int trailing;   /* number of 10xxxxxx continuation bytes after the lead */
  int lead_tag;   /* fixed high bits of the lead byte */
  int lead_mask;  /* payload bits that fit into the lead byte */

  if (u < 0x80) {
    putchar((unsigned char)u);
    return;
  }

  if (u < 0x800) {
    trailing = 1; lead_tag = 0xc0; lead_mask = 0x1f;
  } else if (u < 0x10000) {
    trailing = 2; lead_tag = 0xe0; lead_mask = 0x0f;
  } else if (u < 0x200000) {
    trailing = 3; lead_tag = 0xf0; lead_mask = 0x07;
  } else if (u < 0x4000000) {
    trailing = 4; lead_tag = 0xf8; lead_mask = 0x03;
  } else {
    trailing = 5; lead_tag = 0xfc; lead_mask = 0x01;
  }

  /* Lead byte: the topmost payload bits, above all continuation groups. */
  putchar((unsigned char)(((u >> (6 * trailing)) & lead_mask) | lead_tag));

  /*
   * Continuation bytes: successive 6-bit groups, most significant first.
   * Deriving every group from the same shift/mask expression avoids the
   * hand-written per-byte masks, one of which was wrong in the 5-byte form.
   */
  for (int shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
    putchar((unsigned char)(((u >> shift) & 0x3f) | 0x80));
  }
}

/*
 * Filter: read text from stdin and, for every "U+" marker found, print
 * "U+<hex>" followed by the UTF-8 bytes of that code point and a '|'.
 *
 * The number after "U+" is parsed with strtoll in base 16, so parsing stops
 * at the first non-hex character; if no digits follow, the value is 0.
 * One '\n' is written after each chunk returned by fgets. Input lines
 * longer than the buffer are therefore processed in several chunks, each
 * followed by its own newline.
 */
int main(void) {
  char buf[1024];

  while (fgets(buf, sizeof buf, stdin)) {
    const char *p = buf;

    while ((p = strstr(p, "U+")) != NULL) {
      p += 2;  /* step over "U+" to the hex digits */
      long long cp = strtoll(p, NULL, 16);
      /* %llx matches the 64-bit argument; "%x" with a long long is UB. */
      printf("U+%05llx", (unsigned long long)cp);
      printUTF8(cp);
      printf("|");
    }
    printf("\n");
  }
  return 0;
}