94 lines
2 KiB
Text
94 lines
2 KiB
Text
```
|
|
#include <stdio.h>
|
|
```
|
|
|
|
morph_string_as_ascii_to_utf8 ()
|
|
<Seq ~ <ValueTerminated 0> Char~Ascii~native.UInt8>
|
|
--> <Seq Char~Unicode>
|
|
~ UTF-8
|
|
~ <Seq~<ValueTerminated 0> native.UInt8>
|
|
```
|
|
while( *src ) { *dst++ = *src++; }
|
|
*dst = 0;
|
|
return 0;
|
|
```
|
|
|
|
morph_string_as_utf8_to_ascii ()
|
|
<Seq Char~Unicode>
|
|
~ UTF-8
|
|
~ <Seq~<ValueTerminated 0> native.UInt8>
|
|
--> <Seq ~ <ValueTerminated 0> Char~Ascii~native.UInt8>
|
|
```
|
|
while( *src ) {
|
|
if( *src < 128 ) {
|
|
*dst++ = *src++;
|
|
} else {
|
|
fprintf(stderr, "(morph UTF-8 to Ascii) ignore multi-byte character\n");
|
|
while( *++src >= 128 );
|
|
}
|
|
}
|
|
*dst = 0;
|
|
return 0;
|
|
```
|
|
|
|
morph_string_as_ascii_to_utf32 ()
|
|
<Seq ~ <ValueTerminated 0> Char~Ascii~native.UInt8>
|
|
--> <Seq Char~Unicode>
|
|
~ UTF-32
|
|
~ <Seq~<ValueTerminated 0> native.UInt32>
|
|
```
|
|
while( *src ) { *dst++ = *src++; }
|
|
*dst = 0;
|
|
return 0;
|
|
```
|
|
|
|
morph_string_as_utf8_to_utf32 ()
|
|
|
|
<Seq Char~Unicode>
|
|
~ UTF-8
|
|
~ <Seq~<ValueTerminated 0> native.UInt8>
|
|
|
|
--> <Seq Char~Unicode>
|
|
~ UTF-32
|
|
~ <Seq~<ValueTerminated 0> native.UInt32>
|
|
|
|
```
|
|
bool has_multibyte = false;
|
|
uint32_t val = 0;
|
|
while( *src ) {
|
|
uint8_t tag = (*src >> 6)&0b11;
|
|
switch( tag ) {
|
|
// single byte
|
|
case 0b00:
|
|
case 0b01:
|
|
if( has_multibyte ) {
|
|
*dst++ = val;
|
|
has_multibyte = false;
|
|
}
|
|
*dst++ = *src++;
|
|
break;
|
|
|
|
// start multibyte
|
|
case 0b11:
|
|
if( has_multibyte ) {
|
|
*dst++ = val;
|
|
}
|
|
has_multibyte = true;
|
|
val = (*src++) & 0b111111;
|
|
break;
|
|
|
|
// continue multibyte
|
|
case 0b10:
|
|
val <<= 6;
|
|
val |= (*src++) & 0b111111;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if( has_multibyte )
|
|
*dst++ = val;
|
|
|
|
*dst++ = 0;
|
|
|
|
return 0;
|
|
```
|