Compare commits
3 commits
630948139b
...
90eb43475b
Author | SHA1 | Date | |
---|---|---|---|
90eb43475b | |||
2eb3728027 | |||
d0118b56b1 |
5 changed files with 274 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
target
|
93
morphisms/unicode.morphism-base
Normal file
93
morphisms/unicode.morphism-base
Normal file
|
@ -0,0 +1,93 @@
|
|||
```
|
||||
```
|
||||
|
||||
morph_string_as_ascii_to_utf8 ()
|
||||
<Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
|
||||
--> <Seq Char~Unicode>
|
||||
~ UTF-8
|
||||
~ <Seq~<ValueTerminated 0> x86.UInt8>
|
||||
```
|
||||
while( *src ) { *dst++ = *src++; }
|
||||
*dst = 0;
|
||||
return 0;
|
||||
```
|
||||
|
||||
morph_string_as_utf8_to_ascii ()
|
||||
<Seq Char~Unicode>
|
||||
~ UTF-8
|
||||
~ <Seq~<ValueTerminated 0> x86.UInt8>
|
||||
--> <Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
|
||||
```
|
||||
while( *src ) {
|
||||
if( *src < 128 ) {
|
||||
*dst++ = *src++;
|
||||
} else {
|
||||
fprintf(stderr, "(morph UTF-8 to Ascii) ignore multi-byte character\n");
|
||||
while( *++src >= 128 );
|
||||
}
|
||||
}
|
||||
*dst = 0;
|
||||
return 0;
|
||||
```
|
||||
|
||||
morph_string_as_ascii_to_utf32 ()
|
||||
<Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
|
||||
--> <Seq Char~Unicode>
|
||||
~ UTF-32LE
|
||||
~ <Seq~<ValueTerminated 0> x86.UInt32>
|
||||
```
|
||||
while( *src ) { *dst++ = *src++; }
|
||||
*dst = 0;
|
||||
return 0;
|
||||
```
|
||||
|
||||
morph_string_as_utf8_to_utf32 ()
|
||||
|
||||
<Seq Char~Unicode>
|
||||
~ UTF-8
|
||||
~ <Seq~<ValueTerminated 0> x86.UInt8>
|
||||
|
||||
--> <Seq Char~Unicode>
|
||||
~ UTF-32LE
|
||||
~ <Seq~<ValueTerminated 0> x86.UInt32>
|
||||
|
||||
```
|
||||
bool has_multibyte = false;
|
||||
uint32_t val = 0;
|
||||
while( *src ) {
|
||||
uint8_t tag = (*src >> 6)&0b11;
|
||||
switch( tag ) {
|
||||
// single byte
|
||||
case 0b00:
|
||||
case 0b01:
|
||||
if( has_multibyte ) {
|
||||
*dst++ = val;
|
||||
has_multibyte = false;
|
||||
}
|
||||
*dst++ = *src++;
|
||||
break;
|
||||
|
||||
// start multibyte
|
||||
case 0b11:
|
||||
if( has_multibyte ) {
|
||||
*dst++ = val;
|
||||
}
|
||||
has_multibyte = true;
|
||||
val = (*src++) & 0b111111;
|
||||
break;
|
||||
|
||||
// continue multibyte
|
||||
case 0b10:
|
||||
val <<= 6;
|
||||
val |= (*src++) & 0b111111;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( has_multibyte )
|
||||
*dst++ = val;
|
||||
|
||||
*dst++ = 0;
|
||||
|
||||
return 0;
|
||||
```
|
71
morphisms/value_delim.morphism-base
Normal file
71
morphisms/value_delim.morphism-base
Normal file
|
@ -0,0 +1,71 @@
|
|||
```
|
||||
#include <array/length-prefix.h>
|
||||
#include <stdlib.h>
|
||||
```
|
||||
|
||||
morph_seqseq_valsep_uint8 (T: Type, SrcDelim: T, DstDelim: T)
|
||||
< Seq <Seq T> >
|
||||
~ < ValueSep SrcDelim T >
|
||||
~ < Seq~<LengthPrefix x86.UInt64> T >
|
||||
|
||||
--> < Seq <Seq T> >
|
||||
~ < ValueSep DstDelim T >
|
||||
~ < Seq~<LengthPrefix x86.UInt64> T >
|
||||
```
|
||||
length_prefix_uint8_array_clear( dst );
|
||||
|
||||
uint8_t * dst_items = dst->items;
|
||||
for( uint64_t i = 0; i < src->len; ++i ) {
|
||||
if( src->items[i] == SrcDelim ) {
|
||||
length_prefix_uint8_array_push( dst, DstDelim );
|
||||
} else if( src->items[i] == DstDelim ) {
|
||||
if( DstDelim == '\n' ) {
|
||||
length_prefix_uint8_array_push( dst, '\\' );
|
||||
length_prefix_uint8_array_push( dst, 'n' );
|
||||
}
|
||||
} else {
|
||||
length_prefix_uint8_array_push( dst, src->items[i] );
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
```
|
||||
|
||||
|
||||
morph_seqseq_as_valsep_to_lenpfx (T: Type, Delim: T, EscKey: T)
|
||||
< Seq <Seq T> >
|
||||
~ < ValueSep T Delim >
|
||||
~ < Seq~<LengthPrefix x86.UInt64> T >
|
||||
|
||||
--> < Seq~<LengthPrefix x86.UInt64>
|
||||
<Seq~<LengthPrefix x86.UInt64> T >
|
||||
~ <RefMut < Seq~<LengthPrefix x86.UInt64> T>>
|
||||
~ x86.Address
|
||||
~ x86.UInt64
|
||||
>
|
||||
```
|
||||
length_prefix_uint64_array_clear( dst );
|
||||
|
||||
struct LengthPrefixUInt8Array * cur_item = NULL;
|
||||
|
||||
uint8_t const * start = &src->items[0];
|
||||
uint8_t const * cur = start;
|
||||
uint8_t const * end = &src->items[src->len];
|
||||
|
||||
while( cur < end ) {
|
||||
if( *cur == Delim || cur+1 == end ) {
|
||||
uint64_t len = cur - start;
|
||||
|
||||
cur_item = malloc( sizeof(uint64_t) + sizeof(uint8_t) * len );
|
||||
cur_item->len = len;
|
||||
memcpy( cur_item->items, start, len );
|
||||
|
||||
length_prefix_uint64_array_push( dst, (uint64_t)cur_item );
|
||||
start = ++cur;
|
||||
} else {
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
```
|
26
morphisms/zigzag.morphism-base
Normal file
26
morphisms/zigzag.morphism-base
Normal file
|
@ -0,0 +1,26 @@
|
|||
```
|
||||
```
|
||||
|
||||
morph_i64_as_twos_complement_to_zigzag ()
|
||||
ℤ ~ x86.Int64
|
||||
--> ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
|
||||
```
|
||||
if( *src >= 0 ) {
|
||||
*dst = (2 * (uint64_t)*src)
|
||||
} else {
|
||||
*dst = (2 * (uint64_t)(- *src)) - 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
```
|
||||
|
||||
morph_i64_as_zigzag_to_twos_complement ()
|
||||
ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
|
||||
--> ℤ ~ x86.Int64
|
||||
```
|
||||
if( *src % 2 == 0 ) {
|
||||
*dst = *src / 2;
|
||||
} else {
|
||||
*dst = - ((*src+1) / 2);
|
||||
}
|
||||
```
|
83
test/test.sh
Executable file
83
test/test.sh
Executable file
|
@ -0,0 +1,83 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Generate, compile and run a single morphism test case.
#
# Inputs (global variables set by the caller):
#   TEST_NAME  name used for the generated source file and the binary
#   SRC_TYPE   source type passed to ldmc
#   DST_TYPE   destination type passed to ldmc
#   INPUT      bytes piped into the compiled program
#   EXPECT     expected leading bytes of the program's output
#
# Returns 0 on success, 1 if ldmc fails, 2 if gcc fails, 3 on a mismatch.
#
# Fixes over the previous version:
#   - `|| ( ...; return N )` only left the subshell, so a failed stage fell
#     through to the next one; failures now really return.
#   - negative return codes are not valid exit statuses.
#   - `==` in `[`, `echo -n` and `echo -e` are not portable under /bin/sh;
#     replaced by `=` and printf.
#   - ${INPUT} is quoted so it is not word-split or glob-expanded.
#   - .tmp is removed on every exit path.
run_test_case() {
    mkdir -p target/src
    mkdir -p .tmp

    printf '\n-----------------------------------------------------------------------------\nRunning test case %s\n' "${TEST_NAME}"

    if ! ldmc "${SRC_TYPE}" "${DST_TYPE}" ../morphisms/*.morphism-base 2>|.tmp/ldmc_err 1>|"target/src/${TEST_NAME}.c"
    then
        echo "... error at generation:"
        cat .tmp/ldmc_err
        rm -rf .tmp
        return 1
    fi

    if ! gcc -I../morphisms/runtime/include "target/src/${TEST_NAME}.c" ../morphisms/runtime/src/*.c -o "target/${TEST_NAME}"
    then
        echo "... error at compilation:"
        rm -rf .tmp
        return 2
    fi

    # Only the first LEN bytes of the output are compared. The arithmetic
    # expansion strips the padding some wc implementations print.
    LEN=$(( $(printf '%s' "${EXPECT}" | wc -c) ))
    RESULT="$(printf '%s' "${INPUT}" | "./target/${TEST_NAME}" 2>.tmp/target_err | head -c "${LEN}")"

    if [ "${RESULT}" = "${EXPECT}" ]
    then
        echo "... ok"
        test_status=0
    else
        printf '... incorrect result\n\n'
        cat .tmp/target_err
        printf '\n'
        printf 'INPUT:\n%s\n' "$(printf '%s' "${INPUT}" | hexyl)"
        printf 'EXPECTED:\n%s\n' "$(printf '%s' "${EXPECT}" | hexyl)"
        printf 'GOT:\n%s\n' "$(printf '%s' "${RESULT}" | hexyl)"
        test_status=3
    fi

    rm -rf .tmp
    return "${test_status}"
}
|
||||
|
||||
TEST_NAME=test-radix-convert
|
||||
SRC_TYPE="ℕ ~ <PosInt 10 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 10> ~ Char ~ Ascii ~ x86.UInt8>"
|
||||
DST_TYPE="ℕ ~ <PosInt 16 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 16> ~ Char ~ Ascii ~ x86.UInt8>"
|
||||
INPUT="255"
|
||||
EXPECT="ff"
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-msb-cont
|
||||
SRC_TYPE="<Seq~<ValueTerminated 0> x86.UInt8>"
|
||||
DST_TYPE="<Seq~MsbCont x86.UInt8>"
|
||||
# Octal escapes: POSIX printf does not define \xHH, and /bin/sh
# implementations such as dash print it literally.
INPUT=$(printf '\001\002\003')
EXPECT=$(printf '\201\202\003')
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-value-sep1
|
||||
SRC_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ':' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
|
||||
DST_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ',' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
|
||||
INPUT="abc:def:hello world:test"
|
||||
EXPECT="abc,def,hello world,test"
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-value-sep2
|
||||
SRC_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep ':' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
|
||||
DST_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep '\\n' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
|
||||
INPUT="abc:def:hello world:test"
|
||||
# printf instead of `echo -en`: under /bin/sh the options may be echoed literally.
EXPECT=$(printf 'abc\ndef\nhello world\ntest')
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-value-sep-digit
|
||||
SRC_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
|
||||
DST_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
|
||||
INPUT="c0:ff:ee"
|
||||
# Plain assignment instead of `echo -en`: under /bin/sh the options may be echoed literally.
EXPECT="c0.ff.ee"
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-utf8-to-ascii
|
||||
SRC_TYPE="<Seq Char~Unicode> ~ UTF-8 ~ <Seq ~ <ValueTerminated 0> x86.UInt8>"
|
||||
DST_TYPE="<Seq~<ValueTerminated 0> Char ~ Ascii ~ x86.UInt8>"
|
||||
INPUT="Hℵelαlo WΓΓΓorl⇒d"
|
||||
EXPECT="Hello World"
|
||||
run_test_case
|
||||
|
||||
TEST_NAME=test-value-sep-posint
|
||||
SRC_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
|
||||
DST_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
|
||||
INPUT="c0:ff:ee"
|
||||
# Plain assignment instead of `echo -en`: under /bin/sh the options may be echoed literally.
EXPECT="c0.ff.ee"
|
||||
run_test_case
|
Loading…
Add table
Add a link
Reference in a new issue