From d0118b56b1a647f066cbc58a43d038a72a0a0ccd Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:06 +0100
Subject: [PATCH 1/3] add morphisms

- unicode
- value delimited seq
- zigzag encoding (still lacking any way to get to signed integer yet...)
---
 morphisms/unicode.morphism-base     | 108 ++++++++++++++++++++++++++++
 morphisms/value_delim.morphism-base |  82 +++++++++++++++++++++
 morphisms/zigzag.morphism-base      |  32 +++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 morphisms/unicode.morphism-base
 create mode 100644 morphisms/value_delim.morphism-base
 create mode 100644 morphisms/zigzag.morphism-base

diff --git a/morphisms/unicode.morphism-base b/morphisms/unicode.morphism-base
new file mode 100644
index 0000000..8805ace
--- /dev/null
+++ b/morphisms/unicode.morphism-base
@@ -0,0 +1,108 @@
+```
+```
+
+morph_string_as_ascii_to_utf8 ()
+    <Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
+--> <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+```
+    // ASCII is a subset of UTF-8: copy bytes up to the terminator
+    while( *src ) { *dst++ = *src++; }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_utf8_to_ascii ()
+    <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+--> <Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
+```
+    // keep single-byte characters, drop every multi-byte sequence
+    while( *src ) {
+        if( *src < 128 ) {
+            *dst++ = *src++;
+        } else {
+            fprintf(stderr, "(morph UTF-8 to Ascii) ignore multi-byte character\n");
+            // skip the lead byte and all following bytes >= 128
+            while( *++src >= 128 );
+        }
+    }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_ascii_to_utf32 ()
+    <Seq ~ <ValueTerminated '\0'> Char~Ascii~x86.UInt8>
+--> <Seq Char~Unicode>
+    ~ UTF-32LE
+    ~ <Seq~<ValueTerminated 0> x86.UInt32>
+```
+    // widen each ASCII byte to one 32-bit code unit
+    while( *src ) { *dst++ = *src++; }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_utf8_to_utf32 ()
+
+    <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+
+--> <Seq Char~Unicode>
+    ~ UTF-32LE
+    ~ <Seq~<ValueTerminated 0> x86.UInt32>
+
+```
+    // val accumulates the code point of the multi-byte sequence in progress;
+    // it is flushed when the next sequence or single byte starts, or at the end
+    bool has_multibyte = false;
+    uint32_t val = 0;
+    while( *src ) {
+        uint8_t tag = (*src >> 6)&0b11;
+        switch( tag ) {
+            // single byte
+            case 0b00:
+            case 0b01:
+                if( has_multibyte ) {
+                    *dst++ = val;
+                    has_multibyte = false;
+                }
+                *dst++ = *src++;
+                break;
+
+            // start multibyte
+            case 0b11:
+                if( has_multibyte ) {
+                    *dst++ = val;
+                }
+                has_multibyte = true;
+                // the lead byte carries 5, 4 or 3 payload bits for
+                // 2-, 3- and 4-byte sequences; mask off the length prefix
+                if( (*src & 0xF8) == 0xF0 ) {
+                    val = *src & 0x07;
+                } else if( (*src & 0xF0) == 0xE0 ) {
+                    val = *src & 0x0F;
+                } else {
+                    val = *src & 0x1F;
+                }
+                src++;
+                break;
+
+            // continue multibyte
+            case 0b10:
+                val <<= 6;
+                val |= (*src++) & 0b111111;
+                break;
+        }
+    }
+
+    if( has_multibyte )
+        *dst++ = val;
+
+    *dst++ = 0;
+
+    return 0;
+```
diff --git a/morphisms/value_delim.morphism-base b/morphisms/value_delim.morphism-base
new file mode 100644
index 0000000..1870ff7
--- /dev/null
+++ b/morphisms/value_delim.morphism-base
@@ -0,0 +1,82 @@
+```
+#include <array/length-prefix.h>
+#include <stdlib.h>
+#include <string.h>
+```
+
+morph_seqseq_valsep_uint8 (T: Type, SrcDelim: T, DstDelim: T)
+    < Seq <Seq T> >
+    ~ < ValueSep SrcDelim T >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+
+--> < Seq <Seq T> >
+    ~ < ValueSep DstDelim T >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+```
+    // rewrite every SrcDelim as DstDelim, copy all other items
+    length_prefix_uint8_array_clear( dst );
+
+    for( uint64_t i = 0; i < src->len; ++i ) {
+        if( src->items[i] == SrcDelim ) {
+            length_prefix_uint8_array_push( dst, DstDelim );
+        } else if( src->items[i] == DstDelim ) {
+            // an item equal to the new delimiter would split its
+            // sequence: '\n' is written as the two characters \n,
+            // any other delimiter value is dropped
+            if( DstDelim == '\n' ) {
+                length_prefix_uint8_array_push( dst, '\\' );
+                length_prefix_uint8_array_push( dst, 'n' );
+            }
+        } else {
+            length_prefix_uint8_array_push( dst, src->items[i] );
+        }
+    }
+
+    return 0;
+```
+
+
+morph_seqseq_as_valsep_to_lenpfx (T: Type, Delim: T, EscKey: T)
+    < Seq <Seq T> >
+    ~ < ValueSep T Delim >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+
+--> < Seq~<LengthPrefix x86.UInt64>
+        <Seq~<LengthPrefix x86.UInt64> T >
+        ~ <RefMut < Seq~<LengthPrefix x86.UInt64> T>>
+        ~ x86.Address
+        ~ x86.UInt64
+    >
+```
+    // split src at Delim; each item is copied into its own malloc'ed
+    // length-prefixed array whose address is pushed to dst
+    length_prefix_uint64_array_clear( dst );
+
+    struct LengthPrefixUInt8Array * cur_item = NULL;
+
+    uint8_t const * start = &src->items[0];
+    uint8_t const * cur = start;
+    uint8_t const * end = &src->items[src->len];
+
+    while( cur < end ) {
+        if( *cur == Delim || cur+1 == end ) {
+            // the final byte belongs to the last item unless it is
+            // itself a delimiter
+            uint64_t len = (uint64_t)(cur - start) + ( *cur == Delim ? 0 : 1 );
+
+            cur_item = malloc( sizeof(uint64_t) + sizeof(uint8_t) * len );
+            if( !cur_item ) {
+                return -1;
+            }
+            cur_item->len = len;
+            memcpy( cur_item->items, start, len );
+
+            length_prefix_uint64_array_push( dst, (uint64_t)(uintptr_t)cur_item );
+            start = ++cur;
+        } else {
+            cur++;
+        }
+    }
+
+    return 0;
+```
diff --git a/morphisms/zigzag.morphism-base b/morphisms/zigzag.morphism-base
new file mode 100644
index 0000000..0d54033
--- /dev/null
+++ b/morphisms/zigzag.morphism-base
@@ -0,0 +1,32 @@
+```
+```
+
+morph_i64_as_twos_complement_to_zigzag ()
+    ℤ ~ x86.Int64
+--> ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
+```
+    // zigzag maps 0, -1, 1, -2, 2, ... to 0, 1, 2, 3, 4, ...
+    if( *src >= 0 ) {
+        *dst = 2 * (uint64_t)*src;
+    } else {
+        // negate (*src + 1) rather than *src so INT64_MIN does not overflow
+        *dst = 2 * (uint64_t)(-(*src + 1)) + 1;
+    }
+
+    return 0;
+```
+
+morph_i64_as_zigzag_to_twos_complement ()
+    ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
+--> ℤ ~ x86.Int64
+```
+    // even codes are non-negative values, odd codes are negative values
+    if( *src % 2 == 0 ) {
+        *dst = (int64_t)(*src / 2);
+    } else {
+        // computed as -(n/2) - 1 so the largest odd code yields INT64_MIN
+        *dst = -(int64_t)(*src / 2) - 1;
+    }
+
+    return 0;
+```

From 2eb3728027a0545874b81f55f4b7d3d6344efe9a Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:29 +0100
Subject: [PATCH 2/3] add test script

---
 test/test.sh | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100755 test/test.sh

diff --git a/test/test.sh b/test/test.sh
new file mode 100755
index 0000000..402e9f1
--- /dev/null
+++ b/test/test.sh
@@ -0,0 +1,109 @@
+#!/bin/sh
+# Generates, compiles and runs one morphism program per test case and
+# compares the start of its output with the expected bytes.
+# Paths are relative to the test/ directory; needs ldmc, gcc and hexyl.
+
+FAILED=0
+
+# Runs the case described by TEST_NAME, SRC_TYPE, DST_TYPE, INPUT and EXPECT.
+# Increments FAILED on any failure; returns non-zero if generation or
+run_test_case() {
+    mkdir -p target/src
+    mkdir -p .tmp
+
+    echo "
+-----------------------------------------------------------------------------
+Running test case ${TEST_NAME}"
+
+    if ! ldmc "${SRC_TYPE}" "${DST_TYPE}" ../morphisms/*.morphism-base \
+            2>|.tmp/ldmc_err 1>| "target/src/${TEST_NAME}.c"
+    then
+        echo "... error at generation:"
+        cat .tmp/ldmc_err
+        FAILED=$((FAILED + 1))
+        return 1
+    fi
+
+    if ! gcc -I../morphisms/runtime/include "target/src/${TEST_NAME}.c" \
+            ../morphisms/runtime/src/*.c -o "target/${TEST_NAME}"
+    then
+        echo "... error at compilation:"
+        FAILED=$((FAILED + 1))
+        return 2
+    fi
+
+    # only the first LEN bytes of the program output are compared
+    LEN="$(printf '%s' "${EXPECT}" | wc -c)"
+    RESULT="$(printf '%s' "${INPUT}" | "./target/${TEST_NAME}" 2>.tmp/target_err | head -c ${LEN})"
+
+    if [ "${RESULT}" = "${EXPECT}" ]
+    then
+        echo "... ok"
+    else
+        printf '... incorrect result\n\n'
+        cat .tmp/target_err
+        printf '\n'
+        printf 'INPUT:\n%s\n' "$(printf '%s' "${INPUT}" | hexyl)"
+        printf 'EXPECTED:\n%s\n' "$(printf '%s' "${EXPECT}" | hexyl)"
+        printf 'GOT:\n%s\n' "$(printf '%s' "${RESULT}" | hexyl)"
+        FAILED=$((FAILED + 1))
+    fi
+
+    rm -rf .tmp
+}
+
+TEST_NAME=test-radix-convert
+SRC_TYPE="ℕ ~ <PosInt 10 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 10> ~ Char ~ Ascii ~ x86.UInt8>"
+DST_TYPE="ℕ ~ <PosInt 16 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 16> ~ Char ~ Ascii ~ x86.UInt8>"
+INPUT="255"
+EXPECT="ff"
+run_test_case
+
+TEST_NAME=test-msb-cont
+SRC_TYPE="<Seq~<ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq~MsbCont x86.UInt8>"
+INPUT=$(printf '\001\002\003')
+EXPECT=$(printf '\201\202\003')
+run_test_case
+
+TEST_NAME=test-value-sep1
+SRC_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ':' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
+DST_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ',' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
+INPUT="abc:def:hello world:test"
+EXPECT="abc,def,hello world,test"
+run_test_case
+
+TEST_NAME=test-value-sep2
+SRC_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep ':' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep '\\n' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
+INPUT="abc:def:hello world:test"
+EXPECT=$(printf 'abc\ndef\nhello world\ntest')
+run_test_case
+
+TEST_NAME=test-value-sep-digit
+SRC_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+DST_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+INPUT="c0:ff:ee"
+EXPECT="c0.ff.ee"
+run_test_case
+
+TEST_NAME=test-utf8-to-ascii
+SRC_TYPE="<Seq Char~Unicode> ~ UTF-8 ~ <Seq ~ <ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq~<ValueTerminated 0> Char ~ Ascii ~ x86.UInt8>"
+INPUT="Hℵelαlo WΓΓΓorl⇒d"
+EXPECT="Hello World"
+run_test_case
+
+TEST_NAME=test-value-sep-posint
+SRC_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+DST_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+INPUT="c0:ff:ee"
+EXPECT="c0.ff.ee"
+run_test_case
+
+# make failures visible to the caller through the exit status
+if [ "${FAILED}" -ne 0 ]
+then
+    echo "${FAILED} test case(s) failed"
+    exit 1
+fi

From 90eb43475b8c969e88473a2ccc47b68f8907c21a Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:39 +0100
Subject: [PATCH 3/3] gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eb5a316
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+target