From d0118b56b1a647f066cbc58a43d038a72a0a0ccd Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:06 +0100
Subject: [PATCH 1/3] add morphisms

- unicode
- value delimited seq
- zigzag encoding (still lacking any way to get to a signed integer yet...)
---
 morphisms/unicode.morphism-base     | 93 +++++++++++++++++++++++++++++
 morphisms/value_delim.morphism-base | 71 ++++++++++++++++++++++
 morphisms/zigzag.morphism-base      | 26 ++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 morphisms/unicode.morphism-base
 create mode 100644 morphisms/value_delim.morphism-base
 create mode 100644 morphisms/zigzag.morphism-base

diff --git a/morphisms/unicode.morphism-base b/morphisms/unicode.morphism-base
new file mode 100644
index 0000000..8805ace
--- /dev/null
+++ b/morphisms/unicode.morphism-base
@@ -0,0 +1,93 @@
+```
+```
+
+morph_string_as_ascii_to_utf8 ()
+     <Seq ~ <ValueTerminated '\0'>  Char~Ascii~x86.UInt8>
+-->  <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+```
+    while( *src ) { *dst++ = *src++; }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_utf8_to_ascii ()
+    <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+--> <Seq ~ <ValueTerminated '\0'>  Char~Ascii~x86.UInt8>
+```
+    while( *src != 0 ) {
+        if( *src >= 128 ) {
+            fprintf(stderr, "(morph UTF-8 to Ascii) ignore multi-byte character\n");
+            do { ++src; } while( *src >= 128 );  // skip the whole run of high-bit bytes
+        } else {
+            *dst++ = *src++;                     // 7-bit byte: copy unchanged
+        }
+    }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_ascii_to_utf32 ()
+     <Seq ~ <ValueTerminated '\0'>  Char~Ascii~x86.UInt8>
+-->  <Seq Char~Unicode>
+    ~ UTF-32LE
+    ~ <Seq~<ValueTerminated 0> x86.UInt32>
+```
+    while( *src ) { *dst++ = *src++; }
+    *dst = 0;
+    return 0;
+```
+
+morph_string_as_utf8_to_utf32 ()
+
+      <Seq Char~Unicode>
+    ~ UTF-8
+    ~ <Seq~<ValueTerminated 0> x86.UInt8>
+
+-->   <Seq Char~Unicode>
+    ~ UTF-32LE
+    ~ <Seq~<ValueTerminated 0> x86.UInt32>
+
+```
+    // Decodes the UTF-8 bytes in src into one dst element per character.
+    // `val` accumulates the code point of the multi-byte sequence in progress.
+    bool has_multibyte = false;
+    uint32_t val = 0;
+    while( *src ) {
+        uint8_t tag = (*src >> 6)&0b11;
+        switch( tag ) {
+            // single byte (0xxxxxxx): flush a pending sequence, then copy
+            case 0b00:
+            case 0b01:
+                if( has_multibyte ) {
+                    *dst++ = val;
+                    has_multibyte = false;
+                }
+                *dst++ = *src++;
+                break;
+
+            // start multibyte: the lead byte is 110xxxxx, 1110xxxx or 11110xxx,
+            // so it carries 5, 4 or 3 payload bits depending on its marker bits
+            case 0b11:
+                if( has_multibyte ) { *dst++ = val; }
+                has_multibyte = true;
+                val = *src & ( (*src & 0x20) ? ((*src & 0x10) ? 0x07 : 0x0F) : 0x1F );
+                src++;
+                break;
+
+            // continue multibyte (10xxxxxx): append 6 payload bits
+            case 0b10:
+                val <<= 6;
+                val |= (*src++) & 0b111111;
+                break;
+        }
+    }
+
+    if( has_multibyte ) { *dst++ = val; }
+    *dst++ = 0;
+
+    return 0;
+```
diff --git a/morphisms/value_delim.morphism-base b/morphisms/value_delim.morphism-base
new file mode 100644
index 0000000..1870ff7
--- /dev/null
+++ b/morphisms/value_delim.morphism-base
@@ -0,0 +1,71 @@
+```
+#include <array/length-prefix.h>
+#include <stdlib.h>
+```
+
+morph_seqseq_valsep_uint8 (T: Type, SrcDelim: T, DstDelim: T)
+      < Seq <Seq T> >
+    ~ < ValueSep SrcDelim T >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+
+-->   < Seq <Seq T> >
+    ~ < ValueSep DstDelim T >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+```
+    length_prefix_uint8_array_clear( dst );
+    // Re-delimit element by element: SrcDelim becomes DstDelim, other elements are copied.
+    for( uint64_t i = 0; i < src->len; ++i ) {
+        if( src->items[i] == SrcDelim ) {
+            length_prefix_uint8_array_push( dst, DstDelim );
+        } else if( src->items[i] == DstDelim ) {
+            // payload element equal to DstDelim: '\n' is written as backslash + 'n', any other value is dropped
+            if( DstDelim == '\n' ) {
+                length_prefix_uint8_array_push( dst, '\\' );
+                length_prefix_uint8_array_push( dst, 'n' );
+            }
+        } else {
+            length_prefix_uint8_array_push( dst, src->items[i] );
+        }
+    }
+
+    return 0;
+```
+
+
+morph_seqseq_as_valsep_to_lenpfx (T: Type, Delim: T, EscKey: T)
+      < Seq <Seq T> >
+    ~ < ValueSep Delim T >
+    ~ < Seq~<LengthPrefix x86.UInt64> T >
+
+--> < Seq~<LengthPrefix x86.UInt64>
+        <Seq~<LengthPrefix x86.UInt64> T >
+        ~ <RefMut < Seq~<LengthPrefix x86.UInt64> T>>
+        ~ x86.Address
+        ~ x86.UInt64
+    >
+```
+    length_prefix_uint64_array_clear( dst );
+
+    // Each item [start, cur) is copied into its own malloc'ed array whose address is pushed to dst.
+    uint8_t const * start = &src->items[0];
+    uint8_t const * cur = start;
+    uint8_t const * end = &src->items[src->len];
+
+    while( cur < end ) {
+        int const at_delim = ( *cur == Delim );
+        if( at_delim || cur+1 == end ) {
+            // the final byte belongs to the last item unless it is itself a delimiter
+            uint64_t len = (uint64_t)(cur - start) + (at_delim ? 0 : 1);
+            struct LengthPrefixUInt8Array * cur_item = malloc( sizeof(uint64_t) + sizeof(uint8_t) * len );
+            if( cur_item == NULL ) { return -1; }  // allocation failed: report non-zero instead of writing through NULL
+            cur_item->len = len;
+            memcpy( cur_item->items, start, len );
+            length_prefix_uint64_array_push( dst, (uint64_t)cur_item );
+            start = ++cur;
+        } else {
+            cur++;
+        }
+    }
+
+    return 0;
+```
diff --git a/morphisms/zigzag.morphism-base b/morphisms/zigzag.morphism-base
new file mode 100644
index 0000000..0d54033
--- /dev/null
+++ b/morphisms/zigzag.morphism-base
@@ -0,0 +1,26 @@
+```
+```
+
+morph_i64_as_twos_complement_to_zigzag ()
+    ℤ ~ x86.Int64
+--> ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
+```
+    // ZigZag: n >= 0 -> 2n, n < 0 -> 2|n| - 1.
+    if( *src >= 0 ) {
+        *dst = 2 * (uint64_t)*src;
+    } else {
+        *dst = 2 * (0 - (uint64_t)*src) - 1;  // negate as unsigned: -INT64_MIN overflows int64_t
+    }
+    return 0;
+```
+
+morph_i64_as_zigzag_to_twos_complement ()
+    ℤ ~ ZigZagInt ~ ℕ ~ x86.UInt64
+--> ℤ ~ x86.Int64
+```
+    // even k -> k/2 ; odd k -> -(k/2) - 1  (k+1 would wrap to 0 at UINT64_MAX)
+    *dst = ( *src % 2 == 0 ) ?  (int64_t)( *src / 2 )
+                             : -(int64_t)( *src / 2 ) - 1;
+
+    return 0;
+```

From 2eb3728027a0545874b81f55f4b7d3d6344efe9a Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:29 +0100
Subject: [PATCH 2/3] add test script

---
 test/test.sh | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100755 test/test.sh

diff --git a/test/test.sh b/test/test.sh
new file mode 100755
index 0000000..402e9f1
--- /dev/null
+++ b/test/test.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+run_test_case() {
+    # Reads the shell variables TEST_NAME, SRC_TYPE, DST_TYPE, INPUT and EXPECT set by the caller.
+    mkdir -p target/src .tmp
+
+    echo "
+-----------------------------------------------------------------------------
+Running test case ${TEST_NAME}"
+
+    # Group with { ...; } instead of ( ... ): a `return` inside a subshell would not leave the function.
+    ldmc "${SRC_TYPE}" "${DST_TYPE}" ../morphisms/*.morphism-base 2>|.tmp/ldmc_err 1>| "target/src/${TEST_NAME}.c" \
+    || { echo "... error at generation:"; cat .tmp/ldmc_err; return 1; }
+
+    gcc -I../morphisms/runtime/include "target/src/${TEST_NAME}.c" ../morphisms/runtime/src/*.c -o "target/${TEST_NAME}" \
+    || { echo "... error at compilation:"; return 2; }
+
+    LEN="$(printf '%s' "${EXPECT}" | wc -c)"
+    RESULT="$(printf '%s' "${INPUT}" | "./target/${TEST_NAME}" 2>.tmp/target_err | head -c ${LEN})"
+
+    if [ "${RESULT}" = "${EXPECT}" ]
+    then
+        echo "... ok"
+    else
+        printf '... incorrect result\n\n'
+        cat .tmp/target_err
+        echo ""
+        printf 'INPUT:\n%s\n' "$(printf '%s' "${INPUT}" | hexyl)"
+        printf 'EXPECTED:\n%s\n' "$(printf '%s' "${EXPECT}" | hexyl)"
+        printf 'GOT:\n%s\n' "$(printf '%s' "${RESULT}" | hexyl)"
+    fi
+
+    rm -rf .tmp
+}
+
+TEST_NAME=test-radix-convert
+SRC_TYPE="ℕ ~ <PosInt 10 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 10> ~ Char ~ Ascii ~ x86.UInt8>"
+DST_TYPE="ℕ ~ <PosInt 16 BigEndian> ~ <Seq~<ValueTerminated 0> <Digit 16> ~ Char ~ Ascii ~ x86.UInt8>"
+INPUT="255"
+EXPECT="ff"
+run_test_case
+
+TEST_NAME=test-msb-cont
+SRC_TYPE="<Seq~<ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq~MsbCont x86.UInt8>"
+INPUT=$(printf '\001\002\003')   # octal escapes: \x.. is not portable in printf
+EXPECT=$(printf '\201\202\003')  # bytes 0x81 0x82 0x03
+run_test_case
+
+TEST_NAME=test-value-sep1
+SRC_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ':' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
+DST_TYPE="<Seq <Seq Char~x86.UInt8>> ~ <ValueSep ',' Char~x86.UInt8> ~ <Seq~<ValueTerminated 0> Char~x86.UInt8>"
+INPUT="abc:def:hello world:test"
+EXPECT="abc,def,hello world,test"
+run_test_case
+
+TEST_NAME=test-value-sep2
+SRC_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep ':' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq <Seq x86.UInt8>> ~ <ValueSep '\\n' x86.UInt8> ~ <Seq~<ValueTerminated 0> x86.UInt8>"
+INPUT="abc:def:hello world:test"
+EXPECT=$(printf 'abc\ndef\nhello world\ntest')
+run_test_case
+
+TEST_NAME=test-value-sep-digit
+SRC_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+DST_TYPE="<Seq <Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+INPUT="c0:ff:ee"
+EXPECT="c0.ff.ee"
+run_test_case
+
+TEST_NAME=test-utf8-to-ascii
+SRC_TYPE="<Seq Char~Unicode> ~ UTF-8 ~ <Seq ~ <ValueTerminated 0> x86.UInt8>"
+DST_TYPE="<Seq~<ValueTerminated 0>  Char ~ Ascii ~ x86.UInt8>"
+INPUT="Hℵelαlo WΓΓΓorl⇒d"
+EXPECT="Hello World"
+run_test_case
+
+TEST_NAME=test-value-sep-posint
+SRC_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep ':' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+DST_TYPE="<Seq ℕ~<PosInt 16 BigEndian>~<Seq <Digit 16>~Char~Ascii~x86.UInt8>> ~ <ValueSep '.' Char~Ascii~x86.UInt8> ~ <Seq~<ValueTerminated '\0'> Char~Ascii~x86.UInt8>"
+INPUT="c0:ff:ee"
+EXPECT="c0.ff.ee"
+run_test_case

From 90eb43475b8c969e88473a2ccc47b68f8907c21a Mon Sep 17 00:00:00 2001
From: Michael Sippel <micha@fragmental.art>
Date: Sat, 15 Mar 2025 18:49:39 +0100
Subject: [PATCH 3/3] gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..eb5a316
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+target