From 96a1a153d4834ea5b2add47d5f2e081c3c8b2fbc Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 17 Nov 2025 09:00:23 +0100 Subject: [PATCH 01/17] PATCH_OBJECT_ARM32_ABS patching added to support 32 bit ARM --- src/copapy/_binwrite.py | 1 + src/coparun/runmem.c | 42 +++++++++++++++++++++++++++++++++++++---- src/coparun/runmem.h | 1 + 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/copapy/_binwrite.py b/src/copapy/_binwrite.py index 27147bc..ff77918 100644 --- a/src/copapy/_binwrite.py +++ b/src/copapy/_binwrite.py @@ -10,6 +10,7 @@ Command = Enum('Command', [('ALLOCATE_DATA', 1), ('COPY_DATA', 2), ('PATCH_OBJECT_HI21', 0x2001), ('PATCH_OBJECT_ABS', 0x2002), ('PATCH_OBJECT_REL', 0x2003), + ('PATCH_OBJECT_ARM32_ABS', 0x2004), ('ENTRY_POINT', 7), ('RUN_PROG', 64), ('READ_DATA', 65), ('END_COM', 256), ('FREE_MEMORY', 257), ('DUMP_CODE', 258)]) diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index 1def456..b0ddbd3 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -40,6 +40,30 @@ void patch_hi21(uint8_t *patch_addr, int32_t page_offset) { *(uint32_t *)patch_addr = instr; } +void patch_arm32_abs(uint8_t *patch_addr, uint32_t imm16) +{ + uint32_t i = (imm16 >> 11) & 0x1; + uint32_t imm4 = (imm16 >> 12) & 0xF; + uint32_t imm3 = (imm16 >> 8) & 0x7; + uint32_t imm8 = imm16 & 0xFF; + + uint32_t instr = *((uint32_t *)patch_addr); + + // Clear the fields we are going to replace: + // imm4 → bits 19:16 + // imm3 → bits 14:12 + // i → bit 26 + // imm8 → bits 7:0 + instr &= ~(uint32_t)((0xF << 16) | (0x7 << 12) | (1 << 26) | 0xFF); + + instr |= (imm4 << 16); + instr |= (imm3 << 12); + instr |= (i << 26); + instr |= (imm8); + + *((uint32_t *)patch_addr) = instr; +} + void free_memory() { deallocate_memory(executable_memory, executable_memory_len); deallocate_memory(data_memory, data_memory_len); @@ -141,7 +165,7 @@ int parse_commands(uint8_t *bytes) { case PATCH_OBJECT_REL: offs = *(uint32_t*)bytes; bytes += 4; - patch_mask = *(uint32_t*)bytes; bytes += 4; + bytes += 4; patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; LOG("PATCH_OBJECT_REL patch_offs=%i patch_addr=%p scale=%i value=%i\n", @@ -150,13 +174,23 @@ int parse_commands(uint8_t *bytes) { break; case PATCH_OBJECT_HI21: + offs = *(uint32_t*)bytes; bytes += 4; + bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = *(int32_t*)bytes; bytes += 4; + LOG("PATCH_OBJECT_HI21 patch_offs=%i scale=%i value=%i res_value=%i\n", + offs, patch_scale, value, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + patch_hi21(executable_memory + offs, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + break; + + case PATCH_OBJECT_ARM32_ABS: offs = *(uint32_t*)bytes; bytes += 4; patch_mask = *(uint32_t*)bytes; bytes += 4; patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; - LOG("PATCH_OBJECT_HI21 patch_offs=%i patch_mask=%#08x scale=%i value=%i res_value=%i\n", - offs, patch_mask, patch_scale, value, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); - patch_hi21(executable_memory + offs, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + LOG("PATCH_OBJECT_MOVW_ABS patch_offs=%i value=%i\n", + offs, value); + patch_arm32_abs(executable_memory + offs, (uint32_t)((uintptr_t)(data_memory + value) & patch_mask) / (uint32_t)patch_scale); break; case ENTRY_POINT: diff --git a/src/coparun/runmem.h b/src/coparun/runmem.h index 983d0f0..7f11dde 100644 --- a/src/coparun/runmem.h +++ b/src/coparun/runmem.h @@ -24,6 +24,7 @@ #define PATCH_OBJECT_HI21 0x2001 #define PATCH_OBJECT_ABS 0x2002 #define PATCH_OBJECT_REL 0x2003 +#define PATCH_OBJECT_ARM32_ABS 0x2004 #define ENTRY_POINT 7 #define RUN_PROG 64 #define READ_DATA 65 From 3b1c8f0e7274a54f63f877352fb9c72daf0e3b1f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 17 Nov 2025 09:01:09 +0100 Subject: [PATCH 02/17] detection for different arm architectures added --- src/copapy/_stencils.py | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index 35e7483..865b865 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -49,7 +49,8 @@ def detect_process_arch() -> str: elif arch in ('arm64', 'aarch64'): arch_family = 'arm64' elif 'arm' in arch: - arch_family = 'arm' + # Detect specific ARM version for Raspberry Pi (v6, v7, etc.) + arch_family = _detect_arm_version() elif 'mips' in arch: arch_family = 'mips64' if bits == 64 else 'mips' elif 'riscv' in arch: @@ -60,6 +61,23 @@ def detect_process_arch() -> str: return arch_family +def _detect_arm_version() -> str: + """Detect specific ARM version from /proc/cpuinfo on Linux. + """ + with open('/proc/cpuinfo', 'r') as f: + cpuinfo = f.read() + # Look for "CPU Architecture:" field which contains version info + for line in cpuinfo.split('\n'): + if line.startswith('CPU Architecture:'): + # Extracts "ARMv6", "ARMv7", "ARMv8", etc. + arch_str = line.split(':')[1].strip().lower() + if 'armv6' in arch_str: + return 'armv6' + elif 'armv7' in arch_str or 'armv8' in arch_str: + return 'armv7' # ARMv8 in 32-bit -> armv7 compatible + raise NotImplementedError(f"Unsupported ARM architecture version. CPU info: {cpuinfo}") + + def get_return_function_type(symbol: elf_symbol) -> str: if symbol.relocations: for reloc in reversed(symbol.relocations): @@ -273,8 +291,23 @@ class stencil_database(): scale = 8 #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") + elif pr.type.endswith('_MOVW_ABS_NC'): + # R_ARM_MOVW_ABS_NC + # (S + A) & 0xFFFF + mask = 0xFFFF + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x04 # Absolut value + + elif pr.type.endswith('_MOVT_ABS'): + # R_ARM_MOVT_ABS + # (S + A) & 0xFFFF0000 + mask = 0xFFFF0000 + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x04 # Absolut value + scale = 0x10000 + else: - raise NotImplementedError(f"Relocation type {pr.type} not implemented") + raise NotImplementedError(f"Relocation type {pr.type} in {relocation.pelfy_reloc.target_section.name} pointing to {relocation.pelfy_reloc.symbol.name} not implemented") return patch_entry(mask, patch_offset, patch_value, scale, symbol_type) From a93cdd4b8156f4da99b91b7897e698496d8d707d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 17 Nov 2025 09:03:27 +0100 Subject: [PATCH 03/17] ci and build scripts updated to build arm v6 and v7 --- tools/build.bat | 26 ++++++++++++++++++++++--- tools/cross_compiler_unix/Dockerfile | 2 +- tools/cross_compiler_unix/build_musl.sh | 4 ++++ tools/crosscompile.sh | 10 +++++++--- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/tools/build.bat b/tools/build.bat index d93eda4..bd7c053 100644 --- a/tools/build.bat +++ b/tools/build.bat @@ -29,18 +29,38 @@ echo - Build runner for linux x86 32 bit... wsl i686-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-x86 echo - Build stencils x86 32 bit... -REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh i686-linux-gnu-gcc-12 i686-linux-gnu-ld ../copapy/build/musl/musl_objects_x86.o +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh i686-linux-gnu-gcc-12 i686-linux-gnu-ld ../copapy/build/musl/musl_objects_x86.o -fno-pic wsl i686-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl i686-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_x86.o -o src/copapy/obj/stencils_x86_O3.o wsl i686-linux-gnu-objdump -d -x src/copapy/obj/stencils_x86_O3.o > build/stencils/stencils_x86_O3.asm echo --------------arm64 64 bit---------------- -echo - Build stencils for aarch64... wsl aarch64-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl aarch64-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_arm64.o -o src/copapy/obj/stencils_arm64_O3.o wsl aarch64-linux-gnu-objdump -d -x src/copapy/obj/stencils_arm64_O3.o > build/stencils/stencils_arm64_O3.asm - echo ------------------------------ echo - Build runner for Aarch64... wsl aarch64-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-aarch64 + + +echo --------------arm-v6 32 bit---------------- +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -marm" +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv6 -mfpu=vfp -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6.o -o src/copapy/obj/stencils_armv6_O3.o +wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv6_O3.o > build/stencils/stencils_armv6_O3.asm +echo ------------------------------ +REM echo - Build runner +REM wsl arm-none-eabi-gcc -march=armv6 -mfpu=vfp -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 + + + +echo --------------arm-v7 32 bit---------------- +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -marm" +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o -o src/copapy/obj/stencils_armv7_O3.o +wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm +echo ------------------------------ +echo - Build runner +wsl arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 + diff --git a/tools/cross_compiler_unix/Dockerfile b/tools/cross_compiler_unix/Dockerfile index acfd78c..7ba23ae 100644 --- a/tools/cross_compiler_unix/Dockerfile +++ b/tools/cross_compiler_unix/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ gcc-13 \ gcc-13-i686-linux-gnu \ - gcc-13-arm-linux-gnueabihf \ + gcc-13-arm-none-eabi \ gcc-13-mips-linux-gnu \ gcc-13-mipsel-linux-gnu \ gcc-13-riscv64-linux-gnu \ diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index 8c66202..3727284 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -16,6 +16,10 @@ sh ../packobjs.sh i686-linux-gnu-gcc-13 i686-linux-gnu-ld /object_files/musl_obj sh ../packobjs.sh aarch64-linux-gnu-gcc-13 aarch64-linux-gnu-ld /object_files/musl_objects_arm64.o +sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -marm" + +sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -marm" + #sh ../packobjs.sh mips mips-linux-gnu-gcc-13 mips-linux-gnu-ld #sh ../packobjs.sh riscv64 riscv64-linux-gnu-gcc-13 riscv64-linux-gnu-ld diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index 9b634bc..10d81d6 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -31,9 +31,13 @@ i686-linux-gnu-ld -r $STMP /object_files/musl_objects_x86.o -o $DEST/stencils_x8 aarch64-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP aarch64-linux-gnu-ld -r $STMP /object_files/musl_objects_arm64.o -o $DEST/stencils_arm64_$OPT.o -# ARMv7 -#arm-linux-gnueabihf-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP -#arm-linux-gnu-ld -r $STMP /object_files/musl_objects_arm.o -o $DEST/stencils_arm_$OPT.o +# ARMv6 hardware fp +arm-none-eabi-gcc-13 -march=armv6 -mfpu=vfp -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o -o $DEST/stencils_armv6_$OPT.o + +# ARMv7 hardware fp +arm-none-eabi-gcc-13 -march=armv7-a -mfpu=neon-vfpv3 -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o -o $DEST/stencils_armv7_$OPT.o # PowerPC64LE # powerpc64le-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $DEST/stencils_ppc64le_$OPT.o From 1e654c65f80fd98dd7685330ba36ab27871e65a2 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 17 Nov 2025 09:04:10 +0100 Subject: [PATCH 04/17] test added for 32 bit arm --- tests/test_compile_armv7.py | 98 +++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 tests/test_compile_armv7.py diff --git a/tests/test_compile_armv7.py b/tests/test_compile_armv7.py new file mode 100644 index 0000000..ce5451f --- /dev/null +++ b/tests/test_compile_armv7.py @@ -0,0 +1,98 @@ +from copapy import NumLike +from copapy.backend import Write, compile_to_dag, add_read_command +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import copapy as cp +import os +import warnings +import pytest + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + except Exception: + return False + return result.returncode == 0 + + +def function(c1: NumLike, c2: NumLike) -> tuple[NumLike, ...]: + i1 = c1 // 3.3 + 5 + i2 = c2 * 5 + c1 + r1 = i1 + i2 * 55 / 4 + r2 = 4 * i2 + 5 + + return i1, i2, r1, r2 + + +@pytest.mark.runner +def test_compile(): + #t1 = cp.vector([10, 11, 12]) + cp.vector(cp.variable(v) for v in range(3)) + #t2 = t1.sum() + + #t3 = cp.vector(cp.variable(1 / (v + 1)) for v in range(3)) + #t4 = ((t3 * t1) * 2).sum() + #t5 = ((t3 * t1) * 2).magnitude() + + #ret = (t2, t4, t5) + + ret = (cp.variable(5) * 5 + 8,) + + out = [Write(r) for r in ret] + + sdb = backend.stencil_db_from_package('armv7') + il, variables = compile_to_dag(out, sdb) + + # run program command + il.write_com(_binwrite.Command.RUN_PROG) + #il.write_com(_binwrite.Command.DUMP_CODE) + + for net in ret: + assert isinstance(net, backend.Net) + add_read_command(il, variables, net) + + il.write_com(_binwrite.Command.END_COM) + + print('* Data to runner:') + il.print() + + il.to_file('build/runner/test-armv7.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-aarch64 not found, aarch64 test skipped!", UserWarning) + elif not os.path.isfile('build/runner/coparun-aarch64'): + warnings.warn("aarch64 runner not found, aarch64 test skipped!", UserWarning) + else: + command = ['build/runner/coparun-armv7', 'build/runner/test-armv7.copapy', 'build/runner/test-armv7.copapy.bin'] + result = run_command(qemu_command + command) + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + # Compare to x86_64 reference results + assert " size=4 data=24 00 00 00" in result + assert " size=4 data=56 55 25 42" in result + assert " size=4 data=B4 F9 C8 41" in result + + +if __name__ == "__main__": + #test_example() + test_compile() From fcd34e693729af9800a2422683f7e84714e7f533 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:10:53 +0100 Subject: [PATCH 05/17] stencil alignment for ARM set to 4 - gcc complaints otherwise --- stencils/stencil_helper.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stencils/stencil_helper.h b/stencils/stencil_helper.h index e753047..35ff96b 100644 --- a/stencils/stencil_helper.h +++ b/stencils/stencil_helper.h @@ -3,7 +3,11 @@ // Remove function alignment for stencils #if defined(__GNUC__) #define NOINLINE __attribute__((noinline)) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(__thumb__) || defined(_M_ARM) +#define STENCIL __attribute__((aligned(4))) +#else #define STENCIL __attribute__((aligned(1))) +#endif #else #define NOINLINE #define STENCIL From d5786d29572fe7a3aa1f58255f93a42712d2512f Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:11:55 +0100 Subject: [PATCH 06/17] addend retrieval removed from patch_arm32_abs (moved to pelfy) --- src/coparun/runmem.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index b0ddbd3..7e0762e 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -42,24 +42,18 @@ void patch_hi21(uint8_t *patch_addr, int32_t page_offset) { void patch_arm32_abs(uint8_t *patch_addr, uint32_t imm16) { - uint32_t i = (imm16 >> 11) & 0x1; - uint32_t imm4 = (imm16 >> 12) & 0xF; - uint32_t imm3 = (imm16 >> 8) & 0x7; - uint32_t imm8 = imm16 & 0xFF; - uint32_t instr = *((uint32_t *)patch_addr); - // Clear the fields we are going to replace: - // imm4 → bits 19:16 - // imm3 → bits 14:12 - // i → bit 26 - // imm8 → bits 7:0 - instr &= ~(uint32_t)((0xF << 16) | (0x7 << 12) | (1 << 26) | 0xFF); + // Split the 16-bit immediate into A1 MOVT fields + uint32_t imm4 = (imm16 >> 12) & 0xF; + uint32_t imm12 = imm16 & 0xFFF; + // Clear the immediate fields: imm4 (bits 19:16) and imm12 (bits 11:0) + instr &= ~(uint32_t)((0xF << 16) | 0xFFF); + + // Set new immediate fields instr |= (imm4 << 16); - instr |= (imm3 << 12); - instr |= (i << 26); - instr |= (imm8); + instr |= imm12; *((uint32_t *)patch_addr) = instr; } @@ -188,8 +182,8 @@ int parse_commands(uint8_t *bytes) { patch_mask = *(uint32_t*)bytes; bytes += 4; patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; - LOG("PATCH_OBJECT_MOVW_ABS patch_offs=%i value=%i\n", - offs, value); + LOG("PATCH_OBJECT_ARM32_ABS patch_offs=%i patch_mask=%#08x scale=%i value=%i imm16=%#04x\n", + offs, patch_mask, patch_scale, value, (uint32_t)((uintptr_t)(data_memory + value) & patch_mask) / (uint32_t)patch_scale); patch_arm32_abs(executable_memory + offs, (uint32_t)((uintptr_t)(data_memory + value) & patch_mask) / (uint32_t)patch_scale); break; From 45808d8373eaf8aa245c6462e3e8e622ca90fb7d Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:17:04 +0100 Subject: [PATCH 07/17] Switched to pelfy 1.0.7 to support armv7 relocations --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e2332e6..6108eda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "pelfy>=1.0.5" + "pelfy>=1.0.7" ] [project.urls] From 4c4e83a18e1fc71032409f17145fd8033f5f1c82 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:33:30 +0100 Subject: [PATCH 08/17] R_ARM_JUMP24 & R_ARM_CALL patching added --- src/copapy/_stencils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index 865b865..46aea29 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -247,6 +247,13 @@ class stencil_database(): symbol_type = symbol_type + 0x03 # Relative to data section #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") + elif pr.type.endswith('_ARM_JUMP24') or pr.type.endswith('_ARM_CALL'): + # R_ARM_JUMP24 & R_ARM_CALL + # ((S + A) - P) >> 2 + mask = 0xffffff # 24 bit + patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + scale = 4 + elif pr.type.endswith('_CALL26') or pr.type.endswith('_JUMP26'): # R_AARCH64_CALL26 # ((S + A) - P) >> 2 @@ -297,6 +304,7 @@ class stencil_database(): mask = 0xFFFF patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x04 # Absolut value + #print(f" *> {pr.type} {patch_value=} {symbol_address=}, {function_offset=}") elif pr.type.endswith('_MOVT_ABS'): # R_ARM_MOVT_ABS From 8db2c41e1b72b458cff6391e1a7d5f880258176b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:46:03 +0100 Subject: [PATCH 09/17] ci and build scripts updated for armv7 --- .github/workflows/build_docker_image.yml | 31 +++++++++++++++++- .github/workflows/ci.yml | 37 +++++++++++++++++++--- tools/build.bat | 2 +- tools/build.sh | 24 +++++++++----- tools/cross_compiler_unix/build_musl.sh | 4 +-- tools/crosscompile.sh | 4 +-- tools/inspect.sh | 13 ++++++-- tools/{arm64_test => qemu_test}/Dockerfile | 0 8 files changed, 94 insertions(+), 21 deletions(-) rename tools/{arm64_test => qemu_test}/Dockerfile (100%) diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml index 6f9ab27..b67c98e 100644 --- a/.github/workflows/build_docker_image.yml +++ b/.github/workflows/build_docker_image.yml @@ -61,5 +61,34 @@ jobs: run: echo "IMAGE_NAME=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/arm64_test:1" >> $GITHUB_ENV - name: Build & Push Docker image - run: docker buildx build --platform linux/arm64 --push -t $IMAGE_NAME tools/arm64_test/ + run: docker buildx build --platform linux/arm64 --push -t $IMAGE_NAME tools/qemu_test/ + docker-build-armv7: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: arm + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Set image name + run: echo "IMAGE_NAME=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/armv7_test:1" >> $GITHUB_ENV + + - name: Build & Push Docker image + run: docker buildx build --platform linux/arm/v7 --push -t $IMAGE_NAME tools/qemu_test/ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6274454..41a47f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI Pipeline on: push: - branches: [main] + branches: [main, dev] pull_request: - branches: [main] + branches: [main, dev] jobs: build_stencils: @@ -149,6 +149,34 @@ jobs: name: runner-linux-arm64 path: build/runner/* + build-armv7: + needs: [build_stencils] + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: stencil-object-files + path: src/copapy/obj + - name: Set up QEMU for ARMv7 + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/arm/v7 + - name: Use ARMv7 container + run: | + docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ + bash -lc "pip install . && \ + mkdir -p build/runner && \ + gcc -O3 -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ + src/coparun/coparun.c src/coparun/mem_man.c && \ + pytest" + + - uses: actions/upload-artifact@v4 + with: + name: runner-linux-armv7 + path: build/runner/* + build-windows: needs: [build_stencils] runs-on: windows-latest @@ -204,7 +232,7 @@ jobs: path: build/runner/* release-stencils: - needs: [build_stencils, build-ubuntu, build-windows, build-arm64] + needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv7] runs-on: ubuntu-latest if: github.event_name == 'push' permissions: @@ -236,7 +264,8 @@ jobs: cp tmp/musl-object-files/* release/ cp tmp/cross-runner/coparun-* release/ cp tmp/runner-linux/coparun release/ - cp tmp/runner-linux-arm64/coparun release/ + cp tmp/runner-linux-arm64/coparun release/coparun-aarch64 + cp tmp/runner-linux-armv7/coparun release/coparun-armv7 cp tmp/runner-win/coparun*.exe release/ TAG="${{ steps.version.outputs.version }}" diff --git a/tools/build.bat b/tools/build.bat index bd7c053..5402235 100644 --- a/tools/build.bat +++ b/tools/build.bat @@ -51,7 +51,7 @@ wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6. wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv6_O3.o > build/stencils/stencils_armv6_O3.asm echo ------------------------------ REM echo - Build runner -REM wsl arm-none-eabi-gcc -march=armv6 -mfpu=vfp -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 +REM wsl arm-linux-gnueabihf-gcc -march=armv6 -mfpu=vfp -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 diff --git a/tools/build.sh b/tools/build.sh index 73f697e..884b7a5 100644 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,20 +1,28 @@ #!/bin/bash set -e set -v -mkdir -p bin + +mkdir -p build/stencils +mkdir -p build/runner + SRC=build/stencils/stencils.c DEST=src/copapy/obj python3 stencils/generate_stencils.py $SRC mkdir -p $DEST -gcc --version -#gcc -fno-pic -c $SRC -O0 -o $DEST/stencils_x86_64_O0.o -#gcc -fno-pic -c $SRC -O1 -o $DEST/stencils_x86_64_O1.o -#gcc -fno-pic -c $SRC -O2 -o $DEST/stencils_x86_64_O2.o -gcc -fno-pic -c $SRC -O3 -o $DEST/stencils_x86_64_O3.o + +gcc -fno-pic -ffunction-sections -c $SRC -O3 -o build/stencils/stencils.o +ld -r build/stencils/stencils.o build/musl/musl_objects_x86_64.o -o $DEST/stencils_x86_64_O3.o +objdump -d -x $DEST/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm mkdir bin -p gcc -Wall -Wextra -Wconversion -Wsign-conversion \ -Wshadow -Wstrict-overflow -Werror -g -O3 \ - -DENABLE_LOGGING + -DENABLE_LOGGING \ src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun -#x86_64-w64-mingw32-gcc -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -Werror src/runner/runmem2.c -Wall -O3 -o bin/runmem2.exe + + +echo "--------------arm-v7 32 bit----------------" +arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -c $SRC -O3 -o build/stencils/stencils.o +arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o -o $DEST/stencils_armv7_O3.o +arm-none-eabi-objdump -d -x $DEST/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm +arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index 3727284..a250830 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -16,9 +16,9 @@ sh ../packobjs.sh i686-linux-gnu-gcc-13 i686-linux-gnu-ld /object_files/musl_obj sh ../packobjs.sh aarch64-linux-gnu-gcc-13 aarch64-linux-gnu-ld /object_files/musl_objects_arm64.o -sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -marm" +sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -mfloat-abi=hard -marm" -sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -marm" +sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm" #sh ../packobjs.sh mips mips-linux-gnu-gcc-13 mips-linux-gnu-ld diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index 10d81d6..555fe56 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -32,11 +32,11 @@ aarch64-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP aarch64-linux-gnu-ld -r $STMP /object_files/musl_objects_arm64.o -o $DEST/stencils_arm64_$OPT.o # ARMv6 hardware fp -arm-none-eabi-gcc-13 -march=armv6 -mfpu=vfp -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-gcc-13 -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o -o $DEST/stencils_armv6_$OPT.o # ARMv7 hardware fp -arm-none-eabi-gcc-13 -march=armv7-a -mfpu=neon-vfpv3 -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-gcc-13 -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o -o $DEST/stencils_armv7_$OPT.o # PowerPC64LE diff --git a/tools/inspect.sh b/tools/inspect.sh index f08b019..c25e898 100644 --- a/tools/inspect.sh +++ b/tools/inspect.sh @@ -1,9 +1,16 @@ #!/bin/bash -source tools/build.sh +set -e +set -v -objdump -d -j .text src/copapy/obj/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm +sh tools/build.sh + +#objdump -d -j .text src/copapy/obj/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm python3 tools/make_example.py -python3 tools/extract_code.py "build/runner/test.copapy" "build/runner/test.copapy.bin" + +build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin objdump -D -b binary -m i386:x86-64 --adjust-vma=0x1000 build/runner/test.copapy.bin > build/runner/test.copapy.asm + +build/runner/coparun-armv7 build/runner/test-armv7.copapy build/runner/test.copapy-armv7.bin +arm-none-eabi-objdump -D -b binary -marm --adjust-vma=0x50000 build/runner/test.copapy-armv7.bin > build/runner/test.copapy-armv7.asm diff --git a/tools/arm64_test/Dockerfile b/tools/qemu_test/Dockerfile similarity index 100% rename from tools/arm64_test/Dockerfile rename to tools/qemu_test/Dockerfile From 2191889082ba33aff61c048fa90848e37d9047c6 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:46:18 +0100 Subject: [PATCH 10/17] test for armv7 added --- tests/test_compile_armv7.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/test_compile_armv7.py b/tests/test_compile_armv7.py index ce5451f..bf0fa89 100644 --- a/tests/test_compile_armv7.py +++ b/tests/test_compile_armv7.py @@ -43,16 +43,14 @@ def function(c1: NumLike, c2: NumLike) -> tuple[NumLike, ...]: @pytest.mark.runner def test_compile(): - #t1 = cp.vector([10, 11, 12]) + cp.vector(cp.variable(v) for v in range(3)) - #t2 = t1.sum() + t1 = cp.vector([10, 11, 12]) + cp.vector(cp.variable(v) for v in range(3)) + t2 = t1.sum() - #t3 = cp.vector(cp.variable(1 / (v + 1)) for v in range(3)) - #t4 = ((t3 * t1) * 2).sum() - #t5 = ((t3 * t1) * 2).magnitude() + t3 = cp.vector(cp.variable(1 / (v + 1)) for v in range(3)) + t4 = ((t3 * t1) * 2).sum() + t5 = ((t3 * t1) * 2).magnitude() - #ret = (t2, t4, t5) - - ret = (cp.variable(5) * 5 + 8,) + ret = (t2, t4, t5) out = [Write(r) for r in ret] @@ -69,15 +67,15 @@ def test_compile(): il.write_com(_binwrite.Command.END_COM) - print('* Data to runner:') - il.print() + #print('* Data to runner:') + #il.print() il.to_file('build/runner/test-armv7.copapy') if not check_for_qemu(): - warnings.warn("qemu-aarch64 not found, aarch64 test skipped!", UserWarning) - elif not os.path.isfile('build/runner/coparun-aarch64'): - warnings.warn("aarch64 runner not found, aarch64 test skipped!", UserWarning) + warnings.warn("qemu-armv7 not found, aarch64 test skipped!", UserWarning) + elif not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, aarch64 test skipped!", UserWarning) else: command = ['build/runner/coparun-armv7', 'build/runner/test-armv7.copapy', 'build/runner/test-armv7.copapy.bin'] result = run_command(qemu_command + command) From 0d119f5a0c173a452e0e636a67ddcf83394ae1dd Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 23 Nov 2025 17:55:45 +0100 Subject: [PATCH 11/17] compiler version removed for arm --- tools/cross_compiler_unix/Dockerfile | 2 +- tools/cross_compiler_unix/build_musl.sh | 4 ++-- tools/crosscompile.sh | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/cross_compiler_unix/Dockerfile b/tools/cross_compiler_unix/Dockerfile index 7ba23ae..3bef513 100644 --- a/tools/cross_compiler_unix/Dockerfile +++ b/tools/cross_compiler_unix/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ gcc-13 \ gcc-13-i686-linux-gnu \ - gcc-13-arm-none-eabi \ + gcc-arm-none-eabi \ gcc-13-mips-linux-gnu \ gcc-13-mipsel-linux-gnu \ gcc-13-riscv64-linux-gnu \ diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index a250830..ce4f1ef 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -16,9 +16,9 @@ sh ../packobjs.sh i686-linux-gnu-gcc-13 i686-linux-gnu-ld /object_files/musl_obj sh ../packobjs.sh aarch64-linux-gnu-gcc-13 aarch64-linux-gnu-ld /object_files/musl_objects_arm64.o -sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -mfloat-abi=hard -marm" +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -mfloat-abi=hard -marm" -sh ../packobjs.sh arm-none-eabi-gcc-13 arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm" +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm" #sh ../packobjs.sh mips mips-linux-gnu-gcc-13 mips-linux-gnu-ld diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index 555fe56..da80b0c 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -32,11 +32,11 @@ aarch64-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP aarch64-linux-gnu-ld -r $STMP /object_files/musl_objects_arm64.o -o $DEST/stencils_arm64_$OPT.o # ARMv6 hardware fp -arm-none-eabi-gcc-13 -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o -o $DEST/stencils_armv6_$OPT.o # ARMv7 hardware fp -arm-none-eabi-gcc-13 -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP +arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o -o $DEST/stencils_armv7_$OPT.o # PowerPC64LE From 4504abf5cb0779b05207c9da3a427610836b39b1 Mon Sep 17 00:00:00 2001 From: Nicolas Kruse Date: Sun, 23 Nov 2025 18:31:22 +0100 Subject: [PATCH 12/17] Update musl repository URL in build scripts with a mirror --- tools/cross_compiler_unix/build_musl.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index ce4f1ef..02a6b40 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -5,7 +5,8 @@ set -v mkdir -p /object_files -git clone --single-branch --branch master --depth 1 https://git.musl-libc.org/git/musl +#git clone --single-branch --branch master --depth 1 https://git.musl-libc.org/git/musl +git clone --single-branch --branch master --depth 1 https://repo.or.cz/musl.git cd musl #./configure CFLAGS="-O2 -fno-stack-protector -ffast-math" From 1a062760f7eab1f8478232f1944c642b99285436 Mon Sep 17 00:00:00 2001 From: Nicolas Kruse Date: Sun, 23 Nov 2025 21:04:50 +0100 Subject: [PATCH 13/17] detect_process_arch() updated to work for armv7 --- src/copapy/_stencils.py | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index 46aea29..c00b618 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -48,9 +48,10 @@ def detect_process_arch() -> str: arch_family = 'x86' elif arch in ('arm64', 'aarch64'): arch_family = 'arm64' - elif 'arm' in arch: - # Detect specific ARM version for Raspberry Pi (v6, v7, etc.) - arch_family = _detect_arm_version() + elif 'armv7' in arch or 'armv8' in arch: + arch_family = 'armv7' # Treat armv8 (64 bit CPU) as armv7 for 32 bit + elif 'armv6' in arch: + arch_family = 'armv6' elif 'mips' in arch: arch_family = 'mips64' if bits == 64 else 'mips' elif 'riscv' in arch: @@ -61,23 +62,6 @@ def detect_process_arch() -> str: return arch_family -def _detect_arm_version() -> str: - """Detect specific ARM version from /proc/cpuinfo on Linux. - """ - with open('/proc/cpuinfo', 'r') as f: - cpuinfo = f.read() - # Look for "CPU Architecture:" field which contains version info - for line in cpuinfo.split('\n'): - if line.startswith('CPU Architecture:'): - # Extracts "ARMv6", "ARMv7", "ARMv8", etc. - arch_str = line.split(':')[1].strip().lower() - if 'armv6' in arch_str: - return 'armv6' - elif 'armv7' in arch_str or 'armv8' in arch_str: - return 'armv7' # ARMv8 in 32-bit -> armv7 compatible - raise NotImplementedError(f"Unsupported ARM architecture version. CPU info: {cpuinfo}") - - def get_return_function_type(symbol: elf_symbol) -> str: if symbol.relocations: for reloc in reversed(symbol.relocations): From 78a159b4eabfe7a3f27e51382e0db4a9005aa560 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 24 Nov 2025 09:41:34 +0100 Subject: [PATCH 14/17] linking for required libgcc function on 32 bit arm added --- tools/build.sh | 6 +++++- tools/crosscompile.sh | 6 ++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/build.sh b/tools/build.sh index 884b7a5..ae78afc 100644 --- a/tools/build.sh +++ b/tools/build.sh @@ -22,7 +22,11 @@ gcc -Wall -Wextra -Wconversion -Wsign-conversion \ echo "--------------arm-v7 32 bit----------------" +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +#LIBM=$(arm-none-eabi-gcc -print-file-name=libm.a) +#LIBC=$(arm-none-eabi-gcc -print-file-name=libc.a) + arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -c $SRC -O3 -o build/stencils/stencils.o -arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o -o $DEST/stencils_armv7_O3.o +arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o $LIBGCC -o $DEST/stencils_armv7_O3.o arm-none-eabi-objdump -d -x $DEST/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index da80b0c..67a118c 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -33,11 +33,13 @@ aarch64-linux-gnu-ld -r $STMP /object_files/musl_objects_arm64.o -o $DEST/stenci # ARMv6 hardware fp arm-none-eabi-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP -arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o -o $DEST/stencils_armv6_$OPT.o +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o $LIBGCC -o $DEST/stencils_armv6_$OPT.o # ARMv7 hardware fp arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP -arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o -o $DEST/stencils_armv7_$OPT.o +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o $LIBGCC -o $DEST/stencils_armv7_$OPT.o # PowerPC64LE # powerpc64le-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $DEST/stencils_ppc64le_$OPT.o From 77924a5655086149237bf799316469fec91e0116 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 24 Nov 2025 16:22:24 +0100 Subject: [PATCH 15/17] Aux functions are now copied with there whole section to alow for branches without relocation inside of sections. --- src/copapy/_compiler.py | 66 ++++++++++++++++++++++++----------------- src/copapy/_stencils.py | 8 +++++ 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index 64585e1..4eade86 100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -221,7 +221,7 @@ def get_section_layout(section_indexes: Iterable[int], sdb: stencil_database, of return section_list, offset -def get_aux_function_mem_layout(function_names: Iterable[str], sdb: stencil_database, offset: int = 0) -> tuple[list[tuple[str, int, int]], int]: +def get_aux_func_layout(function_names: Iterable[str], sdb: stencil_database, offset: int = 0) -> tuple[list[tuple[int, int, int]], dict[str, int], int]: """Get memory layout for the provided auxiliary functions Arguments: @@ -230,17 +230,28 @@ def get_aux_function_mem_layout(function_names: Iterable[str], sdb: stencil_data offset: Starting offset for layout Returns: - Tuple of list of (function_name, start_offset, length) and total length + Tuple of list of (section_id, start_offset, length), function address lookup dictionary, and total length """ - function_list: list[tuple[str, int, int]] = [] + function_lookup: dict[str, int] = {} + section_list: list[tuple[int, int, int]] = [] + section_cache: dict[int, int] = {} for name in function_names: - lengths = sdb.get_symbol_size(name) - offset = (offset + 15) // 16 * 16 - function_list.append((name, offset, lengths)) - offset += lengths + index = sdb.get_symbol_section_index(name) - return function_list, offset + if index in section_cache: + section_offset = section_cache[index] + function_lookup[name] = section_offset + sdb.get_symbol_offset(name) + else: + lengths = sdb.get_section_size(index) + alignment = sdb.get_section_alignment(index) + offset = (offset + alignment - 1) // alignment * alignment + section_list.append((index, offset, lengths)) + section_cache[index] = offset + function_lookup[name] = offset + sdb.get_symbol_offset(name) + offset += lengths + + return section_list, function_lookup, offset def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[binw.data_writer, dict[Net, tuple[int, int, str]]]: @@ -272,10 +283,10 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi stencil_names = {node.name for _, node in extended_output_ops} aux_function_names = sdb.get_sub_functions(stencil_names) - used_sections = sdb.const_sections_from_functions(aux_function_names | stencil_names) + used_const_sections = sdb.const_sections_from_functions(aux_function_names | stencil_names) # Write data - section_mem_layout, sections_length = get_section_layout(used_sections, sdb) + section_mem_layout, sections_length = get_section_layout(used_const_sections, sdb) variable_mem_layout, variables_data_lengths = get_data_layout(variable_list, sdb, sections_length) dw.write_com(binw.Command.ALLOCATE_DATA) dw.write_int(variables_data_lengths) @@ -298,8 +309,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi #print(f'+ {net.dtype} {net.source.value}') # prep auxiliary_functions - aux_function_mem_layout, aux_function_lengths = get_aux_function_mem_layout(aux_function_names, sdb) - aux_func_addr_lookup = {name: offs for name, offs, _ in aux_function_mem_layout} + code_section_layout, func_addr_lookup, aux_func_len = get_aux_func_layout(aux_function_names, sdb) # Prepare program code and relocations object_addr_lookup = {net: offs for net, offs, _ in variable_mem_layout} @@ -308,7 +318,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi # assemble stencils to main program and patch stencils data = sdb.get_function_code('entry_function_shell', 'start') data_list.append(data) - offset = aux_function_lengths + len(data) + offset = aux_func_len + len(data) for associated_net, node in extended_output_ops: assert node.name in sdb.stencil_definitions, f"- Warning: {node.name} stencil not found" @@ -336,7 +346,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi #print('* constants stancils', patch.type, patch.patch_address, binw.Command.PATCH_OBJECT, node.name) elif reloc.target_symbol_info == 'STT_FUNC': - func_addr = aux_func_addr_lookup[reloc.target_symbol_name] + func_addr = func_addr_lookup[reloc.target_symbol_name] patch = sdb.get_patch(reloc, func_addr, offset, binw.Command.PATCH_FUNC.value) #print(patch.type, patch.addr, binw.Command.PATCH_FUNC, node.name, '->', patch.target_symbol_name) else: @@ -355,42 +365,44 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi dw.write_int(offset) # write aux functions code - for name, start, lengths in aux_function_mem_layout: + for i, start, lengths in code_section_layout: dw.write_com(binw.Command.COPY_CODE) dw.write_int(start) dw.write_int(lengths) - dw.write_bytes(sdb.get_function_code(name)) + dw.write_bytes(sdb.get_section_data(i)) # Patch aux functions - for name, start, _ in aux_function_mem_layout: + for name, start in func_addr_lookup.items(): + #print('--> ', name, list(sdb.get_relocations(name))) for reloc in sdb.get_relocations(name): #assert reloc.target_symbol_info != 'STT_FUNC', "Not tested yet!" - if reloc.target_symbol_info in {'STT_OBJECT', 'STT_NOTYPE', 'STT_SECTION'}: + if not reloc.target_section_index: + assert reloc.pelfy_reloc.type == 'R_ARM_V4BX' + + elif reloc.target_symbol_info in {'STT_OBJECT', 'STT_NOTYPE', 'STT_SECTION'}: # Patch constants/variable addresses on heap - #print('--> DATA ', name, reloc.pelfy_reloc.symbol.name, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) + #print('--> DATA ', name, reloc.pelfy_reloc.symbol, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) assert reloc.target_section_index in section_addr_lookup, f"- Function or object in {name} missing: {reloc.pelfy_reloc.symbol.name}" obj_addr = reloc.target_symbol_offset + section_addr_lookup[reloc.target_section_index] patch = sdb.get_patch(reloc, obj_addr, start, binw.Command.PATCH_OBJECT.value) + patch_list.append(patch) elif reloc.target_symbol_info == 'STT_FUNC': #print('--> FUNC', name, reloc.pelfy_reloc.symbol.name, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) - func_addr = aux_func_addr_lookup[reloc.target_symbol_name] + func_addr = func_addr_lookup[reloc.target_symbol_name] patch = sdb.get_patch(reloc, func_addr, start, binw.Command.PATCH_FUNC.value) #print(f' FUNC {func_addr=} {start=} {patch.address=}') + patch_list.append(patch) else: raise ValueError(f"Unsupported: {name=} {reloc.target_symbol_info=} {reloc.target_symbol_name=} {reloc.target_section_index}") - patch_list.append(patch) - - #assert False, aux_function_mem_layout - # write entry function code dw.write_com(binw.Command.COPY_CODE) - dw.write_int(aux_function_lengths) - dw.write_int(offset - aux_function_lengths) + dw.write_int(aux_func_len) + dw.write_int(offset - aux_func_len) dw.write_bytes(b''.join(data_list)) # write patch operations @@ -402,6 +414,6 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi dw.write_int(patch.value, signed=True) dw.write_com(binw.Command.ENTRY_POINT) - dw.write_int(aux_function_lengths) + dw.write_int(aux_func_len) return dw, variables diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index c00b618..3c74561 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -353,6 +353,14 @@ class stencil_database(): def get_symbol_size(self, name: str) -> int: """Returns the size of a specified symbol name.""" return self.elf.symbols[name].fields['st_size'] + + def get_symbol_offset(self, name: str) -> int: + """Returns the offset of a specified symbol in the section.""" + return self.elf.symbols[name].fields['st_value'] + + def get_symbol_section_index(self, name: str) -> int: + """Returns the section index for a specified symbol name.""" + return self.elf.symbols[name].fields['st_shndx'] def get_section_size(self, index: int) -> int: """Returns the size of a section specified by index.""" From 4a8f758a4597b7889e4c0a7cc024c25af7ab9b7a Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 24 Nov 2025 16:22:46 +0100 Subject: [PATCH 16/17] tests updated and extended for armv7 --- tests/test_comp_timing.py | 9 +- tests/test_ops_armv7.py | 169 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 5 deletions(-) create mode 100644 tests/test_ops_armv7.py diff --git a/tests/test_comp_timing.py b/tests/test_comp_timing.py index d13dd3b..9dc52b0 100644 --- a/tests/test_comp_timing.py +++ b/tests/test_comp_timing.py @@ -6,7 +6,7 @@ import copapy.backend as cpbe import copapy as cp import copapy._binwrite as binw from copapy._compiler import get_nets, get_section_layout, get_data_layout -from copapy._compiler import patch_entry, CPConstant, get_aux_function_mem_layout +from copapy._compiler import patch_entry, CPConstant, get_aux_func_layout def test_timing_compiler(): t1 = cp.vector([10, 11]*128) + cp.vector(cp.variable(v) for v in range(256)) @@ -88,7 +88,7 @@ def test_timing_compiler(): print('-- get_section_layout:') t0 = time.time() section_mem_layout, sections_length = get_section_layout(used_sections, sdb) - variable_mem_layout, variables_data_lengths = get_data_layout(variable_list, sdb, sections_length) + variable_mem_layout, _ = get_data_layout(variable_list, sdb, sections_length) t1 = time.time() print(f"time: {t1-t0:.6f}s") @@ -123,8 +123,7 @@ def test_timing_compiler(): # prep auxiliary_functions - aux_function_mem_layout, aux_function_lengths = get_aux_function_mem_layout(aux_function_names, sdb) - aux_func_addr_lookup = {name: offs for name, offs, _ in aux_function_mem_layout} + _, aux_func_addr_lookup, aux_function_lengths = get_aux_func_layout(aux_function_names, sdb) # Prepare program code and relocations object_addr_lookup = {net: offs for net, offs, _ in variable_mem_layout} @@ -179,7 +178,7 @@ def test_timing_compiler(): print('-- relocate aux functions:') t0 = time.time() # Patch aux functions - for name, start, _ in aux_function_mem_layout: + for name, start in aux_func_addr_lookup.items(): for reloc in sdb.get_relocations(name): #assert reloc.target_symbol_info != 'STT_FUNC', "Not tested yet!" diff --git a/tests/test_ops_armv7.py b/tests/test_ops_armv7.py new file mode 100644 index 0000000..0fd1b2b --- /dev/null +++ b/tests/test_ops_armv7.py @@ -0,0 +1,169 @@ +from copapy import NumLike, iif, variable +from copapy.backend import Write, compile_to_dag, add_read_command +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import os +import warnings +import re +import struct +import pytest +import copapy as cp + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + +def parse_results(log_text: str) -> dict[int, bytes]: + regex = r"^READ_DATA offs=(\d*) size=(\d*) data=(.*)$" + matches = re.finditer(regex, log_text, re.MULTILINE) + var_dict: dict[int, bytes] = {} + + for match in matches: + value_str: list[str] = match.group(3).strip().split(' ') + #print('--', value_str) + value = bytes(int(v, base=16) for v in value_str) + if len(value) <= 8: + var_dict[int(match.group(1))] = value + + return var_dict + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + except: + return False + return result.returncode == 0 + + +def function1(c1: NumLike) -> list[NumLike]: + return [c1 / 4, c1 / -4, c1 // 4, c1 // -4, (c1 * -1) // 4, + c1 * 4, c1 * -4, + c1 + 4, c1 - 4, + c1 > 2, c1 > 100, c1 < 4, c1 < 100] + + +def function2(c1: NumLike) -> list[NumLike]: + return [c1 * 4.44, c1 * -4.44] + + +def function3(c1: NumLike) -> list[NumLike]: + return [c1 / 4] + + +def function4(c1: NumLike) -> list[NumLike]: + return [c1 == 9, c1 == 4, c1 != 9, c1 != 4] + + +def function5(c1: NumLike) -> list[NumLike]: + return [c1 == True, c1 == False, c1 != True, c1 != False, c1 / 2, c1 + 2] + + +def function6(c1: NumLike) -> list[NumLike]: + return [c1 == True] + + +def iiftests(c1: NumLike) -> list[NumLike]: + return [iif(c1 > 5, 8, 9), + iif(c1 < 5, 8.5, 9.5), + iif(1 > 5, 3.3, 8.8) + c1, + iif(1 < 5, c1 * 3.3, 8.8), + iif(c1 < 5, c1 * 3.3, 8.8)] + + +@pytest.mark.runner +def test_compile(): + c_i = variable(9) + c_f = variable(1.111) + c_b = variable(True) + + ret_test = function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [variable(9) % 2] + iiftests(c_i) + iiftests(c_f) + [cp.asin(c_i/10)] + ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2] + iiftests(9) + iiftests(1.111) + [cp.asin(9/10)] + + #ret_test = (c_i * 100 // 5, c_f * 10 // 5) + #ret_ref = (9 * 100 // 5, 1.111 * 10 // 5) + + out = [Write(r) for r in ret_test] + + sdb = backend.stencil_db_from_package('armv7') + dw, variables = compile_to_dag(out, sdb) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + # run program command + dw.write_com(_binwrite.Command.RUN_PROG) + #dw.write_com(_binwrite.Command.DUMP_CODE) + + for net in ret_test: + assert isinstance(net, backend.Net) + add_read_command(dw, variables, net) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + dw.write_com(_binwrite.Command.END_COM) + + print('* Data to runner:') + dw.print() + + dw.to_file('build/runner/test-armv7.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-armv7 not found, armv7 test skipped!", UserWarning) + return + if not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, armv7 test skipped!", UserWarning) + return + + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7.copapy'] + ['build/runner/test-armv7.copapy.bin'] + #try: + result = run_command(command) + #except FileNotFoundError: + # warnings.warn(f"Test skipped, executable not found.", UserWarning) + # return + + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + result_data = parse_results(result) + + for test, ref in zip(ret_test, ret_ref): + assert isinstance(test, variable) + address = variables[test][0] + data = result_data[address] + if test.dtype == 'int': + val = int.from_bytes(data, sdb.byteorder, signed=True) + elif test.dtype == 'bool': + val = bool.from_bytes(data, sdb.byteorder) + elif test.dtype == 'float': + en = {'little': '<', 'big': '>'}[sdb.byteorder] + val = struct.unpack(en + 'f', data)[0] + assert isinstance(val, float) + else: + raise Exception(f"Unknown type: {test.dtype}") + print('+', val, ref, test.dtype, f" addr={address}") + for t in (int, float, bool): + assert isinstance(val, t) == isinstance(ref, t), f"Result type does not match for {val} and {ref}" + assert val == pytest.approx(ref, 1e-5), f"Result does not match: {val} and reference: {ref}" # pyright: ignore[reportUnknownMemberType] + + +if __name__ == "__main__": + #test_example() + test_compile() From afa8985a7f5d711db7936bda3620d198949f1512 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 24 Nov 2025 16:38:42 +0100 Subject: [PATCH 17/17] cd: wheel build script updated for armv7 --- .github/workflows/build_wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 7fe78b9..705b220 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -54,9 +54,9 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: - CIBW_ARCHS_LINUX: "x86_64 aarch64 i686" + CIBW_ARCHS_LINUX: "x86_64 aarch64 armv7l" # i686 CIBW_ARCHS_MACOS: "x86_64 universal2" - CIBW_ARCHS_WINDOWS: "AMD64 x86" + CIBW_ARCHS_WINDOWS: "AMD64" # x86 CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest -m \"not runner\" {package}/tests/"