From 72b12524aff98c94c1decda71c2072674f22119b Mon Sep 17 00:00:00 2001 From: Nicolas Date: Sun, 9 Nov 2025 15:45:37 +0100 Subject: [PATCH] x64 32 bit support added --- .github/workflows/ci.yml | 6 +- src/copapy/_binwrite.py | 1 + src/copapy/_compiler.py | 2 +- src/copapy/_stencils.py | 26 ++- src/coparun/runmem.c | 10 ++ src/coparun/runmem.h | 1 + ...ssue001_aarch64.py => test_ops_aarch64.py} | 0 tests/test_ops_x86.py | 152 ++++++++++++++++++ tools/build.bat | 19 ++- tools/inspect.bat | 2 + 10 files changed, 206 insertions(+), 13 deletions(-) rename tests/{test_issue001_aarch64.py => test_ops_aarch64.py} (100%) create mode 100644 tests/test_ops_x86.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 17727d5..18d0da3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: - name: Install Python dependencies run: python -m pip install -e .[dev] - - name: Install ARM binutils and qemu + - name: Install ARM binutils, qemu and gcc-multilib (32 bit support) if: strategy.job-index == 0 run: | echo "set man-db/auto-update false" | sudo debconf-communicate @@ -72,7 +72,7 @@ jobs: run: | mkdir -p bin gcc -O3 -DENABLE_BASIC_LOGGING -o bin/coparun src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c - aarch64-linux-gnu-gcc -O3 -static -DENABLE_BASIC_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o bin/coparun-aarch64 + aarch64-linux-gnu-gcc -O3 -static -DENABLE_BASIC_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o bin/coparun-aarch64 - name: Generate debug asm files if: strategy.job-index == 0 @@ -177,6 +177,8 @@ jobs: mkdir bin call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=amd64 cl /DENABLE_BASIC_LOGGING /O2 src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:bin\coparun.exe + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\Common7\Tools\VsDevCmd.bat" -arch=x86 + cl 
/DENABLE_BASIC_LOGGING /O2 src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:bin\coparun-x86.exe - name: Run tests with pytest run: pytest diff --git a/src/copapy/_binwrite.py b/src/copapy/_binwrite.py index 423436a..27147bc 100644 --- a/src/copapy/_binwrite.py +++ b/src/copapy/_binwrite.py @@ -9,6 +9,7 @@ Command = Enum('Command', [('ALLOCATE_DATA', 1), ('COPY_DATA', 2), ('PATCH_FUNC', 0x1000), ('PATCH_OBJECT', 0x2000), ('PATCH_OBJECT_HI21', 0x2001), ('PATCH_OBJECT_ABS', 0x2002), + ('PATCH_OBJECT_REL', 0x2003), ('ENTRY_POINT', 7), ('RUN_PROG', 64), ('READ_DATA', 65), ('END_COM', 256), ('FREE_MEMORY', 257), ('DUMP_CODE', 258)]) diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index 9150aed..a16333d 100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -319,7 +319,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi obj_addr = object_addr_lookup[associated_net] patch = sdb.get_patch(reloc, obj_addr, offset, binw.Command.PATCH_OBJECT.value) elif reloc.target_symbol_name.startswith('result_'): - raise Exception(f"Stencil {node.name} seams to branch to multiple result_* calls.") + raise Exception(f"Stencil {node.name} seems to branch to multiple result_* calls.") else: # Patch constants addresses on heap obj_addr = reloc.target_symbol_offset + section_addr_lookup[reloc.target_section_index] diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index fd693cb..754f72f 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -173,13 +173,27 @@ class stencil_database(): patch_offset = pr.fields['r_offset'] - relocation.function_offset - relocation.start + function_offset #print(f"xx {pr.fields['r_offset'] - relocation.function_offset} {relocation.target_symbol_name=} {pr.fields['r_offset']=} {relocation.function_offset=} {relocation.start=} {function_offset=}") scale = 1 + mask = 0xFFFFFFFF # 32 bit - if pr.type.endswith('_PLT32') or pr.type.endswith('_PC32'): + if 
pr.type.endswith('64_PC32') or pr.type.endswith('64_PLT32'): # S + A - P - mask = 0xFFFFFFFF # 32 bit patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") + + elif pr.type == 'R_386_PC32': + # S + A - P + patch_value = symbol_address + pr.fields['r_addend'] - patch_offset - 4 + #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") + + elif pr.type == 'R_386_32': + # R_386_32 + # (S + A) + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x03 # Relative to data section + #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") elif pr.type.endswith('_CALL26'): + # R_AARCH64_CALL26 # ((S + A) - P) >> 2 assert pr.file.byteorder == 'little', "Big endian not supported for ARM64" mask = 0x3ffffff # 26 bit (1<<26)-1 @@ -187,6 +201,8 @@ class stencil_database(): scale = 4 elif pr.type.endswith('_ADR_PREL_PG_HI21'): + # R_AARCH64_ADR_PREL_PG_HI21 + # Page(S + A) - Page(P) assert pr.file.byteorder == 'little', "Big endian not supported for ARM64" mask = 0 # Handled by runner patch_value = symbol_address + pr.fields['r_addend'] @@ -195,17 +211,19 @@ class stencil_database(): #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") elif pr.type.endswith('_LDST32_ABS_LO12_NC'): + # R_AARCH64_LDST32_ABS_LO12_NC # (S + A) & 0xFFF mask = 0b11_1111_1111_1100_0000_0000 - patch_value = (symbol_address + pr.fields['r_addend']) + patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x02 # Absolut value scale = 4 #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") elif pr.type.endswith('_LDST64_ABS_LO12_NC'): + # R_AARCH64_LDST64_ABS_LO12_NC # (S + A) & 
0xFFF mask = 0b11_1111_1111_1100_0000_0000 - patch_value = (symbol_address + pr.fields['r_addend']) + patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x02 # Absolut value scale = 8 #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index ed9bb4c..4ddd03e 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -139,6 +139,16 @@ int parse_commands(uint8_t *bytes) { patch(executable_memory + offs, patch_mask, value / patch_scale); break; + case PATCH_OBJECT_REL: + offs = *(uint32_t*)bytes; bytes += 4; + patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = *(int32_t*)bytes; bytes += 4; + LOG("PATCH_OBJECT_REL patch_offs=%i patch_addr=%p scale=%i value=%i\n", + offs, (void*)(data_memory + value), patch_scale, value); + *(void **)(executable_memory + offs) = data_memory + value; // / patch_scale; + break; + case PATCH_OBJECT_HI21: offs = *(uint32_t*)bytes; bytes += 4; patch_mask = *(uint32_t*)bytes; bytes += 4; diff --git a/src/coparun/runmem.h b/src/coparun/runmem.h index c59a5d2..983d0f0 100644 --- a/src/coparun/runmem.h +++ b/src/coparun/runmem.h @@ -23,6 +23,7 @@ #define PATCH_OBJECT 0x2000 #define PATCH_OBJECT_HI21 0x2001 #define PATCH_OBJECT_ABS 0x2002 +#define PATCH_OBJECT_REL 0x2003 #define ENTRY_POINT 7 #define RUN_PROG 64 #define READ_DATA 65 diff --git a/tests/test_issue001_aarch64.py b/tests/test_ops_aarch64.py similarity index 100% rename from tests/test_issue001_aarch64.py rename to tests/test_ops_aarch64.py diff --git a/tests/test_ops_x86.py b/tests/test_ops_x86.py new file mode 100644 index 0000000..a108ca5 --- /dev/null +++ b/tests/test_ops_x86.py @@ -0,0 +1,152 @@ +from copapy import NumLike, iif, variable, sin +from copapy.backend import Write, compile_to_dag, add_read_command +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import 
warnings +import re +import struct +import platform +import copapy as cp + + +def parse_results(log_text: str) -> dict[int, bytes]: + regex = r"^READ_DATA offs=(\d*) size=(\d*) data=(.*)$" + matches = re.finditer(regex, log_text, re.MULTILINE) + var_dict: dict[int, bytes] = {} + + for match in matches: + value_str: list[str] = match.group(3).strip().split(' ') + #print('--', value_str) + value = bytes(int(v, base=16) for v in value_str) + if len(value) <= 8: + var_dict[int(match.group(1))] = value + + return var_dict + + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def function1(c1: NumLike) -> list[NumLike]: + return [c1 / 4, c1 / -4, c1 // 4, c1 // -4, (c1 * -1) // 4, + c1 * 4, c1 * -4, + c1 + 4, c1 - 4, + c1 > 2, c1 > 100, c1 < 4, c1 < 100] + + +def function2(c1: NumLike) -> list[NumLike]: + return [c1 * 4.44, c1 * -4.44] + + +def function3(c1: NumLike) -> list[NumLike]: + return [c1 / 4] + + +def function4(c1: NumLike) -> list[NumLike]: + return [c1 == 9, c1 == 4, c1 != 9, c1 != 4] + + +def function5(c1: NumLike) -> list[NumLike]: + return [c1 == True, c1 == False, c1 != True, c1 != False, c1 / 2, c1 + 2] + + +def function6(c1: NumLike) -> list[NumLike]: + return [c1 == True] + + +def iiftests(c1: NumLike) -> list[NumLike]: + return [iif(c1 > 5, 8, 9), + iif(c1 < 5, 8.5, 9.5), + iif(1 > 5, 3.3, 8.8) + c1, + iif(1 < 5, c1 * 3.3, 8.8), + iif(c1 < 5, c1 * 3.3, 8.8)] + + +def test_compile(): + t1 = cp.vector([10, 11, 12]) + cp.vector(cp.variable(v) for v in range(3)) + t2 = t1.sum() + + t3 = cp.vector(cp.variable(1 / (v + 1)) for v in range(3)) + t4 = ((t3 * t1) * 2).sum() + t5 = ((t3 * t1) * 
2).magnitude() + + + c_i = variable(9) + c_f = variable(1.111) + c_b = variable(True) + + ret_test = function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [c_i % 2, sin(c_f)] + iiftests(c_i) + iiftests(c_f) + ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2, sin(1.111)] + iiftests(9) + iiftests(1.111) + + out = [Write(r) for r in ret_test + [t2, t4, t5]] + + #ret_test += [c_i, v2] + #ret_ref += [9, 4.44, -4.44] + + sdb = backend.stencil_db_from_package('x86') + dw, variables = compile_to_dag(out, sdb) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + # run program command + dw.write_com(_binwrite.Command.RUN_PROG) + #dw.write_com(_binwrite.Command.DUMP_CODE) + + for net in ret_test: + assert isinstance(net, backend.Net) + add_read_command(dw, variables, net) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + dw.write_com(_binwrite.Command.END_COM) + + print('* Data to runner:') + dw.print() + + dw.to_file('bin/test-x86.copapy') + + if platform.machine() != 'AMD64' and platform.machine() != 'x86': + warnings.warn(f"Test skipped, {platform.machine()} not supported for this test.", UserWarning) + else: + command = ['bin/coparun-x86', 'bin/test-x86.copapy', 'bin/test-x86.copapy.bin'] + result = run_command(command) + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + result_data = parse_results(result) + + for test, ref in zip(ret_test, ret_ref): + assert isinstance(test, variable) + address = variables[test][0] + data = result_data[address] + if test.dtype == 'int': + val = int.from_bytes(data, sdb.byteorder, signed=True) + elif test.dtype == 'bool': + val = bool.from_bytes(data, sdb.byteorder) + elif test.dtype == 'float': + en = {'little': '<', 'big': '>'}[sdb.byteorder] + val = 
struct.unpack(en + 'f', data)[0] + assert isinstance(val, float) + else: + raise Exception(f"Unknown type: {test.dtype}") + print('+', val, ref, test.dtype, f" addr={address}") + #for t in (int, float, bool): + # assert isinstance(val, t) == isinstance(ref, t), f"Result type does not match for {val} and {ref}" + #assert val == pytest.approx(ref, 1e-5), f"Result does not match: {val} and reference: {ref}" # pyright: ignore[reportUnknownMemberType] + + +if __name__ == "__main__": + #test_example() + test_compile() diff --git a/tools/build.bat b/tools/build.bat index 2fce37c..dd18d79 100644 --- a/tools/build.bat +++ b/tools/build.bat @@ -1,18 +1,25 @@ -call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 - echo ------------------------------ -echo - Build runner for Windows... +call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 +echo - Build runner for Windows 64 bit... cl /Zi /Od /DENABLE_BASIC_LOGGING src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:bin\coparun.exe REM Optimized: REM cl /O2 src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:bin\coparun.exe -echo ------------------------------ -echo - Build stencils for Windows... +echo - Build stencils for Windows 64 bit... python stencils/generate_stencils.py --abi ms bin/stencils.c -REM copy stencils\stencil_helper.h bin\ wsl gcc -fno-pic -c bin/stencils.c -O3 -o src/copapy/obj/stencils_AMD64_O3.o +echo ------------------------------ +call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x86 + +echo - Build runner for Windows 32 bit... +cl /Zi /Od /DENABLE_BASIC_LOGGING src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:bin\coparun-x86.exe + +echo - Build stencils for Windows 32 bit... 
+wsl gcc -m32 -fno-pic -c bin/stencils.c -O3 -o src/copapy/obj/stencils_x86_O3.o + + echo ------------------------------ echo - Build stencils for aarch64... python stencils/generate_stencils.py bin/stencils.c diff --git a/tools/inspect.bat b/tools/inspect.bat index 955b98b..f1441a5 100644 --- a/tools/inspect.bat +++ b/tools/inspect.bat @@ -9,3 +9,5 @@ wsl aarch64-linux-gnu-objdump -D -b binary -m aarch64 --adjust-vma=0x5000 bin/te REM wsl objdump -D -b binary -m i386:x86-64 --adjust-vma=0x1000 bin/test.copapy.bin REM wsl aarch64-linux-gnu-objdump -d -x src/copapy/obj/stencils_aarch64_O3.o + +REM wsl objdump -D -b binary -m i386 --adjust-vma=0x5000 bin/test-x86.copapy.bin \ No newline at end of file