From ed6bb1bc52d90278dbad5865497e07b2102df5d6 Mon Sep 17 00:00:00 2001 From: Nicolas Date: Mon, 3 Nov 2025 02:14:14 +0100 Subject: [PATCH] aarch64 relocation support for objects added and is working --- src/copapy/_binwrite.py | 6 ++-- src/copapy/_compiler.py | 3 +- src/copapy/_stencils.py | 54 +++++++++++++++++++++++++----------- src/copapy/_target.py | 1 - src/coparun/coparun.c | 15 ++++++++-- src/coparun/runmem.c | 61 ++++++++++++++++++++++++++++++++++------- src/coparun/runmem.h | 25 +++++++++-------- 7 files changed, 121 insertions(+), 44 deletions(-) diff --git a/src/copapy/_binwrite.py b/src/copapy/_binwrite.py index f4d7272..036ff37 100644 --- a/src/copapy/_binwrite.py +++ b/src/copapy/_binwrite.py @@ -6,9 +6,11 @@ ByteOrder = Literal['little', 'big'] Command = Enum('Command', [('ALLOCATE_DATA', 1), ('COPY_DATA', 2), ('ALLOCATE_CODE', 3), ('COPY_CODE', 4), - ('PATCH_FUNC', 5), ('PATCH_OBJECT', 6), ('ENTRY_POINT', 7), + ('PATCH_FUNC', 0x1000), ('PATCH_OBJECT', 0x2000), + ('PATCH_OBJECT_HI21', 0x2001), + ('ENTRY_POINT', 7), ('RUN_PROG', 64), ('READ_DATA', 65), - ('END_COM', 256), ('FREE_MEMORY', 257)]) + ('END_COM', 256), ('FREE_MEMORY', 257), ('DUMP_CODE', 258)]) COMMAND_SIZE = 4 diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index a258933..578dabc 100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -373,9 +373,10 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi # write patch operations for patch in patch_list: - dw.write_int(patch.patch_type) + dw.write_com(binw.Command(patch.patch_type)) dw.write_int(patch.address) dw.write_int(patch.mask) + dw.write_int(patch.scale) dw.write_int(patch.value, signed=True) dw.write_com(binw.Command.ENTRY_POINT) diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index 53719dd..5d1f015 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -33,6 +33,7 @@ class patch_entry: mask: int address: int value: int + scale: int patch_type: int @@ -46,13 +47,13 @@ def get_return_function_type(symbol: elf_symbol) -> str: def strip_function(func: elf_symbol) -> bytes: """Return stencil code by striped stancil function""" - assert func.relocations and func.relocations[-1].symbol.info == 'STT_NOTYPE', f"{func.name} is not a stancil function" + assert func.relocations and func.relocations[-1].symbol.info == 'STT_NOTYPE', f"{func.name} is not a stencil function" start_index, end_index = get_stencil_position(func) return func.data[start_index:end_index] def get_stencil_position(func: elf_symbol) -> tuple[int, int]: - start_index = 0 # For a "naked" function + start_index = 0 # TODO: Only for "naked" functions end_index = get_last_call_in_function(func) return start_index, end_index @@ -61,16 +62,21 @@ def get_last_call_in_function(func: elf_symbol) -> int: # Find last relocation in function assert func.relocations, f'No call function in stencil function {func.name}.' reloc = func.relocations[-1] - - instruction_lenghs = 4 if reloc.bits < 32 else 5 - return reloc.fields['r_offset'] - func.fields['st_value'] - reloc.fields['r_addend'] - instruction_lenghs + # Assume the call instruction is 4 bytes long for relocations with less than 32 bit and 5 bytes otherwise + instruction_lengths = 4 if reloc.bits < 32 else 5 + address_field_length = 4 + print(f"-> {[r.fields['r_offset'] - func.fields['st_value'] for r in func.relocations]}") + return reloc.fields['r_offset'] - func.fields['st_value'] + address_field_length - instruction_lengths def get_op_after_last_call_in_function(func: elf_symbol) -> int: # Find last relocation in function assert func.relocations, f'No call function in stencil function {func.name}.' reloc = func.relocations[-1] - return reloc.fields['r_offset'] - func.fields['st_value'] - reloc.fields['r_addend'] + if reloc.bits < 32: + return reloc.fields['r_offset'] - func.fields['st_value'] - reloc.fields['r_addend'] + 4 + else: + return reloc.fields['r_offset'] - func.fields['st_value'] - reloc.fields['r_addend'] def symbol_is_stencil(sym: elf_symbol) -> bool: @@ -138,12 +144,12 @@ class stencil_database(): print('->', symbol_name) for reloc in symbol.relocations: - print(' ', symbol_name, reloc.symbol.info, reloc.symbol.name, reloc.type) - + # address to fist byte to patch relative to the start of the symbol patch_offset = reloc.fields['r_offset'] - symbol.fields['st_value'] - start_index if patch_offset < end_index - start_index: # Exclude the call to the result_* function + print(' |', symbol_name, reloc.symbol.info, reloc.symbol.name, reloc.type) yield relocation_entry(reloc.symbol.name, reloc.symbol.info, reloc.symbol.fields['st_value'], @@ -169,6 +175,8 @@ class stencil_database(): # calculate absolut address to the first byte to patch # relative to the start of the (stripped stencil) function: patch_offset = pr.fields['r_offset'] - relocation.function_offset - relocation.start + function_offset + #print(f"xx {pr.fields['r_offset'] - relocation.function_offset} {relocation.target_symbol_name=} {pr.fields['r_offset']=} {relocation.function_offset=} {relocation.start=} {function_offset=}") + scale = 1 if pr.type.endswith('_PLT32') or pr.type.endswith('_PC32'): # S + A - P @@ -176,19 +184,33 @@ class stencil_database(): patch_value = symbol_address + pr.fields['r_addend'] - patch_offset print(f"** {patch_offset=} {relocation.target_symbol_name=} {pr.fields['r_offset']=} {relocation.function_offset=} {relocation.start=} {function_offset=}") - print(f" {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") + print(f" * {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") - #elif reloc.type.endswith('_JUMP26') or reloc.type.endswith('_CALL26'): - # # S + A - P - # assert reloc.file.byteorder == 'little', "Big endian not supported for ARM64" - # mask = 0x3ffffff # 26 bit - # imm = offset >> 2 - # assert imm < mask, "Relocation immediate value too large" + elif pr.type.endswith('_CALL26'): + # ((S + A) - P) >> 2 + assert pr.file.byteorder == 'little', "Big endian not supported for ARM64" + mask = 0x3ffffff # 26 bit (1<<26)-1 + patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + scale = 4 + + elif pr.type.endswith('_ADR_PREL_PG_HI21'): + assert pr.file.byteorder == 'little', "Big endian not supported for ARM64" + mask = 0 # Handled by runner + patch_value = symbol_address + pr.fields['r_addend'] + scale = 4096 + symbol_type = symbol_type + 0x01 + print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") + + elif pr.type.endswith('_LDST32_ABS_LO12_NC'): + # (S + A) & 0xFFF + mask = 0b11111111111100000000 + patch_value = symbol_address + pr.fields['r_addend'] + print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") else: raise NotImplementedError(f"Relocation type {pr.type} not implemented") - return patch_entry(mask, patch_offset, patch_value, symbol_type) + return patch_entry(mask, patch_offset, patch_value, scale, symbol_type) def get_stencil_code(self, name: str) -> bytes: diff --git a/src/copapy/_target.py b/src/copapy/_target.py index 18aee37..a30ffe4 100644 --- a/src/copapy/_target.py +++ b/src/copapy/_target.py @@ -83,7 +83,6 @@ class Target(): assert isinstance(net, Net), "Variable must be a copapy variable object" assert net in self._variables, f"Variable {net} not found. It might not have been compiled for the target." addr, lengths, var_type = self._variables[net] - print('...', self._variables[net], net.dtype) assert lengths > 0 data = read_data_mem(addr, lengths) assert data is not None and len(data) == lengths, f"Failed to read variable {net}" diff --git a/src/coparun/coparun.c b/src/coparun/coparun.c index 15779b0..919eaa1 100644 --- a/src/coparun/coparun.c +++ b/src/coparun/coparun.c @@ -4,8 +4,8 @@ #include "mem_man.h" int main(int argc, char *argv[]) { - if (argc != 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); return EXIT_FAILURE; } @@ -47,6 +47,17 @@ int main(int argc, char *argv[]) { int ret = parse_commands(file_buff); + if (ret == 2) { + /* Dump code for debugging */ + if (argc != 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + FILE *f = fopen(argv[2], "wb"); + fwrite(executable_memory, 1, (size_t)executable_memory_len, f); + fclose(f); + } + free_memory(); return ret < 0; diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index 395fb94..90400c7 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -24,14 +24,31 @@ uint32_t executable_memory_len = 0; entry_point_t entr_point = NULL; int data_offs = 0; -int patch(uint8_t *patch_addr, uint32_t patch_mask, int32_t value) { +void patch(uint8_t *patch_addr, uint32_t patch_mask, int32_t value) { uint32_t *val_ptr = (uint32_t*)patch_addr; uint32_t original = *val_ptr; - uint32_t new_value = (original & ~patch_mask) | ((uint32_t)value & patch_mask); + int32_t shift_factor = patch_mask & -patch_mask; + + uint32_t new_value = (original & ~patch_mask) | ((uint32_t)(value * shift_factor) & patch_mask); *val_ptr = new_value; - return 1; +} + +void patch_hi21(uint8_t *patch_addr, int32_t page_offset) { + uint32_t instr = *(uint32_t *)patch_addr; + + // Split page_offset into immhi (upper 19 bits) and immlo (lower 2 bits) + uint32_t immlo = page_offset & 0x3; // bits[1:0] + uint32_t immhi = (page_offset >> 2) & 0x7FFFF; // bits[20:2] + + // Clear previous imm fields: immhi (bits[23:5]) and immlo (bits[30:29]) + instr &= ~((0x7FFFFu << 5) | (0x3 << 29)); + + // Set new immhi and immlo + instr |= (immhi << 5) | (immlo << 29); + + *(uint32_t *)patch_addr = instr; } void free_memory() { @@ -42,6 +59,10 @@ void free_memory() { } int update_data_offs() { + if (data_memory && executable_memory && (data_memory - executable_memory > 0x7FFFFFFF || executable_memory - data_memory > 0x7FFFFFFF)) { + perror("Error: code and data memory to far apart"); + return 0; + } if (data_memory && executable_memory && (data_memory - executable_memory > 0x7FFFFFFF || executable_memory - data_memory > 0x7FFFFFFF)) { perror("Error: code and data memory to far apart"); return 0; @@ -50,10 +71,15 @@ int update_data_offs() { return 1; } +int floor_div(int a, int b) { + return a / b - ((a % b != 0) && ((a < 0) != (b < 0))); +} + int parse_commands(uint8_t *bytes) { int32_t value; uint32_t command; uint32_t patch_mask; + int32_t patch_scale; uint32_t offs; uint32_t size; int end_flag = 0; @@ -97,22 +123,31 @@ int parse_commands(uint8_t *bytes) { case PATCH_FUNC: offs = *(uint32_t*)bytes; bytes += 4; patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; - LOG("PATCH_FUNC patch_offs=%i patch_mask=%#08x value=%i\n", - offs, patch_mask, value); - patch(executable_memory + offs, patch_mask, value); + LOG("PATCH_FUNC patch_offs=%i patch_mask=%#08x scale=%i value=%i\n", + offs, patch_mask, patch_scale, value); + patch(executable_memory + offs, patch_mask, value / patch_scale); break; case PATCH_OBJECT: offs = *(uint32_t*)bytes; bytes += 4; patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; - LOG("PATCH_OBJECT patch_offs=%i patch_mask=%#08x value=%i\n", - offs, patch_mask, value); - patch(executable_memory + offs, patch_mask, value + data_offs); + LOG("PATCH_OBJECT patch_offs=%i patch_mask=%#08x scale=%i value=%i\n", + offs, patch_mask, patch_scale, value); + patch(executable_memory + offs, patch_mask, value / patch_scale + data_offs / patch_scale); break; - case PATCH_MATH_POW: + case PATCH_OBJECT_HI21: + offs = *(uint32_t*)bytes; bytes += 4; + patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = *(int32_t*)bytes; bytes += 4; + LOG("PATCH_OBJECT_HI21 patch_offs=%i patch_mask=%#08x scale=%i value=%i res_value=%i\n", + offs, patch_mask, patch_scale, value, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + patch_hi21(executable_memory + offs, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); break; case ENTRY_POINT: @@ -139,9 +174,15 @@ int parse_commands(uint8_t *bytes) { break; case FREE_MEMORY: + LOG("FREE_MENORY\n"); free_memory(); break; + case DUMP_CODE: + LOG("DUMP_CODE\n"); + end_flag = 2; + break; + case END_COM: LOG("END_COM\n"); end_flag = 1; diff --git a/src/coparun/runmem.h b/src/coparun/runmem.h index 87ec751..f384e78 100644 --- a/src/coparun/runmem.h +++ b/src/coparun/runmem.h @@ -4,18 +4,19 @@ #include /* Command opcodes used by the parser */ -#define ALLOCATE_DATA 1 -#define COPY_DATA 2 -#define ALLOCATE_CODE 3 -#define COPY_CODE 4 -#define PATCH_FUNC 5 -#define PATCH_OBJECT 6 -#define ENTRY_POINT 7 -#define RUN_PROG 64 -#define READ_DATA 65 -#define END_COM 256 -#define FREE_MEMORY 257 -#define PATCH_MATH_POW 512 +#define ALLOCATE_DATA 1 +#define COPY_DATA 2 +#define ALLOCATE_CODE 3 +#define COPY_CODE 4 +#define PATCH_FUNC 0x1000 +#define PATCH_OBJECT 0x2000 +#define PATCH_OBJECT_HI21 0x2001 +#define ENTRY_POINT 7 +#define RUN_PROG 64 +#define READ_DATA 65 +#define END_COM 256 +#define FREE_MEMORY 257 +#define DUMP_CODE 258 /* Memory blobs accessible by other translation units */ extern uint8_t *data_memory;