diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml index 6f9ab27..b67c98e 100644 --- a/.github/workflows/build_docker_image.yml +++ b/.github/workflows/build_docker_image.yml @@ -61,5 +61,34 @@ jobs: run: echo "IMAGE_NAME=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/arm64_test:1" >> $GITHUB_ENV - name: Build & Push Docker image - run: docker buildx build --platform linux/arm64 --push -t $IMAGE_NAME tools/arm64_test/ + run: docker buildx build --platform linux/arm64 --push -t $IMAGE_NAME tools/qemu_test/ + docker-build-armv7: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: arm + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Set image name + run: echo "IMAGE_NAME=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/armv7_test:1" >> $GITHUB_ENV + + - name: Build & Push Docker image + run: docker buildx build --platform linux/arm/v7 --push -t $IMAGE_NAME tools/qemu_test/ diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 6dbc371..705b220 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -54,9 +54,9 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.2.0 env: - CIBW_ARCHS_LINUX: "x86_64 aarch64" #i686 + CIBW_ARCHS_LINUX: "x86_64 aarch64 armv7l" # i686 CIBW_ARCHS_MACOS: "x86_64 universal2" - CIBW_ARCHS_WINDOWS: "AMD64" #x86 + CIBW_ARCHS_WINDOWS: "AMD64" # x86 CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest -m \"not runner\" {package}/tests/" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6274454..41a47f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,9 +2,9 @@ name: CI Pipeline on: push: - branches: [main] + branches: [main, dev] pull_request: - branches: [main] + branches: [main, dev] jobs: build_stencils: @@ -149,6 +149,34 @@ jobs: name: runner-linux-arm64 path: build/runner/* + build-armv7: + needs: [build_stencils] + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: stencil-object-files + path: src/copapy/obj + - name: Set up QEMU for ARMv7 + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/arm/v7 + - name: Use ARMv7 container + run: | + docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ + bash -lc "pip install . && \ + mkdir -p build/runner && \ + gcc -O3 -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ + src/coparun/coparun.c src/coparun/mem_man.c && \ + pytest" + + - uses: actions/upload-artifact@v4 + with: + name: runner-linux-armv7 + path: build/runner/* + build-windows: needs: [build_stencils] runs-on: windows-latest @@ -204,7 +232,7 @@ jobs: path: build/runner/* release-stencils: - needs: [build_stencils, build-ubuntu, build-windows, build-arm64] + needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv7] runs-on: ubuntu-latest if: github.event_name == 'push' permissions: @@ -236,7 +264,8 @@ jobs: cp tmp/musl-object-files/* release/ cp tmp/cross-runner/coparun-* release/ cp tmp/runner-linux/coparun release/ - cp tmp/runner-linux-arm64/coparun release/ + cp tmp/runner-linux-arm64/coparun release/coparun-aarch64 + cp tmp/runner-linux-armv7/coparun release/coparun-armv7 cp tmp/runner-win/coparun*.exe release/ TAG="${{ steps.version.outputs.version }}" diff --git a/pyproject.toml b/pyproject.toml index e2332e6..6108eda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "pelfy>=1.0.5" + "pelfy>=1.0.7" ] [project.urls] diff --git a/src/copapy/_binwrite.py b/src/copapy/_binwrite.py index 27147bc..ff77918 100644 --- a/src/copapy/_binwrite.py +++ b/src/copapy/_binwrite.py @@ -10,6 +10,7 @@ Command = Enum('Command', [('ALLOCATE_DATA', 1), ('COPY_DATA', 2), ('PATCH_OBJECT_HI21', 0x2001), ('PATCH_OBJECT_ABS', 0x2002), ('PATCH_OBJECT_REL', 0x2003), + ('PATCH_OBJECT_ARM32_ABS', 0x2004), ('ENTRY_POINT', 7), ('RUN_PROG', 64), ('READ_DATA', 65), ('END_COM', 256), ('FREE_MEMORY', 257), ('DUMP_CODE', 258)]) diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index 64585e1..4eade86 100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -221,7 +221,7 @@ def get_section_layout(section_indexes: Iterable[int], sdb: stencil_database, of return section_list, offset -def get_aux_function_mem_layout(function_names: Iterable[str], sdb: stencil_database, offset: int = 0) -> tuple[list[tuple[str, int, int]], int]: +def get_aux_func_layout(function_names: Iterable[str], sdb: stencil_database, offset: int = 0) -> tuple[list[tuple[int, int, int]], dict[str, int], int]: """Get memory layout for the provided auxiliary functions Arguments: @@ -230,17 +230,28 @@ def get_aux_function_mem_layout(function_names: Iterable[str], sdb: stencil_data offset: Starting offset for layout Returns: - Tuple of list of (function_name, start_offset, length) and total length + Tuple of list of (section_id, start_offset, length), function address lookup dictionary, and total length """ - function_list: list[tuple[str, int, int]] = [] + function_lookup: dict[str, int] = {} + section_list: list[tuple[int, int, int]] = [] + section_cache: dict[int, int] = {} for name in function_names: - lengths = sdb.get_symbol_size(name) - offset = (offset + 15) // 16 * 16 - function_list.append((name, offset, lengths)) - offset += lengths + index = sdb.get_symbol_section_index(name) - return function_list, offset + if index in section_cache: + section_offset = section_cache[index] + function_lookup[name] = section_offset + sdb.get_symbol_offset(name) + else: + lengths = sdb.get_section_size(index) + alignment = sdb.get_section_alignment(index) + offset = (offset + alignment - 1) // alignment * alignment + section_list.append((index, offset, lengths)) + section_cache[index] = offset + function_lookup[name] = offset + sdb.get_symbol_offset(name) + offset += lengths + + return section_list, function_lookup, offset def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[binw.data_writer, dict[Net, tuple[int, int, str]]]: @@ -272,10 +283,10 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi stencil_names = {node.name for _, node in extended_output_ops} aux_function_names = sdb.get_sub_functions(stencil_names) - used_sections = sdb.const_sections_from_functions(aux_function_names | stencil_names) + used_const_sections = sdb.const_sections_from_functions(aux_function_names | stencil_names) # Write data - section_mem_layout, sections_length = get_section_layout(used_sections, sdb) + section_mem_layout, sections_length = get_section_layout(used_const_sections, sdb) variable_mem_layout, variables_data_lengths = get_data_layout(variable_list, sdb, sections_length) dw.write_com(binw.Command.ALLOCATE_DATA) dw.write_int(variables_data_lengths) @@ -298,8 +309,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi #print(f'+ {net.dtype} {net.source.value}') # prep auxiliary_functions - aux_function_mem_layout, aux_function_lengths = get_aux_function_mem_layout(aux_function_names, sdb) - aux_func_addr_lookup = {name: offs for name, offs, _ in aux_function_mem_layout} + code_section_layout, func_addr_lookup, aux_func_len = get_aux_func_layout(aux_function_names, sdb) # Prepare program code and relocations object_addr_lookup = {net: offs for net, offs, _ in variable_mem_layout} @@ -308,7 +318,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi # assemble stencils to main program and patch stencils data = sdb.get_function_code('entry_function_shell', 'start') data_list.append(data) - offset = aux_function_lengths + len(data) + offset = aux_func_len + len(data) for associated_net, node in extended_output_ops: assert node.name in sdb.stencil_definitions, f"- Warning: {node.name} stencil not found" @@ -336,7 +346,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi #print('* constants stancils', patch.type, patch.patch_address, binw.Command.PATCH_OBJECT, node.name) elif reloc.target_symbol_info == 'STT_FUNC': - func_addr = aux_func_addr_lookup[reloc.target_symbol_name] + func_addr = func_addr_lookup[reloc.target_symbol_name] patch = sdb.get_patch(reloc, func_addr, offset, binw.Command.PATCH_FUNC.value) #print(patch.type, patch.addr, binw.Command.PATCH_FUNC, node.name, '->', patch.target_symbol_name) else: @@ -355,42 +365,44 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi dw.write_int(offset) # write aux functions code - for name, start, lengths in aux_function_mem_layout: + for i, start, lengths in code_section_layout: dw.write_com(binw.Command.COPY_CODE) dw.write_int(start) dw.write_int(lengths) - dw.write_bytes(sdb.get_function_code(name)) + dw.write_bytes(sdb.get_section_data(i)) # Patch aux functions - for name, start, _ in aux_function_mem_layout: + for name, start in func_addr_lookup.items(): + #print('--> ', name, list(sdb.get_relocations(name))) for reloc in sdb.get_relocations(name): #assert reloc.target_symbol_info != 'STT_FUNC', "Not tested yet!" - if reloc.target_symbol_info in {'STT_OBJECT', 'STT_NOTYPE', 'STT_SECTION'}: + if not reloc.target_section_index: + assert reloc.pelfy_reloc.type == 'R_ARM_V4BX' + + elif reloc.target_symbol_info in {'STT_OBJECT', 'STT_NOTYPE', 'STT_SECTION'}: # Patch constants/variable addresses on heap - #print('--> DATA ', name, reloc.pelfy_reloc.symbol.name, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) + #print('--> DATA ', name, reloc.pelfy_reloc.symbol, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) assert reloc.target_section_index in section_addr_lookup, f"- Function or object in {name} missing: {reloc.pelfy_reloc.symbol.name}" obj_addr = reloc.target_symbol_offset + section_addr_lookup[reloc.target_section_index] patch = sdb.get_patch(reloc, obj_addr, start, binw.Command.PATCH_OBJECT.value) + patch_list.append(patch) elif reloc.target_symbol_info == 'STT_FUNC': #print('--> FUNC', name, reloc.pelfy_reloc.symbol.name, reloc.pelfy_reloc.symbol.info, reloc.pelfy_reloc.symbol.section.name) - func_addr = aux_func_addr_lookup[reloc.target_symbol_name] + func_addr = func_addr_lookup[reloc.target_symbol_name] patch = sdb.get_patch(reloc, func_addr, start, binw.Command.PATCH_FUNC.value) #print(f' FUNC {func_addr=} {start=} {patch.address=}') + patch_list.append(patch) else: raise ValueError(f"Unsupported: {name=} {reloc.target_symbol_info=} {reloc.target_symbol_name=} {reloc.target_section_index}") - patch_list.append(patch) - - #assert False, aux_function_mem_layout - # write entry function code dw.write_com(binw.Command.COPY_CODE) - dw.write_int(aux_function_lengths) - dw.write_int(offset - aux_function_lengths) + dw.write_int(aux_func_len) + dw.write_int(offset - aux_func_len) dw.write_bytes(b''.join(data_list)) # write patch operations @@ -402,6 +414,6 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi dw.write_int(patch.value, signed=True) dw.write_com(binw.Command.ENTRY_POINT) - dw.write_int(aux_function_lengths) + dw.write_int(aux_func_len) return dw, variables diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index 35e7483..3c74561 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -48,8 +48,10 @@ def detect_process_arch() -> str: arch_family = 'x86' elif arch in ('arm64', 'aarch64'): arch_family = 'arm64' - elif 'arm' in arch: - arch_family = 'arm' + elif 'armv7' in arch or 'armv8' in arch: + arch_family = 'armv7' # Treat armv8 (64 bit CPU) as armv7 for 32 bit + elif 'armv6' in arch: + arch_family = 'armv6' elif 'mips' in arch: arch_family = 'mips64' if bits == 64 else 'mips' elif 'riscv' in arch: @@ -229,6 +231,13 @@ class stencil_database(): symbol_type = symbol_type + 0x03 # Relative to data section #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") + elif pr.type.endswith('_ARM_JUMP24') or pr.type.endswith('_ARM_CALL'): + # R_ARM_JUMP24 & R_ARM_CALL + # ((S + A) - P) >> 2 + mask = 0xffffff # 24 bit + patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + scale = 4 + elif pr.type.endswith('_CALL26') or pr.type.endswith('_JUMP26'): # R_AARCH64_CALL26 # ((S + A) - P) >> 2 @@ -273,8 +282,24 @@ class stencil_database(): scale = 8 #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") + elif pr.type.endswith('_MOVW_ABS_NC'): + # R_ARM_MOVW_ABS_NC + # (S + A) & 0xFFFF + mask = 0xFFFF + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x04 # Absolut value + #print(f" *> {pr.type} {patch_value=} {symbol_address=}, {function_offset=}") + + elif pr.type.endswith('_MOVT_ABS'): + # R_ARM_MOVT_ABS + # (S + A) & 0xFFFF0000 + mask = 0xFFFF0000 + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x04 # Absolut value + scale = 0x10000 + else: - raise NotImplementedError(f"Relocation type {pr.type} not implemented") + raise NotImplementedError(f"Relocation type {pr.type} in {relocation.pelfy_reloc.target_section.name} pointing to {relocation.pelfy_reloc.symbol.name} not implemented") return patch_entry(mask, patch_offset, patch_value, scale, symbol_type) @@ -328,6 +353,14 @@ class stencil_database(): def get_symbol_size(self, name: str) -> int: """Returns the size of a specified symbol name.""" return self.elf.symbols[name].fields['st_size'] + + def get_symbol_offset(self, name: str) -> int: + """Returns the offset of a specified symbol in the section.""" + return self.elf.symbols[name].fields['st_value'] + + def get_symbol_section_index(self, name: str) -> int: + """Returns the section index for a specified symbol name.""" + return self.elf.symbols[name].fields['st_shndx'] def get_section_size(self, index: int) -> int: """Returns the size of a section specified by index.""" diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index 1def456..7e0762e 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -40,6 +40,24 @@ void patch_hi21(uint8_t *patch_addr, int32_t page_offset) { *(uint32_t *)patch_addr = instr; } +void patch_arm32_abs(uint8_t *patch_addr, uint32_t imm16) +{ + uint32_t instr = *((uint32_t *)patch_addr); + + // Split the 16-bit immediate into A1 MOVT fields + uint32_t imm4 = (imm16 >> 12) & 0xF; + uint32_t imm12 = imm16 & 0xFFF; + + // Clear the immediate fields: imm4 (bits 19:16) and imm12 (bits 11:0) + instr &= ~(uint32_t)((0xF << 16) | 0xFFF); + + // Set new immediate fields + instr |= (imm4 << 16); + instr |= imm12; + + *((uint32_t *)patch_addr) = instr; +} + void free_memory() { deallocate_memory(executable_memory, executable_memory_len); deallocate_memory(data_memory, data_memory_len); @@ -141,7 +159,7 @@ int parse_commands(uint8_t *bytes) { case PATCH_OBJECT_REL: offs = *(uint32_t*)bytes; bytes += 4; - patch_mask = *(uint32_t*)bytes; bytes += 4; + bytes += 4; patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; LOG("PATCH_OBJECT_REL patch_offs=%i patch_addr=%p scale=%i value=%i\n", @@ -150,13 +168,23 @@ int parse_commands(uint8_t *bytes) { break; case PATCH_OBJECT_HI21: + offs = *(uint32_t*)bytes; bytes += 4; + bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = *(int32_t*)bytes; bytes += 4; + LOG("PATCH_OBJECT_HI21 patch_offs=%i scale=%i value=%i res_value=%i\n", + offs, patch_scale, value, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + patch_hi21(executable_memory + offs, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + break; + + case PATCH_OBJECT_ARM32_ABS: offs = *(uint32_t*)bytes; bytes += 4; patch_mask = *(uint32_t*)bytes; bytes += 4; patch_scale = *(int32_t*)bytes; bytes += 4; value = *(int32_t*)bytes; bytes += 4; - LOG("PATCH_OBJECT_HI21 patch_offs=%i patch_mask=%#08x scale=%i value=%i res_value=%i\n", - offs, patch_mask, patch_scale, value, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); - patch_hi21(executable_memory + offs, floor_div(data_offs + value, patch_scale) - (int32_t)offs / patch_scale); + LOG("PATCH_OBJECT_ARM32_ABS patch_offs=%i patch_mask=%#08x scale=%i value=%i imm16=%#04x\n", + offs, patch_mask, patch_scale, value, (uint32_t)((uintptr_t)(data_memory + value) & patch_mask) / (uint32_t)patch_scale); + patch_arm32_abs(executable_memory + offs, (uint32_t)((uintptr_t)(data_memory + value) & patch_mask) / (uint32_t)patch_scale); break; case ENTRY_POINT: diff --git a/src/coparun/runmem.h b/src/coparun/runmem.h index 983d0f0..7f11dde 100644 --- a/src/coparun/runmem.h +++ b/src/coparun/runmem.h @@ -24,6 +24,7 @@ #define PATCH_OBJECT_HI21 0x2001 #define PATCH_OBJECT_ABS 0x2002 #define PATCH_OBJECT_REL 0x2003 +#define PATCH_OBJECT_ARM32_ABS 0x2004 #define ENTRY_POINT 7 #define RUN_PROG 64 #define READ_DATA 65 diff --git a/stencils/stencil_helper.h b/stencils/stencil_helper.h index e753047..35ff96b 100644 --- a/stencils/stencil_helper.h +++ b/stencils/stencil_helper.h @@ -3,7 +3,11 @@ // Remove function alignment for stencils #if defined(__GNUC__) #define NOINLINE __attribute__((noinline)) +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(__thumb__) || defined(_M_ARM) +#define STENCIL __attribute__((aligned(4))) +#else #define STENCIL __attribute__((aligned(1))) +#endif #else #define NOINLINE #define STENCIL diff --git a/tests/test_comp_timing.py b/tests/test_comp_timing.py index d13dd3b..9dc52b0 100644 --- a/tests/test_comp_timing.py +++ b/tests/test_comp_timing.py @@ -6,7 +6,7 @@ import copapy.backend as cpbe import copapy as cp import copapy._binwrite as binw from copapy._compiler import get_nets, get_section_layout, get_data_layout -from copapy._compiler import patch_entry, CPConstant, get_aux_function_mem_layout +from copapy._compiler import patch_entry, CPConstant, get_aux_func_layout def test_timing_compiler(): t1 = cp.vector([10, 11]*128) + cp.vector(cp.variable(v) for v in range(256)) @@ -88,7 +88,7 @@ def test_timing_compiler(): print('-- get_section_layout:') t0 = time.time() section_mem_layout, sections_length = get_section_layout(used_sections, sdb) - variable_mem_layout, variables_data_lengths = get_data_layout(variable_list, sdb, sections_length) + variable_mem_layout, _ = get_data_layout(variable_list, sdb, sections_length) t1 = time.time() print(f"time: {t1-t0:.6f}s") @@ -123,8 +123,7 @@ def test_timing_compiler(): # prep auxiliary_functions - aux_function_mem_layout, aux_function_lengths = get_aux_function_mem_layout(aux_function_names, sdb) - aux_func_addr_lookup = {name: offs for name, offs, _ in aux_function_mem_layout} + _, aux_func_addr_lookup, aux_function_lengths = get_aux_func_layout(aux_function_names, sdb) # Prepare program code and relocations object_addr_lookup = {net: offs for net, offs, _ in variable_mem_layout} @@ -179,7 +178,7 @@ def test_timing_compiler(): print('-- relocate aux functions:') t0 = time.time() # Patch aux functions - for name, start, _ in aux_function_mem_layout: + for name, start in aux_func_addr_lookup.items(): for reloc in sdb.get_relocations(name): #assert reloc.target_symbol_info != 'STT_FUNC', "Not tested yet!" diff --git a/tests/test_compile_armv7.py b/tests/test_compile_armv7.py new file mode 100644 index 0000000..bf0fa89 --- /dev/null +++ b/tests/test_compile_armv7.py @@ -0,0 +1,96 @@ +from copapy import NumLike +from copapy.backend import Write, compile_to_dag, add_read_command +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import copapy as cp +import os +import warnings +import pytest + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + except Exception: + return False + return result.returncode == 0 + + +def function(c1: NumLike, c2: NumLike) -> tuple[NumLike, ...]: + i1 = c1 // 3.3 + 5 + i2 = c2 * 5 + c1 + r1 = i1 + i2 * 55 / 4 + r2 = 4 * i2 + 5 + + return i1, i2, r1, r2 + + +@pytest.mark.runner +def test_compile(): + t1 = cp.vector([10, 11, 12]) + cp.vector(cp.variable(v) for v in range(3)) + t2 = t1.sum() + + t3 = cp.vector(cp.variable(1 / (v + 1)) for v in range(3)) + t4 = ((t3 * t1) * 2).sum() + t5 = ((t3 * t1) * 2).magnitude() + + ret = (t2, t4, t5) + + out = [Write(r) for r in ret] + + sdb = backend.stencil_db_from_package('armv7') + il, variables = compile_to_dag(out, sdb) + + # run program command + il.write_com(_binwrite.Command.RUN_PROG) + #il.write_com(_binwrite.Command.DUMP_CODE) + + for net in ret: + assert isinstance(net, backend.Net) + add_read_command(il, variables, net) + + il.write_com(_binwrite.Command.END_COM) + + #print('* Data to runner:') + #il.print() + + il.to_file('build/runner/test-armv7.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-armv7 not found, aarch64 test skipped!", UserWarning) + elif not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, aarch64 test skipped!", UserWarning) + else: + command = ['build/runner/coparun-armv7', 'build/runner/test-armv7.copapy', 'build/runner/test-armv7.copapy.bin'] + result = run_command(qemu_command + command) + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + # Compare to x86_64 reference results + assert " size=4 data=24 00 00 00" in result + assert " size=4 data=56 55 25 42" in result + assert " size=4 data=B4 F9 C8 41" in result + + +if __name__ == "__main__": + #test_example() + test_compile() diff --git a/tests/test_ops_armv7.py b/tests/test_ops_armv7.py new file mode 100644 index 0000000..0fd1b2b --- /dev/null +++ b/tests/test_ops_armv7.py @@ -0,0 +1,169 @@ +from copapy import NumLike, iif, variable +from copapy.backend import Write, compile_to_dag, add_read_command +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import os +import warnings +import re +import struct +import pytest +import copapy as cp + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + +def parse_results(log_text: str) -> dict[int, bytes]: + regex = r"^READ_DATA offs=(\d*) size=(\d*) data=(.*)$" + matches = re.finditer(regex, log_text, re.MULTILINE) + var_dict: dict[int, bytes] = {} + + for match in matches: + value_str: list[str] = match.group(3).strip().split(' ') + #print('--', value_str) + value = bytes(int(v, base=16) for v in value_str) + if len(value) <= 8: + var_dict[int(match.group(1))] = value + + return var_dict + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + except: + return False + return result.returncode == 0 + + +def function1(c1: NumLike) -> list[NumLike]: + return [c1 / 4, c1 / -4, c1 // 4, c1 // -4, (c1 * -1) // 4, + c1 * 4, c1 * -4, + c1 + 4, c1 - 4, + c1 > 2, c1 > 100, c1 < 4, c1 < 100] + + +def function2(c1: NumLike) -> list[NumLike]: + return [c1 * 4.44, c1 * -4.44] + + +def function3(c1: NumLike) -> list[NumLike]: + return [c1 / 4] + + +def function4(c1: NumLike) -> list[NumLike]: + return [c1 == 9, c1 == 4, c1 != 9, c1 != 4] + + +def function5(c1: NumLike) -> list[NumLike]: + return [c1 == True, c1 == False, c1 != True, c1 != False, c1 / 2, c1 + 2] + + +def function6(c1: NumLike) -> list[NumLike]: + return [c1 == True] + + +def iiftests(c1: NumLike) -> list[NumLike]: + return [iif(c1 > 5, 8, 9), + iif(c1 < 5, 8.5, 9.5), + iif(1 > 5, 3.3, 8.8) + c1, + iif(1 < 5, c1 * 3.3, 8.8), + iif(c1 < 5, c1 * 3.3, 8.8)] + + +@pytest.mark.runner +def test_compile(): + c_i = variable(9) + c_f = variable(1.111) + c_b = variable(True) + + ret_test = function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [variable(9) % 2] + iiftests(c_i) + iiftests(c_f) + [cp.asin(c_i/10)] + ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2] + iiftests(9) + iiftests(1.111) + [cp.asin(9/10)] + + #ret_test = (c_i * 100 // 5, c_f * 10 // 5) + #ret_ref = (9 * 100 // 5, 1.111 * 10 // 5) + + out = [Write(r) for r in ret_test] + + sdb = backend.stencil_db_from_package('armv7') + dw, variables = compile_to_dag(out, sdb) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + # run program command + dw.write_com(_binwrite.Command.RUN_PROG) + #dw.write_com(_binwrite.Command.DUMP_CODE) + + for net in ret_test: + assert isinstance(net, backend.Net) + add_read_command(dw, variables, net) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + dw.write_com(_binwrite.Command.END_COM) + + print('* Data to runner:') + dw.print() + + dw.to_file('build/runner/test-armv7.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-armv7 not found, armv7 test skipped!", UserWarning) + return + if not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, armv7 test skipped!", UserWarning) + return + + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7.copapy'] + ['build/runner/test-armv7.copapy.bin'] + #try: + result = run_command(command) + #except FileNotFoundError: + # warnings.warn(f"Test skipped, executable not found.", UserWarning) + # return + + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + result_data = parse_results(result) + + for test, ref in zip(ret_test, ret_ref): + assert isinstance(test, variable) + address = variables[test][0] + data = result_data[address] + if test.dtype == 'int': + val = int.from_bytes(data, sdb.byteorder, signed=True) + elif test.dtype == 'bool': + val = bool.from_bytes(data, sdb.byteorder) + elif test.dtype == 'float': + en = {'little': '<', 'big': '>'}[sdb.byteorder] + val = struct.unpack(en + 'f', data)[0] + assert isinstance(val, float) + else: + raise Exception(f"Unknown type: {test.dtype}") + print('+', val, ref, test.dtype, f" addr={address}") + for t in (int, float, bool): + assert isinstance(val, t) == isinstance(ref, t), f"Result type does not match for {val} and {ref}" + assert val == pytest.approx(ref, 1e-5), f"Result does not match: {val} and reference: {ref}" # pyright: ignore[reportUnknownMemberType] + + +if __name__ == "__main__": + #test_example() + test_compile() diff --git a/tools/build.bat b/tools/build.bat index d93eda4..5402235 100644 --- a/tools/build.bat +++ b/tools/build.bat @@ -29,18 +29,38 @@ echo - Build runner for linux x86 32 bit... wsl i686-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-x86 echo - Build stencils x86 32 bit... -REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh i686-linux-gnu-gcc-12 i686-linux-gnu-ld ../copapy/build/musl/musl_objects_x86.o +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh i686-linux-gnu-gcc-12 i686-linux-gnu-ld ../copapy/build/musl/musl_objects_x86.o -fno-pic wsl i686-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl i686-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_x86.o -o src/copapy/obj/stencils_x86_O3.o wsl i686-linux-gnu-objdump -d -x src/copapy/obj/stencils_x86_O3.o > build/stencils/stencils_x86_O3.asm echo --------------arm64 64 bit---------------- -echo - Build stencils for aarch64... wsl aarch64-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl aarch64-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_arm64.o -o src/copapy/obj/stencils_arm64_O3.o wsl aarch64-linux-gnu-objdump -d -x src/copapy/obj/stencils_arm64_O3.o > build/stencils/stencils_arm64_O3.asm - echo ------------------------------ echo - Build runner for Aarch64... wsl aarch64-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-aarch64 + + +echo --------------arm-v6 32 bit---------------- +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -marm" +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv6 -mfpu=vfp -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6.o -o src/copapy/obj/stencils_armv6_O3.o +wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv6_O3.o > build/stencils/stencils_armv6_O3.asm +echo ------------------------------ +REM echo - Build runner +REM wsl arm-linux-gnueabihf-gcc -march=armv6 -mfpu=vfp -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 + + + +echo --------------arm-v7 32 bit---------------- +REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -marm" +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o -o src/copapy/obj/stencils_armv7_O3.o +wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm +echo ------------------------------ +echo - Build runner +wsl arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 + diff --git a/tools/build.sh b/tools/build.sh index 73f697e..ae78afc 100644 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,20 +1,32 @@ #!/bin/bash set -e set -v -mkdir -p bin + +mkdir -p build/stencils +mkdir -p build/runner + SRC=build/stencils/stencils.c DEST=src/copapy/obj python3 stencils/generate_stencils.py $SRC mkdir -p $DEST -gcc --version -#gcc -fno-pic -c $SRC -O0 -o $DEST/stencils_x86_64_O0.o -#gcc -fno-pic -c $SRC -O1 -o $DEST/stencils_x86_64_O1.o -#gcc -fno-pic -c $SRC -O2 -o $DEST/stencils_x86_64_O2.o -gcc -fno-pic -c $SRC -O3 -o $DEST/stencils_x86_64_O3.o + +gcc -fno-pic -ffunction-sections -c $SRC -O3 -o build/stencils/stencils.o +ld -r build/stencils/stencils.o build/musl/musl_objects_x86_64.o -o $DEST/stencils_x86_64_O3.o +objdump -d -x $DEST/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm mkdir bin -p gcc -Wall -Wextra -Wconversion -Wsign-conversion \ -Wshadow -Wstrict-overflow -Werror -g -O3 \ - -DENABLE_LOGGING + -DENABLE_LOGGING \ src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun -#x86_64-w64-mingw32-gcc -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -Werror src/runner/runmem2.c -Wall -O3 -o bin/runmem2.exe + + +echo "--------------arm-v7 32 bit----------------" +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +#LIBM=$(arm-none-eabi-gcc -print-file-name=libm.a) +#LIBC=$(arm-none-eabi-gcc -print-file-name=libc.a) + +arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -c $SRC -O3 -o build/stencils/stencils.o +arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o $LIBGCC -o $DEST/stencils_armv7_O3.o +arm-none-eabi-objdump -d -x $DEST/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm +arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 diff --git a/tools/cross_compiler_unix/Dockerfile b/tools/cross_compiler_unix/Dockerfile index acfd78c..3bef513 100644 --- a/tools/cross_compiler_unix/Dockerfile +++ b/tools/cross_compiler_unix/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y \ build-essential \ gcc-13 \ gcc-13-i686-linux-gnu \ - gcc-13-arm-linux-gnueabihf \ + gcc-arm-none-eabi \ gcc-13-mips-linux-gnu \ gcc-13-mipsel-linux-gnu \ gcc-13-riscv64-linux-gnu \ diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index 8c66202..02a6b40 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -5,7 +5,8 @@ set -v mkdir -p /object_files -git clone --single-branch --branch master --depth 1 https://git.musl-libc.org/git/musl +#git clone --single-branch --branch master --depth 1 https://git.musl-libc.org/git/musl +git clone --single-branch --branch master --depth 1 https://repo.or.cz/musl.git cd musl #./configure CFLAGS="-O2 -fno-stack-protector -ffast-math" @@ -16,6 +17,10 @@ sh ../packobjs.sh i686-linux-gnu-gcc-13 i686-linux-gnu-ld /object_files/musl_obj sh ../packobjs.sh aarch64-linux-gnu-gcc-13 aarch64-linux-gnu-ld /object_files/musl_objects_arm64.o +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -mfloat-abi=hard -marm" + +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm" + #sh ../packobjs.sh mips mips-linux-gnu-gcc-13 mips-linux-gnu-ld #sh ../packobjs.sh riscv64 riscv64-linux-gnu-gcc-13 riscv64-linux-gnu-ld diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index 9b634bc..67a118c 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -31,9 +31,15 @@ i686-linux-gnu-ld -r $STMP /object_files/musl_objects_x86.o -o $DEST/stencils_x8 aarch64-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP aarch64-linux-gnu-ld -r $STMP /object_files/musl_objects_arm64.o -o $DEST/stencils_arm64_$OPT.o -# ARMv7 -#arm-linux-gnueabihf-gcc-13 $FLAGS -$OPT -c $SRC -o $STMP -#arm-linux-gnu-ld -r $STMP /object_files/musl_objects_arm.o -o $DEST/stencils_arm_$OPT.o +# ARMv6 hardware fp +arm-none-eabi-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o $LIBGCC -o $DEST/stencils_armv6_$OPT.o + +# ARMv7 hardware fp +arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP +LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o $LIBGCC -o $DEST/stencils_armv7_$OPT.o # PowerPC64LE # powerpc64le-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $DEST/stencils_ppc64le_$OPT.o diff --git a/tools/inspect.sh b/tools/inspect.sh index f08b019..c25e898 100644 --- a/tools/inspect.sh +++ b/tools/inspect.sh @@ -1,9 +1,16 @@ #!/bin/bash -source tools/build.sh +set -e +set -v -objdump -d -j .text src/copapy/obj/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm +sh tools/build.sh + +#objdump -d -j .text src/copapy/obj/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm python3 tools/make_example.py -python3 tools/extract_code.py "build/runner/test.copapy" "build/runner/test.copapy.bin" + +build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin objdump -D -b binary -m i386:x86-64 --adjust-vma=0x1000 build/runner/test.copapy.bin > build/runner/test.copapy.asm + +build/runner/coparun-armv7 build/runner/test-armv7.copapy build/runner/test.copapy-armv7.bin +arm-none-eabi-objdump -D -b binary -marm --adjust-vma=0x50000 build/runner/test.copapy-armv7.bin > build/runner/test.copapy-armv7.asm diff --git a/tools/arm64_test/Dockerfile b/tools/qemu_test/Dockerfile similarity index 100% rename from tools/arm64_test/Dockerfile rename to tools/qemu_test/Dockerfile