diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 349f366..44eabea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -151,7 +151,7 @@ jobs: - name: Use ARM64 container run: | docker run --rm -v $PWD:/app -w /app --platform linux/arm64 ghcr.io/nonannet/arm64_test:1 \ - bash -lc "pip install . && \ + bash -lc "pip install .[mindev] && \ mkdir -p build/runner && \ gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ src/coparun/coparun.c src/coparun/mem_man.c && \ @@ -180,7 +180,8 @@ jobs: - name: Use ARMv6 container run: | docker run --rm -v $PWD:/app -w /app --platform linux/arm/v6 ghcr.io/nonannet/armv6_test:1 \ - bash -lc "pip install . && \ + bash -lc "set -x && \ + pip install .[mindev] && \ mkdir -p build/runner && \ gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ src/coparun/coparun.c src/coparun/mem_man.c && \ @@ -209,9 +210,14 @@ jobs: - name: Use ARMv7 container run: | docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ - bash -lc "pip install . 
&& \ + bash -lc "set -x && \ + pip install .[mindev] && \ mkdir -p build/runner && \ - gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ + gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + -o build/runner/coparun src/coparun/runmem.c \ src/coparun/coparun.c src/coparun/mem_man.c && \ pytest && \ bash tools/create_asm.sh" @@ -221,6 +227,76 @@ jobs: name: runner-linux-armv7 path: build/runner/* + build-armv7thumb: + needs: [build_stencils] + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: stencil-object-files + path: src/copapy/obj + - name: Set up QEMU for ARMv7 + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/arm/v7 + - name: Use ARMv7 container + run: | + docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ + bash -lc "set -x && \ + pip install .[mindev] && \ + mkdir -p build/runner && \ + gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + -o build/runner/coparun src/coparun/runmem.c \ + src/coparun/coparun.c src/coparun/mem_man.c && \ + export CP_TARGET_ARCH=armv7thumb && \ + pytest && \ + bash tools/create_asm.sh" + + - uses: actions/upload-artifact@v4 + with: + name: runner-linux-armv7thumb + path: build/runner/* + + build-armv7mthumb: + needs: [build_stencils] + runs-on: ubuntu-latest + continue-on-error: true + steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 + with: + name: stencil-object-files + path: src/copapy/obj + - name: Set up QEMU for ARMv7 + uses: docker/setup-qemu-action@v3 + with: + platforms: linux/arm/v7 + - name: Use ARMv7 container + run: | + docker run --rm -v $PWD:/app -w /app --platform 
linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ + bash -lc "set -x && \ + pip install .[mindev] && \ + mkdir -p build/runner && \ + gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + -o build/runner/coparun src/coparun/runmem.c \ + src/coparun/coparun.c src/coparun/mem_man.c && \ + export CP_TARGET_ARCH=armv7mthumb && \ + pytest && \ + bash tools/create_asm.sh" + + - uses: actions/upload-artifact@v4 + with: + name: runner-linux-armv7mthumb + path: build/runner/* + build-windows: needs: [build_stencils] runs-on: windows-latest @@ -244,7 +320,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Python dependencies - run: python -m pip install .[dev] + run: python -m pip install .[mindev] - name: Set up MSVC environment uses: microsoft/setup-msbuild@v2 @@ -270,7 +346,7 @@ jobs: path: build/runner/* release-stencils: - needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7] + needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7, build-armv7thumb, build-armv7mthumb] runs-on: ubuntu-latest if: github.ref == 'refs/heads/main' && github.event_name == 'push' permissions: diff --git a/.gitignore b/.gitignore index aaa1e38..dfe5d47 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,4 @@ core *.log docs/source/start.md /src/copapy/_version.py +sketch*.py diff --git a/README.md b/README.md index 0df2f75..d7780ca 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ The main features can be summarized as: - Memory and type safety with a minimal set of runtime errors - Deterministic execution - Automatic differentiation for efficient realtime optimization (reverse-mode) -- Optimized machine code for x86_64, ARMv6, ARMv7 and AArch64 +- Optimized machine code for x86_64, 32 Bit ARM (Cortex-A and Cortex-M) and AArch64 - Highly portable to new architectures - Small 
Python package with minimal dependencies and no cross-compile toolchain required @@ -31,7 +31,6 @@ While hardware I/O is obviously a core aspect of the project, it is not yet avai Currently in development: - Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications -- Support for Thumb instructions required by ARM*-M targets (for MCUs) - Constant regrouping for further symbolic optimization of the computation graph Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5: @@ -253,4 +252,4 @@ This project is licensed under the MIT license - see the [LICENSE](LICENSE) file [^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture. -[^3]: Supported architectures: x86_64, AArch64, ARMv6 and 7 (non-Thumb). ARMv6/7-M (Thumb) support is in development. Code for x86 32-bit exists but has unresolved issues and a low priority. +[^3]: Supported architectures: x86_64, AArch64, ARMv6/7 (non-Thumb) and ARMv7 Thumb for Cortex-A and Cortex-M. Code for x86 32-bit exists but has unresolved issues and a low priority. 
diff --git a/pyproject.toml b/pyproject.toml index d5643c8..1ee53ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "copapy" dynamic = ["version"] authors = [ - { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" }, + { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" }, ] description = "Copy-Patch Compiler" readme = "README.md" @@ -45,14 +45,18 @@ dev = [ "ruff", "mypy", "pytest", - "pelfy>=1.0.7" + "pelfy>=1.0.8" +] +mindev = [ + "pytest", + "pelfy>=1.0.8" ] doc_build = [ - "sphinx", - "pydata_sphinx_theme", - "sphinx-autodoc-typehints", - "myst-parser", - "pelfy>=1.0.7" + "sphinx", + "pydata_sphinx_theme", + "sphinx-autodoc-typehints", + "myst-parser", + "pelfy>=1.0.8" ] [tool.mypy] diff --git a/src/copapy/_binwrite.py b/src/copapy/_binwrite.py index ff77918..db1f15f 100644 --- a/src/copapy/_binwrite.py +++ b/src/copapy/_binwrite.py @@ -6,11 +6,14 @@ ByteOrder = Literal['little', 'big'] Command = Enum('Command', [('ALLOCATE_DATA', 1), ('COPY_DATA', 2), ('ALLOCATE_CODE', 3), ('COPY_CODE', 4), - ('PATCH_FUNC', 0x1000), ('PATCH_OBJECT', 0x2000), + ('PATCH_FUNC', 0x1000), + ('PATCH_FUNC_ARM32_THM', 0x1005), + ('PATCH_OBJECT', 0x2000), ('PATCH_OBJECT_HI21', 0x2001), ('PATCH_OBJECT_ABS', 0x2002), ('PATCH_OBJECT_REL', 0x2003), ('PATCH_OBJECT_ARM32_ABS', 0x2004), + ('PATCH_OBJECT_ARM32_ABS_THM', 0x2006), ('ENTRY_POINT', 7), ('RUN_PROG', 64), ('READ_DATA', 65), ('END_COM', 256), ('FREE_MEMORY', 257), ('DUMP_CODE', 258)]) @@ -22,6 +25,11 @@ class data_writer(): self._data: list[tuple[str, bytes, int]] = [] self.byteorder: ByteOrder = byteorder + def copy(self) -> 'data_writer': + cp = data_writer(self.byteorder) + cp._data = self._data.copy() + return cp + def write_int(self, value: int, num_bytes: int = 4, signed: bool = False) -> None: self._data.append((f"INT {value}", value.to_bytes(length=num_bytes, byteorder=self.byteorder, signed=signed), 0)) diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index 929b265..4f71d73 
100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -393,6 +393,7 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi # assemble stencils to main program and patch stencils data = sdb.get_function_code('entry_function_shell', 'start') data_list.append(data) + #print(f"* entry_function_shell (0) " + ' '.join(f'{d:02X}' for d in data)) offset = aux_func_len + len(data) for associated_net, node in extended_output_ops: @@ -451,10 +452,8 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi #print('--> ', name, list(sdb.get_relocations(name))) for reloc in sdb.get_relocations(name): - #assert reloc.target_symbol_info != 'STT_FUNC', "Not tested yet!" - if not reloc.target_section_index: - assert reloc.pelfy_reloc.type == 'R_ARM_V4BX' + assert reloc.pelfy_reloc.type == 'R_ARM_V4BX', (reloc.pelfy_reloc.type, name, reloc.pelfy_reloc.symbol.name) elif reloc.target_symbol_info in {'STT_OBJECT', 'STT_NOTYPE', 'STT_SECTION'}: # Patch constants/variable addresses on heap @@ -489,6 +488,6 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi dw.write_int(patch.value, signed=True) dw.write_com(binw.Command.ENTRY_POINT) - dw.write_int(aux_func_len) + dw.write_int(aux_func_len + sdb.thumb_mode) return dw, variables diff --git a/src/copapy/_math.py b/src/copapy/_math.py index dd26bf6..fcd14c9 100644 --- a/src/copapy/_math.py +++ b/src/copapy/_math.py @@ -310,7 +310,7 @@ def get_42(x: value[Any]) -> value[float]: ... 
def get_42(x: NumLike) -> value[float] | float: """Returns the value representing the constant 42""" if isinstance(x, value): - return add_op('get_42', [x, x]) + return add_op('get_42', [x]) return float((int(x) * 3.0 + 42.0) * 5.0 + 21.0) diff --git a/src/copapy/_stencils.py b/src/copapy/_stencils.py index b746c94..7ac84e5 100644 --- a/src/copapy/_stencils.py +++ b/src/copapy/_stencils.py @@ -2,6 +2,7 @@ from dataclasses import dataclass from typing import Generator, Literal, Iterable, TYPE_CHECKING import struct import platform +import os if TYPE_CHECKING: import pelfy @@ -49,11 +50,14 @@ class patch_entry: def detect_process_arch() -> str: - """ - For running the code locally in the python module + """For running the code locally in the python module the architecture of the current process is detected by this function to load the correct stencil database. """ + cp_target_arch = os.environ.get("CP_TARGET_ARCH") + if cp_target_arch: + return cp_target_arch + bits = struct.calcsize("P") * 8 arch = platform.machine().lower() @@ -88,11 +92,20 @@ def get_return_function_type(symbol: pelfy.elf_symbol) -> str: def get_stencil_position(func: pelfy.elf_symbol) -> tuple[int, int]: start_index = 0 # There must be no prolog + # Find last relocation in function last_instr = get_last_call_in_function(func) - function_size = func.fields['st_size'] - if last_instr + 5 >= function_size: # Check if jump is last instruction - end_index = last_instr # Jump can be striped + + assert func.section, f"No code section specified for symbol {func.name}" + + # func.section.fields['sh_size'] is equivalent to func.fields['st_size'] + # expect for ARM thumb, here nop padding at the end for 4-byte alignment + # is not included in st_size + function_size = func.section.fields['sh_size'] + + # Check if jump is the last instruction and can be striped + if last_instr + 5 >= function_size: + end_index = last_instr else: end_index = function_size @@ -106,11 +119,12 @@ def 
get_last_call_in_function(func: pelfy.elf_symbol) -> int: if reloc.symbol.name.startswith('dummy_'): return -0xFFFF # Last relocation is not a jump else: - # Assume the call instruction is 4 bytes long for relocations with less than 32 bit and 5 bytes otherwise + # Assume the jump/call instruction is 4 bytes long for relocations + # with less than 32 bit and 5 bytes otherwise instruction_lengths = 4 if reloc.bits < 32 else 5 address_field_length = 4 #print(f"-> {[r.fields['r_offset'] - func.fields['st_value'] for r in func.relocations]}") - return reloc.fields['r_offset'] - func.fields['st_value'] + address_field_length - instruction_lengths + return reloc.fields['r_offset'] - func.offset_in_section + address_field_length - instruction_lengths def get_op_after_last_call_in_function(func: pelfy.elf_symbol) -> int: @@ -118,7 +132,12 @@ def get_op_after_last_call_in_function(func: pelfy.elf_symbol) -> int: assert func.relocations, f'No call function in stencil function {func.name}.' reloc = func.relocations[-1] assert reloc.bits <= 32, "Relocation segment might be larger then 32 bit" - return reloc.fields['r_offset'] - func.fields['st_value'] + 4 + return reloc.fields['r_offset'] - func.offset_in_section + 4 + + +def add_sign_int32(value: int) -> int: + """Convert a 32-bit unsigned integer to a signed integer.""" + return value - 0x100000000 if value > 0x7FFFFFFF else value class stencil_database(): @@ -129,6 +148,7 @@ class stencil_database(): var_size (dict[str, int]): dictionary of object names and their sizes byteorder (ByteOrder): byte order of the ELF file elf (elf_file): the loaded ELF file + thumb_mode (bool): entry_function_shell in ARM thumb mode """ def __init__(self, obj_file: str | bytes): @@ -155,6 +175,8 @@ class stencil_database(): # if s.info == 'STT_OBJECT'} self.byteorder: ByteOrder = self.elf.byteorder + self.thumb_mode = self.elf.symbols['entry_function_shell'].thumb_mode + #for name in self.function_definitions.keys(): # sym = 
self.elf.symbols[name] # sym.relocations @@ -196,19 +218,20 @@ class stencil_database(): for reloc in symbol.relocations: # address to fist byte to patch relative to the start of the symbol - patch_offset = reloc.fields['r_offset'] - symbol.fields['st_value'] - start_index + patch_offset = reloc.fields['r_offset'] - symbol.offset_in_section - start_index if patch_offset < end_index - start_index: # Exclude the call to the result_* function reloc_entry = relocation_entry(reloc.symbol.name, reloc.symbol.info, - reloc.symbol.fields['st_value'], + reloc.symbol.fields['st_value'], # LSB on ARM indicates thumb mode reloc.symbol.fields['st_shndx'], - symbol.fields['st_value'], + symbol.offset_in_section, start_index, reloc) cache.append(reloc_entry) yield reloc_entry + def get_patch(self, relocation: relocation_entry, symbol_address: int, function_offset: int, symbol_type: int) -> patch_entry: """Return patch positions for a provided symbol (function or object) @@ -234,12 +257,14 @@ class stencil_database(): if pr.type.endswith('64_PC32') or pr.type.endswith('64_PLT32'): # S + A - P - patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + addend = add_sign_int32(pr.fields['r_addend']) + patch_value = symbol_address + addend - patch_offset #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") elif pr.type == 'R_386_PC32': # S + A - P - patch_value = symbol_address + pr.fields['r_addend'] - patch_offset + addend = add_sign_int32(pr.fields['r_addend']) + patch_value = symbol_address + addend - patch_offset #print(f" *> {pr.type} {pr.symbol.name} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {bin(pr.fields['r_addend'])} {pr.bits=}, {function_offset=} {patch_offset=}") elif pr.type == 'R_386_32': @@ -300,28 +325,52 @@ class stencil_database(): scale = 8 #print(f" *> {patch_value=} {symbol_address=} {pr.fields['r_addend']=}, {function_offset=}") - elif 
pr.type.endswith('_MOVW_ABS_NC'): - # R_ARM_MOVW_ABS_NC + elif pr.type == 'R_ARM_MOVW_ABS_NC': # (S + A) & 0xFFFF mask = 0xFFFF patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x04 # Absolut value #print(f" *> {pr.type} {patch_value=} {symbol_address=}, {function_offset=}") - elif pr.type.endswith('_MOVT_ABS'): - # R_ARM_MOVT_ABS + elif pr.type =='R_ARM_MOVT_ABS': # (S + A) & 0xFFFF0000 mask = 0xFFFF0000 patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x04 # Absolut value scale = 0x10000 + #print(f" *> {pr.type} {patch_value=} {symbol_address=}, {function_offset=}, {pr.fields['r_addend']=}") elif pr.type.endswith('_ABS32'): # R_ARM_ABS32 # S + A (replaces full 32 bit) + assert not patch_offset % 4, 'R_ARM_ABS32 patched data like literals needs to be 4 Byte aligned' + # This might be caused by the call in entry_function_shell if not aligned + patch_value = symbol_address + pr.fields['r_addend'] symbol_type = symbol_type + 0x03 # Relative to data section + elif pr.type.endswith('_THM_JUMP24') or pr.type.endswith('_THM_CALL'): + # R_ARM_THM_JUMP24 + # S + A - P + patch_value = symbol_address - patch_offset + pr.fields['r_addend'] + symbol_type = symbol_type + 0x05 # PATCH_FUNC_ARM32_THM + #print(f" *> {pr.type} {patch_value=} {symbol_address=} {pr.fields['r_addend']=} {pr.bits=}, {function_offset=} {patch_offset=}") + + elif pr.type == 'R_ARM_THM_MOVW_ABS_NC': + # (S + A) & 0xFFFF + mask = 0xFFFF + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x06 # PATCH_OBJECT_ARM32_ABS_THM + #print(f" *> {pr.type} {patch_value=} {symbol_address=}, {function_offset=}, {pr.fields['r_addend']=}") + + elif pr.type == 'R_ARM_THM_MOVT_ABS': + # (S + A) & 0xFFFF0000 + mask = 0xFFFF0000 + patch_value = symbol_address + pr.fields['r_addend'] + symbol_type = symbol_type + 0x06 # PATCH_OBJECT_ARM32_ABS_THM + scale = 0x10000 + #print(f" *> {pr.type} {patch_value=} {symbol_address=}, 
{function_offset=}, {pr.fields['r_addend']=}") + else: raise NotImplementedError(f"Relocation type {pr.type} in {relocation.pelfy_reloc.target_section.name} pointing to {relocation.pelfy_reloc.symbol.name} not implemented") @@ -342,7 +391,8 @@ class stencil_database(): func = self.elf.symbols[name] start_stencil, end_stencil = get_stencil_position(func) assert func.section - start_index = func.section['sh_offset'] + func['st_value'] + start_stencil + + start_index = func.offset_in_file + start_stencil lengths = end_stencil - start_stencil self._stencil_cache[name] = (start_index, lengths) @@ -380,7 +430,7 @@ class stencil_database(): def get_symbol_offset(self, name: str) -> int: """Returns the offset of a specified symbol in the section.""" - return self.elf.symbols[name].fields['st_value'] + return self.elf.symbols[name].offset_in_section def get_symbol_section_index(self, name: str) -> int: """Returns the section index for a specified symbol name.""" diff --git a/src/coparun/runmem.c b/src/coparun/runmem.c index f6e54b3..d0bded9 100644 --- a/src/coparun/runmem.c +++ b/src/coparun/runmem.c @@ -57,6 +57,67 @@ void patch_arm32_abs(uint8_t *patch_addr, uint32_t imm16) *((uint32_t *)patch_addr) = instr; } +void patch_arm_thm_abs(uint8_t *patch_addr, uint32_t imm16) +{ + // Thumb MOVW (T3) / MOVT (T1) encoding + + uint16_t *instr16 = (uint16_t *)patch_addr; + uint16_t first_half = instr16[0]; + uint16_t second_half = instr16[1]; + + // Extract fields from imm16 + uint32_t imm4 = (imm16 >> 12) & 0xF; + uint32_t i = (imm16 >> 11) & 0x1; + uint32_t imm3 = (imm16 >> 8) & 0x7; + uint32_t imm8 = imm16 & 0xFF; + + // Clear bits + first_half &= (uint16_t)(~(0x000F | (1 << 10))); + second_half &= (uint16_t)(~(0x00FF | (0x7 << 12))); + + // Set new fields + first_half |= (uint16_t)((imm4 << 0) | (i << 10)); + second_half |= (uint16_t)(imm8 | (imm3 << 12)); + + instr16[0] = first_half; + instr16[1] = second_half; +} + +void patch_arm_thm_jump24(uint8_t *patch_addr, int32_t 
imm24) +{ + // Read the 32-bit instruction (two halfwords) + uint16_t *instr16 = (uint16_t *)patch_addr; + uint16_t first_half = instr16[0]; + uint16_t second_half = instr16[1]; + + // Thumb branch instructions always have LSB = 0 (halfword aligned) + // The imm24 offset in Thumb is shifted right by 1 when encoded + int32_t offset = imm24 >> 1; + + // Split into S, J1, J2, imm10, imm11 + uint32_t S = (offset >> 23) & 0x1; + uint32_t I1 = (offset >> 22) & 0x1; + uint32_t I2 = (offset >> 21) & 0x1; + uint32_t imm10 = (offset >> 11) & 0x3FF; + uint32_t imm11 = offset & 0x7FF; + + // Re-encode J1 and J2 + uint32_t J1 = (~(I1 ^ S)) & 0x1; + uint32_t J2 = (~(I2 ^ S)) & 0x1; + + // Clear old imm fields + first_half &= 0xF800; // Keep upper 5 bits + second_half &= 0xD000; // Keep upper 5 bits + + // Set new imm fields + first_half |= (uint16_t)((S << 10) | imm10); + second_half |= (uint16_t)((J1 << 13) | (J2 << 11) | imm11); + + // Write back + instr16[0] = first_half; + instr16[1] = second_half; +} + void free_memory(runmem_t *context) { deallocate_memory(context->executable_memory, context->executable_memory_len); deallocate_memory(context->data_memory, context->data_memory_len); @@ -187,6 +248,26 @@ int parse_commands(runmem_t *context, uint8_t *bytes) { patch_arm32_abs(context->executable_memory + offs, (uint32_t)((uintptr_t)(context->data_memory + value) & patch_mask) / (uint32_t)patch_scale); break; + case PATCH_FUNC_ARM32_THM: + offs = *(uint32_t*)bytes; bytes += 4; + patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = *(int32_t*)bytes; bytes += 4; + LOG("PATCH_FUNC_ARM32_THM patch_offs=%i patch_mask=%#08x scale=%i value=%i\n", + offs, patch_mask, patch_scale, value); + patch_arm_thm_jump24(context->executable_memory + offs, value); + break; + + case PATCH_OBJECT_ARM32_ABS_THM: + offs = *(uint32_t*)bytes; bytes += 4; + patch_mask = *(uint32_t*)bytes; bytes += 4; + patch_scale = *(int32_t*)bytes; bytes += 4; + value = 
*(int32_t*)bytes; bytes += 4; + LOG("PATCH_OBJECT_ARM32_ABS_THM patch_offs=%i patch_mask=%#08x scale=%i value=%i imm16=%#04x\n", + offs, patch_mask, patch_scale, value, (uint32_t)((uintptr_t)(context->data_memory + value) & patch_mask) / (uint32_t)patch_scale); + patch_arm_thm_abs(context->executable_memory + offs, (uint32_t)((uintptr_t)(context->data_memory + value) & patch_mask) / (uint32_t)patch_scale); + break; + case ENTRY_POINT: rel_entr_point = *(uint32_t*)bytes; bytes += 4; context->entr_point = (entry_point_t)(context->executable_memory + rel_entr_point); diff --git a/src/coparun/runmem.h b/src/coparun/runmem.h index 77fe5d2..80c4252 100644 --- a/src/coparun/runmem.h +++ b/src/coparun/runmem.h @@ -27,11 +27,13 @@ #define ALLOCATE_CODE 3 #define COPY_CODE 4 #define PATCH_FUNC 0x1000 +#define PATCH_FUNC_ARM32_THM 0x1005 #define PATCH_OBJECT 0x2000 #define PATCH_OBJECT_HI21 0x2001 #define PATCH_OBJECT_ABS 0x2002 #define PATCH_OBJECT_REL 0x2003 #define PATCH_OBJECT_ARM32_ABS 0x2004 +#define PATCH_OBJECT_ARM32_ABS_THM 0x2006 #define ENTRY_POINT 7 #define RUN_PROG 64 #define READ_DATA 65 diff --git a/stencils/generate_stencils.py b/stencils/generate_stencils.py index 19b0f73..307db55 100644 --- a/stencils/generate_stencils.py +++ b/stencils/generate_stencils.py @@ -57,8 +57,8 @@ def norm_indent(f: Callable[..., str]) -> Callable[..., str]: def get_entry_function_shell() -> str: return f""" {entry_func_prefix}int entry_function_shell(){{ - volatile char stack_place_holder[{stack_size}]; - stack_place_holder[0] = 0; + //volatile char stack_place_holder[{stack_size}]; + //stack_place_holder[0] = 0; result_int(0); return 1; }} diff --git a/tests/test_ops_armv7mthumb.py b/tests/test_ops_armv7mthumb.py new file mode 100644 index 0000000..16058d6 --- /dev/null +++ b/tests/test_ops_armv7mthumb.py @@ -0,0 +1,174 @@ +from copapy import NumLike, iif, value +from copapy.backend import Store, compile_to_dag, add_read_value_remote +import subprocess +from copapy import 
_binwrite +import copapy.backend as backend +import os +import warnings +import re +import struct +import pytest +import copapy as cp + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + + +def parse_results(log_text: str) -> dict[int, bytes]: + regex = r"^READ_DATA offs=(\d*) size=(\d*) data=(.*)$" + matches = re.finditer(regex, log_text, re.MULTILINE) + var_dict: dict[int, bytes] = {} + + for match in matches: + value_str: list[str] = match.group(3).strip().split(' ') + #print('--', value_str) + value = bytes(int(v, base=16) for v in value_str) + if len(value) <= 8: + var_dict[int(match.group(1))] = value + + return var_dict + + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False) + except Exception: + return False + return result.returncode == 0 + + +def function1(c1: NumLike) -> list[NumLike]: + return [c1 / 4, c1 / -4, c1 // 4, c1 // -4, (c1 * -1) // 4, + c1 * 4, c1 * -4, + c1 + 4, c1 - 4, + c1 > 2, c1 > 100, c1 < 4, c1 < 100] + +def function1ex(c1: NumLike) -> list[NumLike]: + return [c1 // 4] + + +def function2(c1: NumLike) -> list[NumLike]: + return [c1 * 4.44, c1 * -4.44] + + +def function3(c1: NumLike) -> list[NumLike]: + return [c1 / 4] + + +def function4(c1: NumLike) -> list[NumLike]: + return [c1 == 9, c1 == 4, c1 != 9, c1 != 4] + + +def function5(c1: NumLike) -> list[NumLike]: + return [c1 == True, c1 == 
False, c1 != True, c1 != False, c1 / 2, c1 + 2] + + +def function6(c1: NumLike) -> list[NumLike]: + return [c1 == True] + + +def iiftests(c1: NumLike) -> list[NumLike]: + return [iif(c1 > 5, 8, 9), + iif(c1 < 5, 8.5, 9.5), + iif(1 > 5, 3.3, 8.8) + c1, + iif(1 < 5, c1 * 3.3, 8.8), + iif(c1 < 5, c1 * 3.3, 8.8)] + + +@pytest.mark.runner +def test_compile(): + c_i = value(9) + c_f = value(1.111) + c_b = value(True) + + ret_test = function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [value(9) % 2] + iiftests(c_i) + iiftests(c_f) + [cp.asin(c_i/10)] + ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2] + iiftests(9) + iiftests(1.111) + [cp.asin(9/10)] + + out = [Store(r) for r in ret_test] + + sdb = backend.stencil_db_from_package('armv7mthumb') + dw, variables = compile_to_dag(out, sdb) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + du = dw.copy() + dw.write_com(_binwrite.Command.RUN_PROG) + du.write_com(_binwrite.Command.DUMP_CODE) + + for v in ret_test: + assert isinstance(v, value) + add_read_value_remote(dw, variables, v.net) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + dw.write_com(_binwrite.Command.END_COM) + du.write_com(_binwrite.Command.END_COM) + + #print('* Data to runner:') + #dw.print() + + dw.to_file('build/runner/test-armv7mthumb.copapy') + du.to_file('build/runner/test-armv7mthumb-dump.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-armv7 not found, armv7 test skipped!", UserWarning) + return + if not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, armv7 test skipped!", UserWarning) + return + + print('----- Dump code...') + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7mthumb-dump.copapy', 'build/runner/test.copapy-armv7mthumb.bin'] + 
result = run_command(command) + + print('----- Run code...') + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7mthumb.copapy'] + result = run_command(command) + + + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + result_data = parse_results(result) + + for test, ref in zip(ret_test, ret_ref): + assert isinstance(test, value) + address = variables[test.net][0] + data = result_data[address] + if test.dtype == 'int': + val = int.from_bytes(data, sdb.byteorder, signed=True) + elif test.dtype == 'bool': + val = bool.from_bytes(data, sdb.byteorder) + elif test.dtype == 'float': + en = {'little': '<', 'big': '>'}[sdb.byteorder] + val = struct.unpack(en + 'f', data)[0] + assert isinstance(val, float) + else: + raise Exception(f"Unknown type: {test.dtype}") + print('+', val, ref, test.dtype, f" addr={address}") + for t in (int, float, bool): + assert isinstance(val, t) == isinstance(ref, t), f"Result type does not match for {val} and {ref}" + assert val == pytest.approx(ref, 1e-5), f"Result does not match: {val} and reference: {ref}" # pyright: ignore[reportUnknownMemberType] + + +if __name__ == "__main__": + test_compile() diff --git a/tests/test_ops_armv7thumb.py b/tests/test_ops_armv7thumb.py new file mode 100644 index 0000000..997f1cd --- /dev/null +++ b/tests/test_ops_armv7thumb.py @@ -0,0 +1,184 @@ +from copapy import NumLike, iif, value +from copapy.backend import Store, compile_to_dag, add_read_value_remote +import subprocess +from copapy import _binwrite +import copapy.backend as backend +import os +import warnings +import re +import struct +import pytest +import copapy as cp + +if os.name == 'nt': + # On Windows wsl and qemu-user is required: + # sudo apt install qemu-user + qemu_command = ['wsl', 'qemu-arm'] +else: + qemu_command = ['qemu-arm'] + + +def parse_results(log_text: str) -> dict[int, bytes]: + regex = r"^READ_DATA offs=(\d*) size=(\d*) data=(.*)$" + matches = 
re.finditer(regex, log_text, re.MULTILINE) + var_dict: dict[int, bytes] = {} + + for match in matches: + value_str: list[str] = match.group(3).strip().split(' ') + #print('--', value_str) + value = bytes(int(v, base=16) for v in value_str) + if len(value) <= 8: + var_dict[int(match.group(1))] = value + + return var_dict + + +def run_command(command: list[str]) -> str: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8', check=False) + assert result.returncode != 11, f"SIGSEGV (segmentation fault)\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + assert result.returncode == 0, f"\n -Error occurred: {result.stderr}\n -Output: {result.stdout}" + return result.stdout + + +def check_for_qemu() -> bool: + command = qemu_command + ['--version'] + try: + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False) + except Exception: + return False + return result.returncode == 0 + + +def function1(c1: NumLike) -> list[NumLike]: + return [c1 / 4, c1 / -4, c1 // 4, c1 // -4, (c1 * -1) // 4, + c1 * 4, c1 * -4, + c1 + 4, c1 - 4, + c1 > 2, c1 > 100, c1 < 4, c1 < 100] + +def function1ex(c1: NumLike) -> list[NumLike]: + return [c1 // 4] + + +def function2(c1: NumLike) -> list[NumLike]: + return [c1 * 4.44, c1 * -4.44] + + +def function3(c1: NumLike) -> list[NumLike]: + return [c1 / 4] + + +def function4(c1: NumLike) -> list[NumLike]: + return [c1 == 9, c1 == 4, c1 != 9, c1 != 4] + + +def function5(c1: NumLike) -> list[NumLike]: + return [c1 == True, c1 == False, c1 != True, c1 != False, c1 / 2, c1 + 2] + + +def function6(c1: NumLike) -> list[NumLike]: + return [c1 == True] + + +def iiftests(c1: NumLike) -> list[NumLike]: + return [iif(c1 > 5, 8, 9), + iif(c1 < 5, 8.5, 9.5), + iif(1 > 5, 3.3, 8.8) + c1, + iif(1 < 5, c1 * 3.3, 8.8), + iif(c1 < 5, c1 * 3.3, 8.8)] + + +@pytest.mark.runner +def test_compile(): + c_i = value(9) + c_f = value(1.111) + c_b = value(True) + + ret_test = 
function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [value(9) % 2] + iiftests(c_i) + iiftests(c_f) + [cp.asin(c_i/10)] + ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2] + iiftests(9) + iiftests(1.111) + [cp.asin(9/10)] + + out = [Store(r) for r in ret_test] + + sdb = backend.stencil_db_from_package('armv7thumb') + dw, variables = compile_to_dag(out, sdb) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + du = dw.copy() + dw.write_com(_binwrite.Command.RUN_PROG) + du.write_com(_binwrite.Command.DUMP_CODE) + + for v in ret_test: + assert isinstance(v, value) + add_read_value_remote(dw, variables, v.net) + + #dw.write_com(_binwrite.Command.READ_DATA) + #dw.write_int(0) + #dw.write_int(28) + + dw.write_com(_binwrite.Command.END_COM) + du.write_com(_binwrite.Command.END_COM) + + #print('* Data to runner:') + #dw.print() + + dw.to_file('build/runner/test-armv7thumb.copapy') + du.to_file('build/runner/test-armv7thumb-dump.copapy') + + if not check_for_qemu(): + warnings.warn("qemu-armv7 not found, armv7 test skipped!", UserWarning) + return + if not os.path.isfile('build/runner/coparun-armv7'): + warnings.warn("armv7 runner not found, armv7 test skipped!", UserWarning) + return + + print('----- Dump code...') + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7thumb-dump.copapy', 'build/runner/test.copapy-armv7thumb.bin'] + result = run_command(command) + + print('----- Run code...') + command = qemu_command + ['build/runner/coparun-armv7', 'build/runner/test-armv7thumb.copapy'] + result = run_command(command) + + + print('* Output from runner:\n--') + print(result) + print('--') + + assert 'Return value: 1' in result + + result_data = parse_results(result) + + for test, ref in zip(ret_test, ret_ref): + assert isinstance(test, value) + address = 
variables[test.net][0] + data = result_data[address] + if test.dtype == 'int': + val = int.from_bytes(data, sdb.byteorder, signed=True) + elif test.dtype == 'bool': + val = bool.from_bytes(data, sdb.byteorder) + elif test.dtype == 'float': + en = {'little': '<', 'big': '>'}[sdb.byteorder] + val = struct.unpack(en + 'f', data)[0] + assert isinstance(val, float) + else: + raise Exception(f"Unknown type: {test.dtype}") + print('+', val, ref, test.dtype, f" addr={address}") + for t in (int, float, bool): + assert isinstance(val, t) == isinstance(ref, t), f"Result type does not match for {val} and {ref}" + assert val == pytest.approx(ref, 1e-5), f"Result does not match: {val} and reference: {ref}" # pyright: ignore[reportUnknownMemberType] + + +if __name__ == "__main__": + test_compile() + + +""" +qemu-arm -d in_asm,exec,cpu_reset -D qemu.log build/runner/coparun-armv7thumb build/runner/test-armv7thumb.copapy build/runner/test.copapy-armv7thumb.bin + +qemu-arm -d in_asm,exec -D qemu_trace.log \ + -global driver=pl011.audiomaddr,property=addr,value=0xff7ec000 \ + -global driver=pl011.audiomaddr,property=size,value=0x100000 \ + your_binary +""" \ No newline at end of file diff --git a/tests/test_stencil_db.py b/tests/test_stencil_db.py index 0ef40e4..7e762bd 100644 --- a/tests/test_stencil_db.py +++ b/tests/test_stencil_db.py @@ -19,11 +19,18 @@ def test_start_end_function(): if symbol.relocations and symbol.relocations[-1].symbol.info == 'STT_NOTYPE': - print('-', sym_name, get_stencil_position(symbol), len(symbol.data)) + if symbol.section and symbol.section.name == '.text': + print('SKIP', sym_name, '(Aux function, not a stencil)') + continue - start, end = get_stencil_position(symbol) + if symbol.section: + function_size = symbol.section.fields['sh_size'] # len(symbol.data) excludes nop after the function - assert start >= 0 and end >= start and end <= len(symbol.data) + print('-', sym_name, get_stencil_position(symbol), function_size) + + start, end = 
get_stencil_position(symbol) + + assert (start >= 0 and end >= start and end <= function_size) def test_aux_functions(): diff --git a/tools/build.sh b/tools/build.sh index b7e3cd6..4686e3d 100644 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,13 +1,13 @@ #!/bin/bash -set -eux +set -eu ARCH=${1:-x86_64} case "$ARCH" in - (x86_64|arm-v6|arm-v7|all) + (x86_64|arm64|arm-v6|arm-v7|arm-v7-thumb|arm-v7m-thumb|all) ;; (*) - echo "Usage: $0 [x86_64|arm-v6|arm-v7|all]" + echo "Usage: $0 [x86_64|arm64|arm-v6|arm-v7|arm-v7-thumb|arm-v7m-thumb|all]" exit 1 ;; esac @@ -42,13 +42,44 @@ if [[ "$ARCH" == "x86_64" || "$ARCH" == "all" ]]; then -o build/runner/coparun fi +####################################### +# ARM 64 +####################################### +if [[ "$ARCH" == "arm64" || "$ARCH" == "all" ]]; then + echo "--------------arm64----------------" + + LIBGCC=$(aarch64-linux-gnu-gcc -print-libgcc-file-name) + + aarch64-linux-gnu-gcc -fno-pic -ffunction-sections \ + -c $SRC -O3 -o build/stencils/stencils.o + + aarch64-linux-gnu-ld -r \ + build/stencils/stencils.o \ + build/musl/musl_objects_arm64.o \ + $LIBGCC \ + -o $DEST/stencils_arm64_O3.o + + aarch64-linux-gnu-objdump -d -x \ + $DEST/stencils_arm64_O3.o \ + > build/stencils/stencils_arm64_O3.asm + + aarch64-linux-gnu-gcc \ + -Wall -Wextra -Wconversion -Wsign-conversion -static \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun-arm64 +fi + ####################################### # ARM v6 ####################################### if [[ "$ARCH" == "arm-v6" || "$ARCH" == "all" ]]; then echo "--------------arm-v6 32 bit----------------" - LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) + LIBGCC=$(arm-none-eabi-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm -print-libgcc-file-name) arm-none-eabi-gcc -fno-pic -ffunction-sections \ -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm \ @@ -81,7 +112,7 @@ fi if [[ "$ARCH"
== "arm-v7" || "$ARCH" == "all" ]]; then echo "--------------arm-v7 32 bit----------------" - LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) + LIBGCC=$(arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -print-libgcc-file-name) arm-none-eabi-gcc -fno-pic -ffunction-sections \ -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm \ @@ -97,6 +128,7 @@ if [[ "$ARCH" == "arm-v7" || "$ARCH" == "all" ]]; then $DEST/stencils_armv7_O3.o \ > build/stencils/stencils_armv7_O3.asm + # The same runner for all ARM7 arm-linux-gnueabihf-gcc \ -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ -Wall -Wextra -Wconversion -Wsign-conversion \ @@ -107,3 +139,71 @@ if [[ "$ARCH" == "arm-v7" || "$ARCH" == "all" ]]; then src/coparun/mem_man.c \ -o build/runner/coparun-armv7 fi + +####################################### +# ARM v7 thumb Cortex-A +####################################### +if [[ "$ARCH" == "arm-v7-thumb" || "$ARCH" == "all" ]]; then + echo "--------------arm-v7a-thumb 32 bit----------------" + + LIBGCC=$(arm-none-eabi-gcc -march=armv7 -mfpu=vfp3 -mthumb -print-libgcc-file-name) + + arm-none-eabi-gcc -fno-pic -ffunction-sections \ + -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -mthumb \ + -c $SRC -O3 -o build/stencils/stencils.o + + arm-none-eabi-ld -r \ + build/stencils/stencils.o \ + build/musl/musl_objects_armv7thumb.o \ + $LIBGCC \ + -o $DEST/stencils_armv7thumb_O3.o + + arm-none-eabi-objdump -d -x \ + $DEST/stencils_armv7thumb_O3.o \ + > build/stencils/stencils_armv7thumb_O3.asm + + # The same runner for all ARM7 + arm-linux-gnueabihf-gcc \ + -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun-armv7thumb +fi + +####################################### +# ARM v7 thumb Cortex-M +####################################### 
+if [[ "$ARCH" == "arm-v7m-thumb" || "$ARCH" == "all" ]]; then + echo "--------------arm-v7m-thumb 32 bit----------------" + + LIBGCC=$(arm-none-eabi-gcc -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb -print-libgcc-file-name) + + arm-none-eabi-gcc -fno-pic -ffunction-sections \ + -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb \ + -c $SRC -O3 -o build/stencils/stencils.o + + arm-none-eabi-ld -r \ + build/stencils/stencils.o \ + build/musl/musl_objects_armv7mthumb.o \ + $LIBGCC \ + -o $DEST/stencils_armv7mthumb_O3.o + + arm-none-eabi-objdump -d -x \ + $DEST/stencils_armv7mthumb_O3.o \ + > build/stencils/stencils_armv7mthumb_O3.asm + + # The same runner for all ARM7 + arm-linux-gnueabihf-gcc \ + -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun-armv7thumb +fi \ No newline at end of file diff --git a/tools/create_asm.sh b/tools/create_asm.sh index ca6e220..202cd66 100644 --- a/tools/create_asm.sh +++ b/tools/create_asm.sh @@ -10,7 +10,6 @@ cparch=$(python3 -c "import copapy; print(copapy._stencils.detect_process_arch() # Disassemble stencil object file objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm -# Create example code disassembly python3 tools/make_example.py build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin @@ -28,6 +27,10 @@ fi echo "Archtitecture: '$cparch'" -objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm +if [[ "$cparch" == *"thumb"* ]]; then + objdump -D -b binary -marm -M force-thumb --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm +else + objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm +fi rm 
build/runner/test.copapy.bin diff --git a/tools/cross_compiler_unix/build_musl.sh b/tools/cross_compiler_unix/build_musl.sh index 2f16b1e..3f7b50e 100644 --- a/tools/cross_compiler_unix/build_musl.sh +++ b/tools/cross_compiler_unix/build_musl.sh @@ -26,8 +26,11 @@ sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_ # Armv7 sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm" +# Armv7 Thumb for Cortex-A +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7thumb.o "-march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -mthumb" + # Armv7 Thumb for Cortex-M3..7 -sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7thumb.o "-march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb" +sh ../packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld /object_files/musl_objects_armv7mthumb.o "-march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb" #sh ../packobjs.sh mips mips-linux-gnu-gcc-13 mips-linux-gnu-ld diff --git a/tools/cross_compiler_unix/packobjs.sh b/tools/cross_compiler_unix/packobjs.sh index 43bd0e9..98e3c17 100644 --- a/tools/cross_compiler_unix/packobjs.sh +++ b/tools/cross_compiler_unix/packobjs.sh @@ -25,14 +25,16 @@ ar x ../../musl/lib/libc.a sinf.o cosf.o tanf.o asinf.o acosf.o atanf.o atan2f.o ar x ../../musl/lib/libc.a sqrtf.o logf.o expf.o sqrt.o ar x ../../musl/lib/libc.a logf_data.o __tandf.o __cosdf.o __sindf.o ar x ../../musl/lib/libc.a fabsf.o scalbn.o floor.o floorf.o exp2f_data.o powf.o powf_data.o -ar x ../../musl/lib/libc.a __rem_pio2f.o __math_invalidf.o __stack_chk_fail.o __math_divzerof.o __math_oflowf.o __rem_pio2_large.o __math_uflowf.o __math_xflowf.o +ar x ../../musl/lib/libc.a __rem_pio2f.o __math_invalid.o __math_invalidf.o __stack_chk_fail.o +ar x ../../musl/lib/libc.a __math_divzerof.o __math_oflowf.o __rem_pio2_large.o __math_uflowf.o __math_xflowf.o 
sqrt_data.o # Check out .lo (PIC) ar x ../../musl/lib/libc.a sinf.lo cosf.lo tanf.lo asinf.lo acosf.lo atanf.lo atan2f.lo ar x ../../musl/lib/libc.a sqrtf.lo logf.lo expf.lo sqrt.lo ar x ../../musl/lib/libc.a logf_data.lo __tandf.lo __cosdf.lo __sindf.lo ar x ../../musl/lib/libc.a fabsf.lo scalbn.lo floor.lo floorf.o exp2f_data.lo powf.lo powf_data.lo -ar x ../../musl/lib/libc.a __rem_pio2f.lo __math_invalidf.lo __stack_chk_fail.lo __math_divzerof.lo __math_oflowf.lo __rem_pio2_large.lo __math_uflowf.lo __math_xflowf.lo +ar x ../../musl/lib/libc.a __rem_pio2f.lo __math_invalid.lo __math_invalidf.lo __stack_chk_fail.lo +ar x ../../musl/lib/libc.a __math_divzerof.lo __math_oflowf.lo __rem_pio2_large.lo __math_uflowf.lo __math_xflowf.lo sqrt_data.lo cd ../../musl diff --git a/tools/crosscompile.sh b/tools/crosscompile.sh index 9494ff2..78a4f2d 100644 --- a/tools/crosscompile.sh +++ b/tools/crosscompile.sh @@ -36,15 +36,20 @@ arm-none-eabi-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm $FLAGS -$OPT -c LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv6.o $LIBGCC -o $DEST/stencils_armv6_$OPT.o -# ARMv7 hardware fp +# ARMv7 hardware fp for Cortex-A arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm $FLAGS -$OPT -c $SRC -o $STMP LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7.o $LIBGCC -o $DEST/stencils_armv7_$OPT.o +# ARMv7 Thumb for Cortex-A with hardware fp +arm-none-eabi-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -mthumb $FLAGS -$OPT -c $SRC -o $STMP +LIBGCC=$(arm-none-eabi-gcc -march=armv7 -mfpu=vfp3 -mthumb -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7thumb.o $LIBGCC -o $DEST/stencils_armv7thumb_$OPT.o + # Armv7 Thumb for Cortex-M3..7 hardware fp arm-none-eabi-gcc -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb $FLAGS -$OPT -c $SRC -o $STMP 
-LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) -arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7thumb.o $LIBGCC -o $DEST/stencils_armv7thumb_$OPT.o +LIBGCC=$(arm-none-eabi-gcc -march=armv7e-m -mfpu=fpv4-sp-d16 -mfloat-abi=hard -mthumb -print-libgcc-file-name) +arm-none-eabi-ld -r $STMP /object_files/musl_objects_armv7mthumb.o $LIBGCC -o $DEST/stencils_armv7mthumb_$OPT.o # PowerPC64LE # powerpc64le-linux-gnu-gcc-13 $FLAGS -$OPT -c $SRC -o $DEST/stencils_ppc64le_$OPT.o diff --git a/tools/inspect.sh b/tools/inspect.sh index c25e898..42fba43 100644 --- a/tools/inspect.sh +++ b/tools/inspect.sh @@ -14,3 +14,5 @@ objdump -D -b binary -m i386:x86-64 --adjust-vma=0x1000 build/runner/test.copapy build/runner/coparun-armv7 build/runner/test-armv7.copapy build/runner/test.copapy-armv7.bin arm-none-eabi-objdump -D -b binary -marm --adjust-vma=0x50000 build/runner/test.copapy-armv7.bin > build/runner/test.copapy-armv7.asm + +# arm-none-eabi-objdump -D -b binary -marm -M force-thumb --adjust-vma=0x50001 build/runner/test.copapy-armv7thumb.bin > build/runner/test.copapy-armv7thumb.asm diff --git a/tools/test_example_code.sh b/tools/test_example_code.sh new file mode 100755 index 0000000..7b0c054 --- /dev/null +++ b/tools/test_example_code.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Build arm-v7 runner and stencils +bash tools/build.sh arm-v7 + +# Build arm-v7-thumb stencils +bash tools/build.sh arm-v7-thumb + +# Build arm-v7-thumb example code +export CP_TARGET_ARCH=armv7thumb +python3 tools/make_example.py +build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin + +arm-none-eabi-objdump -D -b binary -marm -M force-thumb --adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7thumb.asm + +# Build arm-v7 example code +export CP_TARGET_ARCH=armv7 +python3 tools/make_example.py +build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin + +arm-none-eabi-objdump -D -b binary -marm
--adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7.asm \ No newline at end of file diff --git a/tools/test_thumb_stancils.sh b/tools/test_thumb_stancils.sh new file mode 100755 index 0000000..7fec81e --- /dev/null +++ b/tools/test_thumb_stancils.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +bash tools/build.sh arm-v7-thumb +python tests/test_ops_armv7thumb.py +qemu-arm -d in_asm -D qemu.log build/runner/coparun-armv7thumb build/runner/test-armv7thumb.copapy build/runner/test.copapy-armv7thumb.bin +arm-none-eabi-objdump -D -b binary -marm -M force-thumb --adjust-vma=0xff7ed000 build/runner/test.copapy-armv7thumb.bin > build/runner/test.copapy-armv7thumb.asm \ No newline at end of file