Compare commits

...

2 Commits

Author SHA1 Message Date
Nicolas 01f02cc9ba Readme updated 2026-03-04 15:16:51 +01:00
Nicolas 031249241e Removed un-required bulk stack allocation and added 4 Byte alignment check. CI updated. 2026-03-04 14:58:52 +01:00
6 changed files with 43 additions and 15 deletions

View File

@ -180,7 +180,8 @@ jobs:
- name: Use ARMv6 container - name: Use ARMv6 container
run: | run: |
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v6 ghcr.io/nonannet/armv6_test:1 \ docker run --rm -v $PWD:/app -w /app --platform linux/arm/v6 ghcr.io/nonannet/armv6_test:1 \
bash -lc "pip install .[mindev] && \ bash -lc "set -x && \
pip install .[mindev] && \
mkdir -p build/runner && \ mkdir -p build/runner && \
gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \ gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \
src/coparun/coparun.c src/coparun/mem_man.c && \ src/coparun/coparun.c src/coparun/mem_man.c && \
@ -209,7 +210,8 @@ jobs:
- name: Use ARMv7 container - name: Use ARMv7 container
run: | run: |
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
bash -lc "pip install .[mindev] && \ bash -lc "set -x && \
pip install .[mindev] && \
mkdir -p build/runner && \ mkdir -p build/runner && \
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
-Wall -Wextra -Wconversion -Wsign-conversion \ -Wall -Wextra -Wconversion -Wsign-conversion \
@ -242,7 +244,8 @@ jobs:
- name: Use ARMv7 container - name: Use ARMv7 container
run: | run: |
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
bash -lc "pip install .[mindev] && \ bash -lc "set -x && \
pip install .[mindev] && \
mkdir -p build/runner && \ mkdir -p build/runner && \
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
-Wall -Wextra -Wconversion -Wsign-conversion \ -Wall -Wextra -Wconversion -Wsign-conversion \
@ -276,7 +279,8 @@ jobs:
- name: Use ARMv7 container - name: Use ARMv7 container
run: | run: |
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \ docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
bash -lc "pip install .[mindev] && \ bash -lc "set -x && \
pip install .[mindev] && \
mkdir -p build/runner && \ mkdir -p build/runner && \
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
-Wall -Wextra -Wconversion -Wsign-conversion \ -Wall -Wextra -Wconversion -Wsign-conversion \
@ -342,7 +346,7 @@ jobs:
path: build/runner/* path: build/runner/*
release-stencils: release-stencils:
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7] needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7, build-armv7thumb, build-armv7mthumb]
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main' && github.event_name == 'push' if: github.ref == 'refs/heads/main' && github.event_name == 'push'
permissions: permissions:
@ -375,8 +379,6 @@ jobs:
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64 cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
cp tmp/runner-linux-armv6/coparun release/coparun-armv6 cp tmp/runner-linux-armv6/coparun release/coparun-armv6
cp tmp/runner-linux-armv7/coparun release/coparun-armv7 cp tmp/runner-linux-armv7/coparun release/coparun-armv7
cp tmp/runner-linux-armv7thumb/coparun release/coparun-armv7thumb
cp tmp/runner-linux-armv7mthumb/coparun release/coparun-armv7mthumb
cp tmp/runner-win/coparun*.exe release/ cp tmp/runner-win/coparun*.exe release/
TAG="${{ steps.version.outputs.version }}" TAG="${{ steps.version.outputs.version }}"

View File

@ -13,7 +13,7 @@ The main features can be summarized as:
- Memory and type safety with a minimal set of runtime errors - Memory and type safety with a minimal set of runtime errors
- Deterministic execution - Deterministic execution
- Automatic differentiation for efficient realtime optimization (reverse-mode) - Automatic differentiation for efficient realtime optimization (reverse-mode)
- Optimized machine code for x86_64, ARMv6, ARMv7 and AArch64 - Optimized machine code for x86_64, 32-bit ARM (Cortex-A and Cortex-M) and AArch64
- Highly portable to new architectures - Highly portable to new architectures
- Small Python package with minimal dependencies and no cross-compile toolchain required - Small Python package with minimal dependencies and no cross-compile toolchain required
@ -31,7 +31,6 @@ While hardware I/O is obviously a core aspect of the project, it is not yet avai
Currently in development: Currently in development:
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications - Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
- Support for Thumb instructions required by ARM*-M targets (for MCUs)
- Constant regrouping for further symbolic optimization of the computation graph - Constant regrouping for further symbolic optimization of the computation graph
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5: Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
@ -253,4 +252,4 @@ This project is licensed under the MIT license - see the [LICENSE](LICENSE) file
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture. [^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
[^3]: Supported architectures: x86_64, AArch64, ARMv6 and 7 (non-Thumb). ARMv6/7-M (Thumb) support is in development. Code for x86 32-bit exists but has unresolved issues and a low priority. [^3]: Supported architectures: x86_64, AArch64, ARMv6/7 (non-Thumb) and ARMv7 Thumb for Cortex-A and Cortex-M. Code for x86 32-bit exists but has unresolved issues and a low priority.

View File

@ -343,6 +343,9 @@ class stencil_database():
elif pr.type.endswith('_ABS32'): elif pr.type.endswith('_ABS32'):
# R_ARM_ABS32 # R_ARM_ABS32
# S + A (replaces full 32 bit) # S + A (replaces full 32 bit)
assert not patch_offset % 4, 'R_ARM_ABS32 patched data like literals needs to be 4 Byte aligned'
# This might be caused by the call in entry_function_shell if not aligned
patch_value = symbol_address + pr.fields['r_addend'] patch_value = symbol_address + pr.fields['r_addend']
symbol_type = symbol_type + 0x03 # Relative to data section symbol_type = symbol_type + 0x03 # Relative to data section

View File

@ -57,8 +57,8 @@ def norm_indent(f: Callable[..., str]) -> Callable[..., str]:
def get_entry_function_shell() -> str: def get_entry_function_shell() -> str:
return f""" return f"""
{entry_func_prefix}int entry_function_shell(){{ {entry_func_prefix}int entry_function_shell(){{
volatile char stack_place_holder[{stack_size}]; //volatile char stack_place_holder[{stack_size}];
stack_place_holder[0] = 0; //stack_place_holder[0] = 0;
result_int(0); result_int(0);
return 1; return 1;
}} }}

View File

@ -10,7 +10,6 @@ cparch=$(python3 -c "import copapy; print(copapy._stencils.detect_process_arch()
# Disassemble stencil object file # Disassemble stencil object file
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
# Create example code disassembly
python3 tools/make_example.py python3 tools/make_example.py
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
@ -28,6 +27,10 @@ fi
echo "Archtitecture: '$cparch'" echo "Archtitecture: '$cparch'"
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm if [[ "$cparch" == *"thumb"* ]]; then
objdump -D -b binary -marm -M force-thumb --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
else
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
fi
rm build/runner/test.copapy.bin rm build/runner/test.copapy.bin

21
tools/test_example_code.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/bash
# Generate and disassemble the example program for the arm-v7-thumb and the
# arm-v7 (ARM mode) targets so that the emitted machine code can be inspected.
#
# Steps: build the arm-v7 runner and both stencil sets via tools/build.sh, then
# for each target run tools/make_example.py, feed build/runner/test.copapy to
# the arm-v7 runner and disassemble build/runner/test.copapy.bin.
#
# Requires: python3, arm-none-eabi-objdump and whatever tools/build.sh needs.
# Results:  build/runner/test.copapy-example-armv7thumb.asm
#           build/runner/test.copapy-example-armv7.asm

# Abort on the first failing step: every later step consumes the artifacts of
# the previous one, so continuing would disassemble stale or missing files.
set -euo pipefail

# Build arm-v7 runner and stencils
bash tools/build.sh arm-v7

# Build arm-v7-thumb stencils
bash tools/build.sh arm-v7-thumb

# Build arm-v7-thumb example code
export CP_TARGET_ARCH=armv7thumb
python3 tools/make_example.py
# The same arm-v7 runner binary is used for both targets.
build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin
# The raw binary carries no mode information, so objdump is told to decode it
# as Thumb instructions.
arm-none-eabi-objdump -D -b binary -marm -M force-thumb --adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7thumb.asm

# Build arm-v7 (ARM mode, non-Thumb) example code
export CP_TARGET_ARCH=armv7
python3 tools/make_example.py
build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin
arm-none-eabi-objdump -D -b binary -marm --adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7.asm