mirror of https://github.com/Nonannet/copapy.git
Compare commits
2 Commits
e428e16b83
...
01f02cc9ba
| Author | SHA1 | Date |
|---|---|---|
|
|
01f02cc9ba | |
|
|
031249241e |
|
|
@ -180,7 +180,8 @@ jobs:
|
||||||
- name: Use ARMv6 container
|
- name: Use ARMv6 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v6 ghcr.io/nonannet/armv6_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v6 ghcr.io/nonannet/armv6_test:1 \
|
||||||
bash -lc "pip install .[mindev] && \
|
bash -lc "set -x && \
|
||||||
|
pip install .[mindev] && \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner && \
|
||||||
gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \
|
gcc -O3 -static -DENABLE_LOGGING -o build/runner/coparun src/coparun/runmem.c \
|
||||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
src/coparun/coparun.c src/coparun/mem_man.c && \
|
||||||
|
|
@ -209,7 +210,8 @@ jobs:
|
||||||
- name: Use ARMv7 container
|
- name: Use ARMv7 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||||
bash -lc "pip install .[mindev] && \
|
bash -lc "set -x && \
|
||||||
|
pip install .[mindev] && \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner && \
|
||||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||||
|
|
@ -242,7 +244,8 @@ jobs:
|
||||||
- name: Use ARMv7 container
|
- name: Use ARMv7 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||||
bash -lc "pip install .[mindev] && \
|
bash -lc "set -x && \
|
||||||
|
pip install .[mindev] && \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner && \
|
||||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||||
|
|
@ -276,7 +279,8 @@ jobs:
|
||||||
- name: Use ARMv7 container
|
- name: Use ARMv7 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||||
bash -lc "pip install .[mindev] && \
|
bash -lc "set -x && \
|
||||||
|
pip install .[mindev] && \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner && \
|
||||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||||
|
|
@ -342,7 +346,7 @@ jobs:
|
||||||
path: build/runner/*
|
path: build/runner/*
|
||||||
|
|
||||||
release-stencils:
|
release-stencils:
|
||||||
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7]
|
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7, build-armv7thumb, build-armv7mthumb]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||||
permissions:
|
permissions:
|
||||||
|
|
@ -375,8 +379,6 @@ jobs:
|
||||||
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
|
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
|
||||||
cp tmp/runner-linux-armv6/coparun release/coparun-armv6
|
cp tmp/runner-linux-armv6/coparun release/coparun-armv6
|
||||||
cp tmp/runner-linux-armv7/coparun release/coparun-armv7
|
cp tmp/runner-linux-armv7/coparun release/coparun-armv7
|
||||||
cp tmp/runner-linux-armv7thumb/coparun release/coparun-armv7thumb
|
|
||||||
cp tmp/runner-linux-armv7mthumb/coparun release/coparun-armv7mthumb
|
|
||||||
cp tmp/runner-win/coparun*.exe release/
|
cp tmp/runner-win/coparun*.exe release/
|
||||||
|
|
||||||
TAG="${{ steps.version.outputs.version }}"
|
TAG="${{ steps.version.outputs.version }}"
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ The main features can be summarized as:
|
||||||
- Memory and type safety with a minimal set of runtime errors
|
- Memory and type safety with a minimal set of runtime errors
|
||||||
- Deterministic execution
|
- Deterministic execution
|
||||||
- Automatic differentiation for efficient realtime optimization (reverse-mode)
|
- Automatic differentiation for efficient realtime optimization (reverse-mode)
|
||||||
- Optimized machine code for x86_64, ARMv6, ARMv7 and AArch64
|
- Optimized machine code for x86_64, 32-bit ARM (Cortex-A and Cortex-M) and AArch64
|
||||||
- Highly portable to new architectures
|
- Highly portable to new architectures
|
||||||
- Small Python package with minimal dependencies and no cross-compile toolchain required
|
- Small Python package with minimal dependencies and no cross-compile toolchain required
|
||||||
|
|
||||||
|
|
@ -31,7 +31,6 @@ While hardware I/O is obviously a core aspect of the project, it is not yet avai
|
||||||
|
|
||||||
Currently in development:
|
Currently in development:
|
||||||
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
|
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
|
||||||
- Support for Thumb instructions required by ARM*-M targets (for MCUs)
|
|
||||||
- Constant regrouping for further symbolic optimization of the computation graph
|
- Constant regrouping for further symbolic optimization of the computation graph
|
||||||
|
|
||||||
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
|
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
|
||||||
|
|
@ -253,4 +252,4 @@ This project is licensed under the MIT license - see the [LICENSE](LICENSE) file
|
||||||
|
|
||||||
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
|
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
|
||||||
|
|
||||||
[^3]: Supported architectures: x86_64, AArch64, ARMv6 and 7 (non-Thumb). ARMv6/7-M (Thumb) support is in development. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
[^3]: Supported architectures: x86_64, AArch64, ARMv6/7 (non-Thumb) and ARMv7 Thumb for Cortex-A and Cortex-M. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
||||||
|
|
|
||||||
|
|
@ -343,6 +343,9 @@ class stencil_database():
|
||||||
elif pr.type.endswith('_ABS32'):
|
elif pr.type.endswith('_ABS32'):
|
||||||
# R_ARM_ABS32
|
# R_ARM_ABS32
|
||||||
# S + A (replaces full 32 bit)
|
# S + A (replaces full 32 bit)
|
||||||
|
assert not patch_offset % 4, 'R_ARM_ABS32 patched data like literals needs to be 4 Byte aligned'
|
||||||
|
# This might be caused by the call in entry_function_shell if not aligned
|
||||||
|
|
||||||
patch_value = symbol_address + pr.fields['r_addend']
|
patch_value = symbol_address + pr.fields['r_addend']
|
||||||
symbol_type = symbol_type + 0x03 # Relative to data section
|
symbol_type = symbol_type + 0x03 # Relative to data section
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,8 @@ def norm_indent(f: Callable[..., str]) -> Callable[..., str]:
|
||||||
def get_entry_function_shell() -> str:
|
def get_entry_function_shell() -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{entry_func_prefix}int entry_function_shell(){{
|
{entry_func_prefix}int entry_function_shell(){{
|
||||||
volatile char stack_place_holder[{stack_size}];
|
//volatile char stack_place_holder[{stack_size}];
|
||||||
stack_place_holder[0] = 0;
|
//stack_place_holder[0] = 0;
|
||||||
result_int(0);
|
result_int(0);
|
||||||
return 1;
|
return 1;
|
||||||
}}
|
}}
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,6 @@ cparch=$(python3 -c "import copapy; print(copapy._stencils.detect_process_arch()
|
||||||
# Disassemble stencil object file
|
# Disassemble stencil object file
|
||||||
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
|
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
|
||||||
|
|
||||||
# Create example code disassembly
|
|
||||||
python3 tools/make_example.py
|
python3 tools/make_example.py
|
||||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
|
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
|
||||||
|
|
||||||
|
|
@ -28,6 +27,10 @@ fi
|
||||||
|
|
||||||
echo "Archtitecture: '$cparch'"
|
echo "Archtitecture: '$cparch'"
|
||||||
|
|
||||||
|
if [[ "$cparch" == *"thumb"* ]]; then
|
||||||
|
objdump -D -b binary -marm -M force-thumb --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
||||||
|
else
|
||||||
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
||||||
|
fi
|
||||||
|
|
||||||
rm build/runner/test.copapy.bin
|
rm build/runner/test.copapy.bin
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Build arm-v7 runner and stencils
|
||||||
|
bash tools/build.sh arm-v7
|
||||||
|
|
||||||
|
# Build arm-v7-thumb stencils
|
||||||
|
bash tools/build.sh arm-v7-thumb
|
||||||
|
|
||||||
|
# Build arm-v7-thumb example code
|
||||||
|
export CP_TARGET_ARCH=armv7thumb
|
||||||
|
python3 tools/make_example.py
|
||||||
|
build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin
|
||||||
|
|
||||||
|
arm-none-eabi-objdump -D -b binary -marm -M force-thumb --adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7thumb.asm
|
||||||
|
|
||||||
|
# Build arm-v7 example code
|
||||||
|
export CP_TARGET_ARCH=armv7
|
||||||
|
python3 tools/make_example.py
|
||||||
|
build/runner/coparun-armv7 build/runner/test.copapy build/runner/test.copapy.bin
|
||||||
|
|
||||||
|
arm-none-eabi-objdump -D -b binary -marm --adjust-vma=0x1000000 build/runner/test.copapy.bin > build/runner/test.copapy-example-armv7.asm
|
||||||
Loading…
Reference in New Issue