mirror of https://github.com/Nonannet/copapy.git
Compare commits
1 Commits
01f02cc9ba
...
e428e16b83
| Author | SHA1 | Date |
|---|---|---|
|
|
e428e16b83 |
|
|
@ -219,6 +219,8 @@ jobs:
|
||||||
-DENABLE_LOGGING \
|
-DENABLE_LOGGING \
|
||||||
-o build/runner/coparun src/coparun/runmem.c \
|
-o build/runner/coparun src/coparun/runmem.c \
|
||||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
src/coparun/coparun.c src/coparun/mem_man.c && \
|
||||||
|
python3 tools/make_example.py && \
|
||||||
|
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin && \
|
||||||
pytest && \
|
pytest && \
|
||||||
bash tools/create_asm.sh"
|
bash tools/create_asm.sh"
|
||||||
|
|
||||||
|
|
@ -244,17 +246,19 @@ jobs:
|
||||||
- name: Use ARMv7 container
|
- name: Use ARMv7 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||||
bash -lc "set -x && \
|
bash -lc "set -x; \
|
||||||
pip install .[mindev] && \
|
pip install .[mindev]; \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner; \
|
||||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||||
-Wshadow -Wstrict-overflow -O3 \
|
-Wshadow -Wstrict-overflow -O3 \
|
||||||
-DENABLE_LOGGING \
|
-DENABLE_LOGGING \
|
||||||
-o build/runner/coparun src/coparun/runmem.c \
|
-o build/runner/coparun src/coparun/runmem.c \
|
||||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
src/coparun/coparun.c src/coparun/mem_man.c; \
|
||||||
export CP_TARGET_ARCH=armv7thumb && \
|
export CP_TARGET_ARCH=armv7thumb; \
|
||||||
pytest && \
|
python3 tools/make_example.py; \
|
||||||
|
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin; \
|
||||||
|
pytest -m 'not runner'; \
|
||||||
bash tools/create_asm.sh"
|
bash tools/create_asm.sh"
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
|
|
@ -279,17 +283,19 @@ jobs:
|
||||||
- name: Use ARMv7 container
|
- name: Use ARMv7 container
|
||||||
run: |
|
run: |
|
||||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||||
bash -lc "set -x && \
|
bash -lc "set -x; \
|
||||||
pip install .[mindev] && \
|
pip install .[mindev]; \
|
||||||
mkdir -p build/runner && \
|
mkdir -p build/runner; \
|
||||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||||
-Wshadow -Wstrict-overflow -O3 \
|
-Wshadow -Wstrict-overflow -O3 \
|
||||||
-DENABLE_LOGGING \
|
-DENABLE_LOGGING \
|
||||||
-o build/runner/coparun src/coparun/runmem.c \
|
-o build/runner/coparun src/coparun/runmem.c \
|
||||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
src/coparun/coparun.c src/coparun/mem_man.c; \
|
||||||
export CP_TARGET_ARCH=armv7mthumb && \
|
export CP_TARGET_ARCH=armv7mthumb; \
|
||||||
pytest && \
|
python3 tools/make_example.py; \
|
||||||
|
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin; \
|
||||||
|
pytest -m 'not runner'; \
|
||||||
bash tools/create_asm.sh"
|
bash tools/create_asm.sh"
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
|
|
@ -346,7 +352,7 @@ jobs:
|
||||||
path: build/runner/*
|
path: build/runner/*
|
||||||
|
|
||||||
release-stencils:
|
release-stencils:
|
||||||
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7, build-armv7thumb, build-armv7mthumb]
|
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||||
permissions:
|
permissions:
|
||||||
|
|
@ -379,6 +385,8 @@ jobs:
|
||||||
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
|
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
|
||||||
cp tmp/runner-linux-armv6/coparun release/coparun-armv6
|
cp tmp/runner-linux-armv6/coparun release/coparun-armv6
|
||||||
cp tmp/runner-linux-armv7/coparun release/coparun-armv7
|
cp tmp/runner-linux-armv7/coparun release/coparun-armv7
|
||||||
|
cp tmp/runner-linux-armv7thumb/coparun release/coparun-armv7thumb
|
||||||
|
cp tmp/runner-linux-armv7mthumb/coparun release/coparun-armv7mthumb
|
||||||
cp tmp/runner-win/coparun*.exe release/
|
cp tmp/runner-win/coparun*.exe release/
|
||||||
|
|
||||||
TAG="${{ steps.version.outputs.version }}"
|
TAG="${{ steps.version.outputs.version }}"
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ The main features can be summarized as:
|
||||||
- Memory and type safety with a minimal set of runtime errors
|
- Memory and type safety with a minimal set of runtime errors
|
||||||
- Deterministic execution
|
- Deterministic execution
|
||||||
- Automatic differentiation for efficient realtime optimization (reverse-mode)
|
- Automatic differentiation for efficient realtime optimization (reverse-mode)
|
||||||
- Optimized machine code for x86_64, 32-bit ARM (Cortex-A and Cortex-M) and AArch64
|
- Optimized machine code for x86_64, ARMv6, ARMv7 and AArch64
|
||||||
- Highly portable to new architectures
|
- Highly portable to new architectures
|
||||||
- Small Python package with minimal dependencies and no cross-compile toolchain required
|
- Small Python package with minimal dependencies and no cross-compile toolchain required
|
||||||
|
|
||||||
|
|
@ -31,6 +31,7 @@ While hardware I/O is obviously a core aspect of the project, it is not yet avai
|
||||||
|
|
||||||
Currently in development:
|
Currently in development:
|
||||||
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
|
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
|
||||||
|
- Support for Thumb instructions required by ARM*-M targets (for MCUs)
|
||||||
- Constant regrouping for further symbolic optimization of the computation graph
|
- Constant regrouping for further symbolic optimization of the computation graph
|
||||||
|
|
||||||
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
|
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
|
||||||
|
|
@ -252,4 +253,4 @@ This project is licensed under the MIT license - see the [LICENSE](LICENSE) file
|
||||||
|
|
||||||
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
|
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
|
||||||
|
|
||||||
[^3]: Supported architectures: x86_64, AArch64, ARMv6/7 (non-Thumb) and ARMv7 Thumb for Cortex-A and Cortex-M. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
[^3]: Supported architectures: x86_64, AArch64, ARMv6 and ARMv7 (non-Thumb). ARMv6/7-M (Thumb) support is in development. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
||||||
|
|
|
||||||
|
|
@ -343,9 +343,6 @@ class stencil_database():
|
||||||
elif pr.type.endswith('_ABS32'):
|
elif pr.type.endswith('_ABS32'):
|
||||||
# R_ARM_ABS32
|
# R_ARM_ABS32
|
||||||
# S + A (replaces full 32 bit)
|
# S + A (replaces full 32 bit)
|
||||||
assert not patch_offset % 4, 'R_ARM_ABS32 patched data like literals needs to be 4 Byte aligned'
|
|
||||||
# This might be caused by the call in entry_function_shell if not aligned
|
|
||||||
|
|
||||||
patch_value = symbol_address + pr.fields['r_addend']
|
patch_value = symbol_address + pr.fields['r_addend']
|
||||||
symbol_type = symbol_type + 0x03 # Relative to data section
|
symbol_type = symbol_type + 0x03 # Relative to data section
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,8 @@ def norm_indent(f: Callable[..., str]) -> Callable[..., str]:
|
||||||
def get_entry_function_shell() -> str:
|
def get_entry_function_shell() -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{entry_func_prefix}int entry_function_shell(){{
|
{entry_func_prefix}int entry_function_shell(){{
|
||||||
//volatile char stack_place_holder[{stack_size}];
|
volatile char stack_place_holder[{stack_size}];
|
||||||
//stack_place_holder[0] = 0;
|
stack_place_holder[0] = 0;
|
||||||
result_int(0);
|
result_int(0);
|
||||||
return 1;
|
return 1;
|
||||||
}}
|
}}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ cparch=$(python3 -c "import copapy; print(copapy._stencils.detect_process_arch()
|
||||||
# Disassemble stencil object file
|
# Disassemble stencil object file
|
||||||
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
|
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
|
||||||
|
|
||||||
|
# Create example code disassembly
|
||||||
python3 tools/make_example.py
|
python3 tools/make_example.py
|
||||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
|
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
|
||||||
|
|
||||||
|
|
@ -27,10 +28,6 @@ fi
|
||||||
|
|
||||||
echo "Archtitecture: '$cparch'"
|
echo "Archtitecture: '$cparch'"
|
||||||
|
|
||||||
if [[ "$cparch" == *"thumb"* ]]; then
|
|
||||||
objdump -D -b binary -marm -M force-thumb --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
|
||||||
else
|
|
||||||
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
||||||
fi
|
|
||||||
|
|
||||||
rm build/runner/test.copapy.bin
|
rm build/runner/test.copapy.bin
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue