mirror of https://github.com/Nonannet/copapy.git
Compare commits
1 Commit
01f02cc9ba
...
e428e16b83
| Author | SHA1 | Date |
|---|---|---|
|
|
e428e16b83 |
|
|
@ -219,6 +219,8 @@ jobs:
|
|||
-DENABLE_LOGGING \
|
||||
-o build/runner/coparun src/coparun/runmem.c \
|
||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
||||
python3 tools/make_example.py && \
|
||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin && \
|
||||
pytest && \
|
||||
bash tools/create_asm.sh"
|
||||
|
||||
|
|
@ -244,17 +246,19 @@ jobs:
|
|||
- name: Use ARMv7 container
|
||||
run: |
|
||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||
bash -lc "set -x && \
|
||||
pip install .[mindev] && \
|
||||
mkdir -p build/runner && \
|
||||
bash -lc "set -x; \
|
||||
pip install .[mindev]; \
|
||||
mkdir -p build/runner; \
|
||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||
-Wshadow -Wstrict-overflow -O3 \
|
||||
-DENABLE_LOGGING \
|
||||
-o build/runner/coparun src/coparun/runmem.c \
|
||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
||||
export CP_TARGET_ARCH=armv7thumb && \
|
||||
pytest && \
|
||||
src/coparun/coparun.c src/coparun/mem_man.c; \
|
||||
export CP_TARGET_ARCH=armv7thumb; \
|
||||
python3 tools/make_example.py; \
|
||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin; \
|
||||
pytest -m 'not runner'; \
|
||||
bash tools/create_asm.sh"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
|
@ -279,17 +283,19 @@ jobs:
|
|||
- name: Use ARMv7 container
|
||||
run: |
|
||||
docker run --rm -v $PWD:/app -w /app --platform linux/arm/v7 ghcr.io/nonannet/armv7_test:1 \
|
||||
bash -lc "set -x && \
|
||||
pip install .[mindev] && \
|
||||
mkdir -p build/runner && \
|
||||
bash -lc "set -x; \
|
||||
pip install .[mindev]; \
|
||||
mkdir -p build/runner; \
|
||||
gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \
|
||||
-Wall -Wextra -Wconversion -Wsign-conversion \
|
||||
-Wshadow -Wstrict-overflow -O3 \
|
||||
-DENABLE_LOGGING \
|
||||
-o build/runner/coparun src/coparun/runmem.c \
|
||||
src/coparun/coparun.c src/coparun/mem_man.c && \
|
||||
export CP_TARGET_ARCH=armv7mthumb && \
|
||||
pytest && \
|
||||
src/coparun/coparun.c src/coparun/mem_man.c; \
|
||||
export CP_TARGET_ARCH=armv7mthumb; \
|
||||
python3 tools/make_example.py; \
|
||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin; \
|
||||
pytest -m 'not runner'; \
|
||||
bash tools/create_asm.sh"
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
|
|
@ -346,7 +352,7 @@ jobs:
|
|||
path: build/runner/*
|
||||
|
||||
release-stencils:
|
||||
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7, build-armv7thumb, build-armv7mthumb]
|
||||
needs: [build_stencils, build-ubuntu, build-windows, build-arm64, build-armv6, build-armv7]
|
||||
runs-on: ubuntu-latest
|
||||
if: github.ref == 'refs/heads/main' && github.event_name == 'push'
|
||||
permissions:
|
||||
|
|
@ -379,6 +385,8 @@ jobs:
|
|||
cp tmp/runner-linux-arm64/coparun release/coparun-aarch64
|
||||
cp tmp/runner-linux-armv6/coparun release/coparun-armv6
|
||||
cp tmp/runner-linux-armv7/coparun release/coparun-armv7
|
||||
cp tmp/runner-linux-armv7thumb/coparun release/coparun-armv7thumb
|
||||
cp tmp/runner-linux-armv7mthumb/coparun release/coparun-armv7mthumb
|
||||
cp tmp/runner-win/coparun*.exe release/
|
||||
|
||||
TAG="${{ steps.version.outputs.version }}"
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ The main features can be summarized as:
|
|||
- Memory and type safety with a minimal set of runtime errors
|
||||
- Deterministic execution
|
||||
- Automatic differentiation for efficient realtime optimization (reverse-mode)
|
||||
- Optimized machine code for x86_64, 32 Bit ARM (Cortex-A and Cortex-M) and AArch64
|
||||
- Optimized machine code for x86_64, ARMv6, ARMv7 and AArch64
|
||||
- Highly portable to new architectures
|
||||
- Small Python package with minimal dependencies and no cross-compile toolchain required
|
||||
|
||||
|
|
@ -31,6 +31,7 @@ While hardware I/O is obviously a core aspect of the project, it is not yet avai
|
|||
|
||||
Currently in development:
|
||||
- Array stencils for handling very large arrays and generating SIMD-optimized code - e.g., for machine vision and neural network applications
|
||||
- Support for Thumb instructions required by ARM*-M targets (for MCUs)
|
||||
- Constant regrouping for further symbolic optimization of the computation graph
|
||||
|
||||
Despite missing SIMD-optimization, benchmark performance shows promising numbers. The following chart plots the results in comparison to NumPy 2.3.5:
|
||||
|
|
@ -252,4 +253,4 @@ This project is licensed under the MIT license - see the [LICENSE](LICENSE) file
|
|||
|
||||
[^2]: The compiler must support tail-call optimization (TCO). Currently, GCC is supported. Porting to a new architecture requires implementing a subset of relocation types used by that architecture.
|
||||
|
||||
[^3]: Supported architectures: x86_64, AArch64, ARMv6/7 (non-Thumb) and ARMv7 Thumb for Cortex-A and Cortex-M. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
||||
[^3]: Supported architectures: x86_64, AArch64, ARMv6 and 7 (non-Thumb). ARMv6/7-M (Thumb) support is in development. Code for x86 32-bit exists but has unresolved issues and a low priority.
|
||||
|
|
|
|||
|
|
@ -343,9 +343,6 @@ class stencil_database():
|
|||
elif pr.type.endswith('_ABS32'):
|
||||
# R_ARM_ABS32
|
||||
# S + A (replaces full 32 bit)
|
||||
assert not patch_offset % 4, 'R_ARM_ABS32 patched data like literals needs to be 4 Byte aligned'
|
||||
# This might be caused by the call in entry_function_shell if not aligned
|
||||
|
||||
patch_value = symbol_address + pr.fields['r_addend']
|
||||
symbol_type = symbol_type + 0x03 # Relative to data section
|
||||
|
||||
|
|
|
|||
|
|
@ -57,8 +57,8 @@ def norm_indent(f: Callable[..., str]) -> Callable[..., str]:
|
|||
def get_entry_function_shell() -> str:
|
||||
return f"""
|
||||
{entry_func_prefix}int entry_function_shell(){{
|
||||
//volatile char stack_place_holder[{stack_size}];
|
||||
//stack_place_holder[0] = 0;
|
||||
volatile char stack_place_holder[{stack_size}];
|
||||
stack_place_holder[0] = 0;
|
||||
result_int(0);
|
||||
return 1;
|
||||
}}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ cparch=$(python3 -c "import copapy; print(copapy._stencils.detect_process_arch()
|
|||
# Disassemble stencil object file
|
||||
objdump -d -x src/copapy/obj/stencils_${cparch}_O3.o > build/runner/stencils.asm
|
||||
|
||||
# Create example code disassembly
|
||||
python3 tools/make_example.py
|
||||
build/runner/coparun build/runner/test.copapy build/runner/test.copapy.bin
|
||||
|
||||
|
|
@ -27,10 +28,6 @@ fi
|
|||
|
||||
echo "Archtitecture: '$cparch'"
|
||||
|
||||
if [[ "$cparch" == *"thumb"* ]]; then
|
||||
objdump -D -b binary -marm -M force-thumb --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
||||
else
|
||||
objdump -D -b binary -m $cparch --adjust-vma=0x10000 build/runner/test.copapy.bin > build/runner/example.asm
|
||||
fi
|
||||
|
||||
rm build/runner/test.copapy.bin
|
||||
|
|
|
|||
Loading…
Reference in New Issue