nop instruction byte order reversed, optimisation added

This commit is contained in:
Nicolas 2025-10-07 22:58:04 +02:00
parent 59f3b162de
commit ba3c56647d
7 changed files with 35 additions and 310 deletions

View File

@ -27,7 +27,7 @@ jobs:
run: python -m pip install -e .[dev]
- name: Build ops obj files and runner
run: bash build.sh
run: bash src/copapy/obj/nativecompile.sh
- name: Run tests with pytest
run: pytest

2
.gitignore vendored
View File

@ -13,9 +13,11 @@ __pycache__
/bin/*
token.txt
/src/copapy/obj/*.o
/src/copapy/obj/old/*.o
runmem2
src/*.so
bin/*
build/*
/*.obj
/src/*.pyd
src/copapy/stencils.c

View File

@ -9,7 +9,7 @@ def get_function_start() -> str:
return """
int function_start(){
result_int(0); // dummy call instruction before marker gets striped
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
return 1;
}
"""
@ -19,7 +19,7 @@ def get_function_end() -> str:
return """
int function_end(){
result_int(0);
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
return 1;
}
"""
@ -28,9 +28,9 @@ def get_function_end() -> str:
def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
return f"""
void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
result_{type_out}_{type2}(arg1 {op_signs[op]} arg2, arg2);
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""
@ -38,9 +38,9 @@ def get_op_code(op: str, type1: str, type2: str, type_out: str) -> str:
def get_op_code_float(op: str, type1: str, type2: str) -> str:
return f"""
void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
result_float_{type2}((float)arg1 {op_signs[op]} arg2, arg2);
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""
@ -48,9 +48,9 @@ def get_op_code_float(op: str, type1: str, type2: str) -> str:
def get_op_code_int(op: str, type1: str, type2: str) -> str:
return f"""
void {op}_{type1}_{type2}({type1} arg1, {type2} arg2) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
result_int_{type2}((int)(arg1 {op_signs[op]} arg2), arg2);
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""
@ -70,9 +70,9 @@ def get_result_stubs2(type1: str, type2: str) -> str:
def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
return f"""
void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
result_{type_out}_{type2}(dummy_{type_out}, arg2);
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""
@ -80,9 +80,9 @@ def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str:
def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
return f"""
void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
result_{type1}_{type_out}(arg1, dummy_{type_out});
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""
@ -90,10 +90,10 @@ def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str:
def get_write_code(type1: str) -> str:
return f"""
void write_{type1}({type1} arg1) {{
asm volatile (".long 0x0F1F4400E1");
asm volatile (".long 0xE100441F0F");
dummy_{type1} = arg1;
result_{type1}(arg1);
asm volatile (".long 0x0F1F4400E2");
asm volatile (".long 0xE200441F0F");
}}
"""

View File

@ -19,7 +19,7 @@ OPT=O3
mkdir -p $DEST
# Native x86_64
gcc-12 -c $SRC -o $DEST/stencils_x86_64_$OPT.o
gcc-12 -$OPT -c $SRC -o $DEST/stencils_x86_64_$OPT.o
# ARM64
aarch64-linux-gnu-gcc-12 -$OPT -c $SRC -o $DEST/stencils_aarch64_$OPT.o

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Regenerate the stencil C source and compile it with gcc-12 into one object
# file per optimisation level (O0..O3), named stencils_x86_64_<level>.o.
# All paths are relative to the current working directory.
# Stop at the first command that fails.
set -e
# Echo each script line as the shell reads it, so the build log shows the commands.
set -v
# C source to compile and the directory that receives the object files.
SRC=src/copapy/stencils.c
DEST=src/copapy/obj
# Run the generator before compiling (presumably it writes $SRC -- TODO confirm).
python src/copapy/generate_stencils.py
# Make sure the output directory exists; -p makes this a no-op if it already does.
mkdir -p $DEST
# Same source, four optimisation levels; the level is encoded in the file name.
gcc-12 -c $SRC -O0 -o $DEST/stencils_x86_64_O0.o
gcc-12 -c $SRC -O1 -o $DEST/stencils_x86_64_O1.o
gcc-12 -c $SRC -O2 -o $DEST/stencils_x86_64_O2.o
gcc-12 -c $SRC -O3 -o $DEST/stencils_x86_64_O3.o

View File

@ -5,8 +5,8 @@ from enum import Enum
ByteOrder = Literal['little', 'big']
START_MARKER = 0x0F1F4400E1 # Nop on x86-64
END_MARKER = 0x0F1F4400E2 # Nop on x86-64
START_MARKER = 0xE100441F0F # Nop on x86-64
END_MARKER = 0xE200441F0F # Nop on x86-64
MARKER_LENGTH = 5
# on x86_64: call or jmp instruction when tail call optimized

View File

@ -1,291 +0,0 @@
// Auto-generated stencils for copapy
// Do not edit manually
volatile int dummy_int = 1337;
volatile float dummy_float = 1337;
void result_int(int arg1);
void result_float(float arg1);
void result_int_int(int arg1, int arg2);
void result_int_float(int arg1, float arg2);
void result_float_int(float arg1, int arg2);
void result_float_float(float arg1, float arg2);
void add_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 + arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void add_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 + arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void add_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 + arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void add_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 + arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void sub_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 - arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void sub_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 - arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void sub_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 - arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void sub_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 - arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void mul_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 * arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void mul_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 * arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void mul_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 * arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void mul_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 * arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void div_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int((float)arg1 / arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void div_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float((float)arg1 / arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void div_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 / arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void div_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 / arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void gt_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 > arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void gt_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 > arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void gt_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 > arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void gt_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 > arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void eq_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 == arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void eq_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 == arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void eq_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1 == arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void eq_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1 == arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void mod_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1 % arg2, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg0_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(dummy_int, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg1_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1, dummy_int);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg0_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(dummy_float, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg1_int_int(int arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_float(arg1, dummy_float);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg0_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_float(dummy_int, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg1_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(arg1, dummy_int);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg0_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(dummy_float, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg1_int_float(int arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_float(arg1, dummy_float);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg0_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_int(dummy_int, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg1_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1, dummy_int);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg0_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(dummy_float, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg1_float_int(float arg1, int arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1, dummy_float);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg0_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_int_float(dummy_int, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_int_reg1_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_int(arg1, dummy_int);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg0_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(dummy_float, arg2);
asm volatile (".long 0xF27ECAFE");
}
void read_float_reg1_float_float(float arg1, float arg2) {
asm volatile (".long 0xF17ECAFE");
result_float_float(arg1, dummy_float);
asm volatile (".long 0xF27ECAFE");
}
void write_int(int arg1) {
asm volatile (".long 0xF17ECAFE");
dummy_int = arg1;
result_int(arg1);
asm volatile (".long 0xF27ECAFE");
}
void write_float(float arg1) {
asm volatile (".long 0xF17ECAFE");
dummy_float = arg1;
result_float(arg1);
asm volatile (".long 0xF27ECAFE");
}
int function_start(){
result_int(0); // dummy call instruction before marker gets striped
asm volatile (".long 0xF27ECAFE");
return 1;
}
int function_end(){
result_int(0);
asm volatile (".long 0xF17ECAFE");
return 1;
}