diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index 46692f3..1622b83 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -22,6 +22,11 @@ jobs: name: stencil-object-files path: src/copapy/obj/*.o + - uses: actions/upload-artifact@v4 + with: + name: musl-object-files + path: /object_files/* + build_wheels: if: contains(github.ref, '-beta') == false needs: [build_stencils] diff --git a/docs/source/compiler.md b/docs/source/compiler.md index 078a74e..faab63c 100644 --- a/docs/source/compiler.md +++ b/docs/source/compiler.md @@ -1,4 +1,4 @@ -# Compiler +# How it works ```{toctree} :maxdepth: 1 :hidden: diff --git a/src/copapy/_autograd.py b/src/copapy/_autograd.py index 3abd97e..4840805 100644 --- a/src/copapy/_autograd.py +++ b/src/copapy/_autograd.py @@ -89,8 +89,11 @@ def grad(x: Any, y: value[Any] | Sequence[value[Any]] | vector[Any] | tensor[Any elif opn == 'sqrt': add_grad(a, g * (0.5 / cp.sqrt(a))) - #elif opn == 'abs': - # add_grad(x, g * cp.sign(x)) + elif opn == 'abs': + add_grad(a, g * cp.sign(a)) + + elif opn == 'neg': + add_grad(a, -g) elif opn == 'sin': add_grad(a, g * cp.cos(a)) diff --git a/src/copapy/_basic_types.py b/src/copapy/_basic_types.py index e71077a..0a120b2 100644 --- a/src/copapy/_basic_types.py +++ b/src/copapy/_basic_types.py @@ -1,5 +1,5 @@ import pkgutil -from typing import Any, Sequence, TypeVar, overload, TypeAlias, Generic, cast, Callable +from typing import Any, Sequence, TypeVar, overload, TypeAlias, Generic, Callable from ._stencils import stencil_database, detect_process_arch import copapy as cp from ._helper_types import TNum @@ -230,13 +230,11 @@ class value(Generic[TNum]): def __rfloordiv__(self, other: NumLike) -> Any: return add_op('floordiv', [other, self]) - def __abs__(self: TCPNum) -> TCPNum: - return cp.abs(self) # type: ignore + def __abs__(self: 'value[TNum]') -> 'value[TNum]': + return cp.abs(self) - def __neg__(self: TCPNum) -> TCPNum: - if 
self.dtype == 'float': - return cast(TCPNum, add_op('sub', [value(0.0), self])) - return cast(TCPNum, add_op('sub', [value(0), self])) + def __neg__(self: 'value[TNum]') -> 'value[TNum]': + return add_op('neg', [self]) def __gt__(self, other: TVarNumb) -> 'value[int]': return add_op('gt', [self, other], dtype='bool') @@ -362,7 +360,7 @@ class CPConstant(Node): return self.node_hash -class Write(Node): +class Store(Node): def __init__(self, input: value[Any] | Net | int | float): if isinstance(input, value): net = input.net @@ -372,7 +370,7 @@ class Write(Node): node = CPConstant(input) net = Net(node.dtype, node) - self.name = 'write_' + transl_type(net.dtype) + self.name = 'store_' + transl_type(net.dtype) self.args = (net,) self.node_hash = hash(self.name) ^ hash(net.source.node_hash) diff --git a/src/copapy/_compiler.py b/src/copapy/_compiler.py index 16676c6..9a17f8c 100644 --- a/src/copapy/_compiler.py +++ b/src/copapy/_compiler.py @@ -2,7 +2,7 @@ from typing import Generator, Iterable, Any from . 
import _binwrite as binw from ._stencils import stencil_database, patch_entry from collections import defaultdict, deque -from ._basic_types import Net, Node, Write, CPConstant, Op, transl_type +from ._basic_types import Net, Node, Store, CPConstant, Op, transl_type def stable_toposort(edges: Iterable[tuple[Node, Node]]) -> list[Node]: @@ -132,7 +132,7 @@ def get_const_nets(nodes: list[Node]) -> list[Net]: return [net_lookup[node] for node in nodes if isinstance(node, CPConstant)] -def add_read_ops(node_list: list[Node]) -> Generator[tuple[Net | None, Node], None, None]: +def add_load_ops(node_list: list[Node]) -> Generator[tuple[Net | None, Node], None, None]: """Add read node before each op where arguments are not already positioned correctly in the registers @@ -156,7 +156,7 @@ def add_read_ops(node_list: list[Node]) -> Generator[tuple[Net | None, Node], No #if net in registers: # print('x swap registers') type_list = ['int' if r is None else transl_type(r.dtype) for r in registers] - new_node = Op(f"read_{transl_type(net.dtype)}_reg{i}_" + '_'.join(type_list), []) + new_node = Op(f"load_{transl_type(net.dtype)}_reg{i}_" + '_'.join(type_list), []) yield net, new_node registers[i] = net @@ -170,7 +170,7 @@ def add_read_ops(node_list: list[Node]) -> Generator[tuple[Net | None, Node], No yield None, node -def add_write_ops(net_node_list: list[tuple[Net | None, Node]], const_nets: list[Net]) -> Generator[tuple[Net | None, Node], None, None]: +def add_store_ops(net_node_list: list[tuple[Net | None, Node]], const_nets: list[Net]) -> Generator[tuple[Net | None, Node], None, None]: """Add write operation for each new defined net if a read operation is later followed Returns: @@ -181,19 +181,19 @@ def add_write_ops(net_node_list: list[tuple[Net | None, Node]], const_nets: list # Initialize set of nets with constants stored_nets = set(const_nets) - #assert all(node.name.startswith('read_') for net, node in net_node_list if net) + #assert all(node.name.startswith('load_') 
for net, node in net_node_list if net) read_back_nets = { net for net, node in net_node_list - if net and node.name.startswith('read_')} + if net and node.name.startswith('load_')} registers: list[Net | None] = [None, None] for net, node in net_node_list: - if isinstance(node, Write): + if isinstance(node, Store): assert len(registers) == 2 type_list = [transl_type(r.dtype) if r else 'int' for r in registers] - yield node.args[0], Op(f"write_{type_list[0]}_reg0_" + '_'.join(type_list), node.args) - elif node.name.startswith('read_'): + yield node.args[0], Op(f"store_{type_list[0]}_reg0_" + '_'.join(type_list), node.args) + elif node.name.startswith('load_'): yield net, node else: yield None, node @@ -207,7 +207,7 @@ def add_write_ops(net_node_list: list[tuple[Net | None, Node]], const_nets: list if net in read_back_nets and net not in stored_nets: type_list = [transl_type(r.dtype) if r else 'int' for r in registers] - yield net, Op(f"write_{type_list[0]}_reg0_" + '_'.join(type_list), []) + yield net, Op(f"store_{type_list[0]}_reg0_" + '_'.join(type_list), []) stored_nets.add(net) @@ -344,8 +344,8 @@ def compile_to_dag(node_list: Iterable[Node], sdb: stencil_database) -> tuple[bi ordered_ops = list(stable_toposort(get_all_dag_edges(node_list))) const_net_list = get_const_nets(ordered_ops) - output_ops = list(add_read_ops(ordered_ops)) - extended_output_ops = list(add_write_ops(output_ops, const_net_list)) + output_ops = list(add_load_ops(ordered_ops)) + extended_output_ops = list(add_store_ops(output_ops, const_net_list)) dw = binw.data_writer(sdb.byteorder) diff --git a/src/copapy/_target.py b/src/copapy/_target.py index 5648173..575c013 100644 --- a/src/copapy/_target.py +++ b/src/copapy/_target.py @@ -2,7 +2,7 @@ from typing import Iterable, overload, TypeVar, Any, Callable, TypeAlias from . 
import _binwrite as binw from coparun_module import coparun, read_data_mem, create_target, clear_target import struct -from ._basic_types import value, Net, Node, Write, NumLike, ArrayType, stencil_db_from_package +from ._basic_types import value, Net, Node, Store, NumLike, ArrayType, stencil_db_from_package from ._compiler import compile_to_dag T = TypeVar("T", int, float) @@ -76,13 +76,13 @@ class Target(): if isinstance(input, ArrayType): for v in input.values: if isinstance(v, value): - nodes.append(Write(v)) + nodes.append(Store(v)) elif isinstance(input, Iterable): for v in input: if isinstance(v, value): - nodes.append(Write(v)) + nodes.append(Store(v)) elif isinstance(input, value): - nodes.append(Write(input)) + nodes.append(Store(input)) dw, self._values = compile_to_dag(nodes, self.sdb) dw.write_com(binw.Command.END_COM) diff --git a/src/copapy/backend.py b/src/copapy/backend.py index 4ff6109..1593c57 100644 --- a/src/copapy/backend.py +++ b/src/copapy/backend.py @@ -4,10 +4,10 @@ and give access to compiler internals and debugging tools. 
""" from ._target import add_read_command -from ._basic_types import Net, Op, Node, CPConstant, Write, stencil_db_from_package +from ._basic_types import Net, Op, Node, CPConstant, Store, stencil_db_from_package from ._compiler import compile_to_dag, \ - stable_toposort, get_const_nets, get_all_dag_edges, add_read_ops, get_all_dag_edges_between, \ - add_write_ops, get_dag_stats + stable_toposort, get_const_nets, get_all_dag_edges, add_load_ops, get_all_dag_edges_between, \ + add_store_ops, get_dag_stats __all__ = [ "add_read_command", @@ -15,14 +15,14 @@ __all__ = [ "Op", "Node", "CPConstant", - "Write", + "Store", "compile_to_dag", "stable_toposort", "get_const_nets", "get_all_dag_edges", "get_all_dag_edges_between", - "add_read_ops", - "add_write_ops", + "add_load_ops", + "add_store_ops", "stencil_db_from_package", "get_dag_stats" ] diff --git a/stencils/generate_stencils.py b/stencils/generate_stencils.py index 61846c0..bc2e326 100644 --- a/stencils/generate_stencils.py +++ b/stencils/generate_stencils.py @@ -187,27 +187,27 @@ def get_result_stubs2(type1: str, type2: str) -> str: @norm_indent -def get_read_reg0_code(type1: str, type2: str, type_out: str) -> str: +def get_load_reg0_code(type1: str, type2: str, type_out: str) -> str: return f""" - STENCIL void read_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ + STENCIL void load_{type_out}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type_out}_{type2}(dummy_{type_out}, arg2); }} """ @norm_indent -def get_read_reg1_code(type1: str, type2: str, type_out: str) -> str: +def get_load_reg1_code(type1: str, type2: str, type_out: str) -> str: return f""" - STENCIL void read_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{ + STENCIL void load_{type_out}_reg1_{type1}_{type2}({type1} arg1, {type2} arg2) {{ result_{type1}_{type_out}(arg1, dummy_{type_out}); }} """ @norm_indent -def get_write_code(type1: str, type2: str) -> str: +def get_store_code(type1: str, type2: str) -> str: 
return f""" - STENCIL void write_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ + STENCIL void store_{type1}_reg0_{type1}_{type2}({type1} arg1, {type2} arg2) {{ dummy_{type1} = arg1; result_{type1}_{type2}(arg1, arg2); }} @@ -289,11 +289,11 @@ if __name__ == "__main__": code += get_op_code('mod', 'int', 'int', 'int') for t1, t2, t_out in permutate(types, types, types): - code += get_read_reg0_code(t1, t2, t_out) - code += get_read_reg1_code(t1, t2, t_out) + code += get_load_reg0_code(t1, t2, t_out) + code += get_load_reg1_code(t1, t2, t_out) for t1, t2 in permutate(types, types): - code += get_write_code(t1, t2) + code += get_store_code(t1, t2) print(f"Write file {args.path}...") with open(args.path, 'w') as f: diff --git a/tests/test_ast_gen.py b/tests/test_ast_gen.py index ee36f05..84f9bf6 100644 --- a/tests/test_ast_gen.py +++ b/tests/test_ast_gen.py @@ -1,5 +1,5 @@ from copapy import value -from copapy.backend import Write +from copapy.backend import Store import copapy.backend as cpb @@ -19,16 +19,16 @@ def test_ast_generation(): #i1 = c1 * 2 #r1 = i1 + 7 #r2 = i1 + 9 - #out = [Write(r1), Write(r2)] + #out = [Store(r1), Store(r2)] c1 = value(4) c2 = value(2) #i1 = c1 * 2 #r1 = i1 + 7 + (c2 + 7 * 9) #r2 = i1 + 9 - #out = [Write(r1), Write(r2)] + #out = [Store(r1), Store(r2)] r1 = c1 * 5 + 8 + c2 * 3 - out = [Write(r1)] + out = [Store(r1)] print(out) print('-- get_edges:') @@ -48,12 +48,12 @@ def test_ast_generation(): print('#', p) print('-- add_read_ops:') - output_ops = list(cpb.add_read_ops(ordered_ops)) + output_ops = list(cpb.add_load_ops(ordered_ops)) for p in output_ops: print('#', p) print('-- add_write_ops:') - extended_output_ops = list(cpb.add_write_ops(output_ops, const_list)) + extended_output_ops = list(cpb.add_store_ops(output_ops, const_list)) for p in extended_output_ops: print('#', p) print('--') diff --git a/tests/test_autograd.py b/tests/test_autograd.py index f8b113d..fb79f55 100644 --- a/tests/test_autograd.py +++ 
b/tests/test_autograd.py @@ -13,7 +13,7 @@ def test_autograd(): c += c + 1 c += 1 + c + (-a) d += d * 2 + cp.relu(b + a) - d += 3 * d + cp.relu(b - a) + d += 3 * d + cp.relu(-a + b) e = c - d f = e**2 g = f / 2.0 @@ -34,5 +34,26 @@ def test_autograd(): assert pytest.approx(dg[1], abs=1e-4) == 645.57725 # pyright: ignore[reportUnknownMemberType] +def test_autograd_extended(): + a = value(-4.0) + b = value(2.0) + c = a + b + d = a * b + b**3 + c += c + 1 + c += 1 + c + (-a) + d += d * 2 + cp.relu(b + a) + d += 3 * d + cp.relu(b - a) + e = c - cp.sin(-d) + f = cp.abs(e**2) + g = f / 2.0 + g += 10.0 / f + + dg = grad(g, (a, b)) + + tg = cp.Target() + tg.compile(g, dg) + tg.run() + + if __name__ == "__main__": test_autograd() diff --git a/tests/test_branching_stencils.py b/tests/test_branching_stencils.py index ae26f64..6ab929b 100644 --- a/tests/test_branching_stencils.py +++ b/tests/test_branching_stencils.py @@ -1,5 +1,5 @@ from copapy import value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import copapy as cp import subprocess from copapy import _binwrite @@ -22,7 +22,7 @@ def test_compile(): # Function with no passing-on-jump as last instruction: ret_test = [r for v in test_vals for r in (cp.tan(value(v)),)] - out = [Write(r) for r in ret_test] + out = [Store(r) for r in ret_test] il, variables = compile_to_dag(out, copapy.generic_sdb) diff --git a/tests/test_comp_timing.py b/tests/test_comp_timing.py index 5a1fe81..a2ffb20 100644 --- a/tests/test_comp_timing.py +++ b/tests/test_comp_timing.py @@ -1,6 +1,6 @@ import time from copapy import backend -from copapy.backend import Write, stencil_db_from_package +from copapy.backend import Store, stencil_db_from_package import copapy.backend as cpb import copapy as cp import copapy._binwrite as binw @@ -13,7 +13,7 @@ def test_timing_compiler(): #t2 = t1.sum() t3 = cp.vector(cp.value(1 / (v + 1)) for v in range(256)) t5 = ((t3 * 
t1) * 2).magnitude() - out = [Write(t5)] + out = [Store(t5)] print(out) @@ -45,7 +45,7 @@ def test_timing_compiler(): print('-- add_read_ops:') t0 = time.time() - output_ops = list(cpb.add_read_ops(ordered_ops)) + output_ops = list(cpb.add_load_ops(ordered_ops)) t1 = time.time() #for p in output_ops: # print('#', p) @@ -53,7 +53,7 @@ def test_timing_compiler(): print('-- add_write_ops:') t0 = time.time() - extended_output_ops = list(cpb.add_write_ops(output_ops, const_net_list)) + extended_output_ops = list(cpb.add_store_ops(output_ops, const_net_list)) t1 = time.time() #for p in extended_output_ops: # print('#', p) diff --git a/tests/test_compile.py b/tests/test_compile.py index 8f88aac..7c0c674 100644 --- a/tests/test_compile.py +++ b/tests/test_compile.py @@ -1,5 +1,5 @@ from copapy import NumLike -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import copapy as cp import subprocess import struct @@ -58,7 +58,7 @@ def test_compile(): ret = (t2, t4, t5) - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] il, variables = compile_to_dag(out, copapy.generic_sdb) diff --git a/tests/test_compile_aarch64.py b/tests/test_compile_aarch64.py index 1cdeb14..2a56385 100644 --- a/tests/test_compile_aarch64.py +++ b/tests/test_compile_aarch64.py @@ -1,5 +1,5 @@ from copapy import NumLike -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -52,7 +52,7 @@ def test_compile(): ret = (t2, t4, t5) - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] sdb = backend.stencil_db_from_package('arm64') il, variables = compile_to_dag(out, sdb) diff --git a/tests/test_compile_armv7.py b/tests/test_compile_armv7.py index 79ea027..fec2eab 100644 --- a/tests/test_compile_armv7.py +++ b/tests/test_compile_armv7.py @@ -1,5 
+1,5 @@ from copapy import NumLike -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -52,7 +52,7 @@ def test_compile(): ret = (t2, t4, t5) - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] sdb = backend.stencil_db_from_package('armv7') il, variables = compile_to_dag(out, sdb) diff --git a/tests/test_compile_div.py b/tests/test_compile_div.py index 8b355ad..1133d1e 100644 --- a/tests/test_compile_div.py +++ b/tests/test_compile_div.py @@ -1,5 +1,5 @@ from copapy import value, NumLike -from copapy.backend import Write, compile_to_dag, add_read_command, Net +from copapy.backend import Store, compile_to_dag, add_read_command import copapy import subprocess from copapy import _binwrite @@ -26,7 +26,7 @@ def test_compile(): ret = function(c1) - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] il, vars = compile_to_dag(out, copapy.generic_sdb) diff --git a/tests/test_compile_math.py b/tests/test_compile_math.py index 75731a5..3376068 100644 --- a/tests/test_compile_math.py +++ b/tests/test_compile_math.py @@ -1,5 +1,5 @@ from copapy import value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import copapy as cp import subprocess from copapy import _binwrite @@ -21,7 +21,7 @@ def test_compile_sqrt(): ret = [r for v in test_vals for r in (cp.sqrt(value(v)),)] - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] il, variables = compile_to_dag(out, copapy.generic_sdb) @@ -55,7 +55,7 @@ def test_compile_log(): ret = [r for v in test_vals for r in (cp.log(value(v)),)] - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] il, variables = compile_to_dag(out, copapy.generic_sdb) @@ -89,7 +89,7 @@ def test_compile_sin(): ret = [r for v in test_vals for r in (cp.sin(value(v)),)] - 
out = [Write(r) for r in ret] + out = [Store(r) for r in ret] il, variables = compile_to_dag(out, copapy.generic_sdb) diff --git a/tests/test_dag_optimization.py b/tests/test_dag_optimization.py index a340e19..b381c70 100644 --- a/tests/test_dag_optimization.py +++ b/tests/test_dag_optimization.py @@ -1,12 +1,12 @@ import copapy as cp from copapy import value -from copapy.backend import get_dag_stats, Write +from copapy.backend import get_dag_stats, Store import copapy.backend as cpb from typing import Any def show_dag(val: value[Any]): - out = [Write(val.net)] + out = [Store(val.net)] print(out) print('-- get_edges:') @@ -26,12 +26,12 @@ def show_dag(val: value[Any]): print('#', p) print('-- add_read_ops:') - output_ops = list(cpb.add_read_ops(ordered_ops)) + output_ops = list(cpb.add_load_ops(ordered_ops)) for p in output_ops: print('#', p) print('-- add_write_ops:') - extended_output_ops = list(cpb.add_write_ops(output_ops, const_list)) + extended_output_ops = list(cpb.add_store_ops(output_ops, const_list)) for p in extended_output_ops: print('#', p) print('--') diff --git a/tests/test_ops_aarch64.py b/tests/test_ops_aarch64.py index 2158494..d733425 100644 --- a/tests/test_ops_aarch64.py +++ b/tests/test_ops_aarch64.py @@ -1,5 +1,5 @@ from copapy import NumLike, iif, value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -91,7 +91,7 @@ def test_compile(): ret_test = function1(c_i) + function1(c_f) + function2(c_i) + function2(c_f) + function3(c_i) + function4(c_i) + function5(c_b) + [value(9) % 2] + iiftests(c_i) + iiftests(c_f) + [cp.asin(c_i/10)] ret_ref = function1(9) + function1(1.111) + function2(9) + function2(1.111) + function3(9) + function4(9) + function5(True) + [9 % 2] + iiftests(9) + iiftests(1.111) + [cp.asin(9/10)] - out = [Write(r) for r in ret_test] + out = [Store(r) for r in 
ret_test] #ret_test += [c_i, v2] #ret_ref += [9, 4.44, -4.44] diff --git a/tests/test_ops_armv6.py b/tests/test_ops_armv6.py index 1797c95..0d716cb 100644 --- a/tests/test_ops_armv6.py +++ b/tests/test_ops_armv6.py @@ -1,5 +1,5 @@ from copapy import NumLike, iif, value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -96,7 +96,7 @@ def test_compile(): #ret_test = (c_i * 100 // 5, c_f * 10 // 5) #ret_ref = (9 * 100 // 5, 1.111 * 10 // 5) - out = [Write(r) for r in ret_test] + out = [Store(r) for r in ret_test] sdb = backend.stencil_db_from_package('armv6') dw, variables = compile_to_dag(out, sdb) diff --git a/tests/test_ops_armv7.py b/tests/test_ops_armv7.py index 31e8e05..c354eff 100644 --- a/tests/test_ops_armv7.py +++ b/tests/test_ops_armv7.py @@ -1,5 +1,5 @@ from copapy import NumLike, iif, value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -96,7 +96,7 @@ def test_compile(): #ret_test = (c_i * 100 // 5, c_f * 10 // 5) #ret_ref = (9 * 100 // 5, 1.111 * 10 // 5) - out = [Write(r) for r in ret_test] + out = [Store(r) for r in ret_test] sdb = backend.stencil_db_from_package('armv7') dw, variables = compile_to_dag(out, sdb) diff --git a/tests/test_ops_x86.py b/tests/test_ops_x86.py index c7825d3..c6d0643 100644 --- a/tests/test_ops_x86.py +++ b/tests/test_ops_x86.py @@ -1,5 +1,5 @@ from copapy import NumLike, iif, value -from copapy.backend import Write, compile_to_dag, add_read_command +from copapy.backend import Store, compile_to_dag, add_read_command import subprocess from copapy import _binwrite import copapy.backend as backend @@ -104,7 +104,7 @@ def test_compile(): #ret_test = [cp.get_42(c_i)] #ret_ref = [cp.get_42(9)] - 
out = [Write(r) for r in ret_test] + out = [Store(r) for r in ret_test] #ret_test += [c_i, v2] #ret_ref += [9, 4.44, -4.44] @@ -185,7 +185,7 @@ def test_vector_compile(): ret = (t2, t4, t5) - out = [Write(r) for r in ret] + out = [Store(r) for r in ret] sdb = backend.stencil_db_from_package('x86') il, variables = compile_to_dag(out, sdb) @@ -243,7 +243,7 @@ def test_sinus(): ret_test = [si, e] ret_ref = [cp.sin(a_val), (a_val + 0.87 * 2.0) ** 2 + cp.sin(a_val) + cp.sqrt(0.87)] - out = [Write(r) for r in ret_test] + out = [Store(r) for r in ret_test] sdb = backend.stencil_db_from_package('x86') dw, variables = compile_to_dag(out, sdb) diff --git a/tools/build.bat b/tools/build.bat index b98b23e..80a529f 100644 --- a/tools/build.bat +++ b/tools/build.bat @@ -1,68 +1,156 @@ +@echo off +setlocal ENABLEDELAYEDEXPANSION + +set ARCH=%1 +if "%ARCH%"=="" set ARCH=x86_64 + +if not "%ARCH%"=="x86_64" ^ +if not "%ARCH%"=="x86" ^ +if not "%ARCH%"=="arm64" ^ +if not "%ARCH%"=="arm-v6" ^ +if not "%ARCH%"=="arm-v7" ^ +if not "%ARCH%"=="all" ( + echo Usage: %0 [x86_64^|x86^|arm64^|arm-v6^|arm-v7^|all] + exit /b 1 +) + mkdir build\stencils mkdir build\runner -python stencils/generate_stencils.py build/stencils/stencils.c +python stencils/generate_stencils.py build\stencils\stencils.c + +REM ============================================================ +REM x86_64 +REM ============================================================ +if "%ARCH%"=="x86_64" goto BUILD_X86_64 +if "%ARCH%"=="all" goto BUILD_X86_64 +goto SKIP_X86_64 + +:BUILD_X86_64 echo -------------x86_64 - 64 bit----------------- + call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + echo - Compile stencil test... cl /Zi /Od stencils\test.c /Fe:build\stencils\test.exe echo - Build runner for Windows 64 bit... 
-cl /Zi /Od /DENABLE_BASIC_LOGGING src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:build\runner\coparun.exe +cl /Zi /Od /DENABLE_BASIC_LOGGING ^ + src\coparun\runmem.c ^ + src\coparun\coparun.c ^ + src\coparun\mem_man.c ^ + /Fe:build\runner\coparun.exe -REM Optimized: -REM cl /O2 src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:build\runner\coparun.exe - -echo - Build stencils for 64 bit... -REM ../copapy/tools/cross_compiler_unix/packobjs.sh gcc ld ../copapy/build/musl/musl_objects_x86_64.o +echo - Build stencils for x86_64... wsl gcc -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl ld -r build/stencils/stencils.o build/musl/musl_objects_x86_64.o -o src/copapy/obj/stencils_x86_64_O3.o wsl objdump -d -x src/copapy/obj/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm -echo ---------------x86 - 32 bit--------------- +:SKIP_X86_64 + +REM ============================================================ +REM x86 32-bit +REM ============================================================ +if "%ARCH%"=="x86" goto BUILD_X86 +if "%ARCH%"=="all" goto BUILD_X86 +goto SKIP_X86 + +:BUILD_X86 +echo ---------------x86 - 32 bit---------------- + call "C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x86 echo - Build runner for Windows 32 bit... -cl /Zi /Od /DENABLE_LOGGING src\coparun\runmem.c src\coparun\coparun.c src\coparun\mem_man.c /Fe:build\runner\coparun-x86.exe +cl /Zi /Od /DENABLE_LOGGING ^ + src\coparun\runmem.c ^ + src\coparun\coparun.c ^ + src\coparun\mem_man.c ^ + /Fe:build\runner\coparun-x86.exe -echo - Build runner for linux x86 32 bit... -wsl i686-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-x86 +echo - Build runner for Linux x86 32 bit... 
+wsl i686-linux-gnu-gcc-12 -static -O3 -DENABLE_LOGGING ^ + src/coparun/runmem.c ^ + src/coparun/coparun.c ^ + src/coparun/mem_man.c ^ + -o build/runner/coparun-x86 echo - Build stencils x86 32 bit... -REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh i686-linux-gnu-gcc-12 i686-linux-gnu-ld ../copapy/build/musl/musl_objects_x86.o -fno-pic wsl i686-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl i686-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_x86.o -o src/copapy/obj/stencils_x86_O3.o wsl i686-linux-gnu-objdump -d -x src/copapy/obj/stencils_x86_O3.o > build/stencils/stencils_x86_O3.asm +:SKIP_X86 + +REM ============================================================ +REM ARM64 +REM ============================================================ +if "%ARCH%"=="arm64" goto BUILD_ARM64 +if "%ARCH%"=="all" goto BUILD_ARM64 +goto SKIP_ARM64 + +:BUILD_ARM64 +echo --------------arm64 64 bit---------------- -echo --------------arm64 64 bit---------------- wsl aarch64-linux-gnu-gcc-12 -fno-pic -ffunction-sections -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o wsl aarch64-linux-gnu-ld -r build/stencils/stencils.o build/musl/musl_objects_arm64.o -o src/copapy/obj/stencils_arm64_O3.o wsl aarch64-linux-gnu-objdump -d -x src/copapy/obj/stencils_arm64_O3.o > build/stencils/stencils_arm64_O3.asm -echo ------------------------------ -echo - Build runner for Aarch64... -wsl aarch64-linux-gnu-gcc-12 -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-aarch64 +echo - Build runner for AArch64... 
+wsl aarch64-linux-gnu-gcc-12 -static -O3 -DENABLE_LOGGING ^ + src/coparun/runmem.c ^ + src/coparun/coparun.c ^ + src/coparun/mem_man.c ^ + -o build/runner/coparun-aarch64 + +:SKIP_ARM64 + +REM ============================================================ +REM ARM v6 +REM ============================================================ +if "%ARCH%"=="arm-v6" goto BUILD_ARMV6 +if "%ARCH%"=="all" goto BUILD_ARMV6 +goto SKIP_ARMV6 + +:BUILD_ARMV6 +echo --------------arm-v6 32 bit---------------- + +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm ^ + -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o + +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6.o ^ + $(arm-none-eabi-gcc -print-libgcc-file-name) ^ + -o src/copapy/obj/stencils_armv6_O3.o -echo --------------arm-v6 32 bit---------------- -REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv6.o "-march=armv6 -mfpu=vfp -marm" -wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o -wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6.o $(arm-none-eabi-gcc -print-libgcc-file-name) -o src/copapy/obj/stencils_armv6_O3.o wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv6_O3.o > build/stencils/stencils_armv6_O3.asm -echo ------------------------------ -REM echo - Build runner -REM wsl arm-linux-gnueabihf-gcc -march=armv6 -mfpu=vfp -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 +:SKIP_ARMV6 +REM ============================================================ +REM ARM v7 +REM ============================================================ +if "%ARCH%"=="arm-v7" goto BUILD_ARMV7 
+if "%ARCH%"=="all" goto BUILD_ARMV7 +goto END + +:BUILD_ARMV7 +echo --------------arm-v7 32 bit---------------- + +wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm ^ + -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o + +wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o ^ + $(arm-none-eabi-gcc -print-libgcc-file-name) ^ + -o src/copapy/obj/stencils_armv7_O3.o -echo --------------arm-v7 32 bit---------------- -REM sh ../copapy/tools/cross_compiler_unix/packobjs.sh arm-none-eabi-gcc arm-none-eabi-ld ../copapy/build/musl/musl_objects_armv7.o "-march=armv7-a -mfpu=neon-vfpv3 -marm" -wsl arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -c build/stencils/stencils.c -O3 -o build/stencils/stencils.o -wsl arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o $(arm-none-eabi-gcc -print-libgcc-file-name) -o src/copapy/obj/stencils_armv7_O3.o wsl arm-none-eabi-objdump -d -x src/copapy/obj/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm +echo - Build runner for ARM v7... 
+wsl arm-linux-gnueabihf-gcc -static -O3 -DENABLE_LOGGING ^ + src/coparun/runmem.c ^ + src/coparun/coparun.c ^ + src/coparun/mem_man.c ^ + -o build/runner/coparun-armv7 -echo ------------------------------ -echo - Build runner -wsl arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 - +:END +echo Build completed for %ARCH% +endlocal diff --git a/tools/build.sh b/tools/build.sh index 2ece1e8..b7e3cd6 100644 --- a/tools/build.sh +++ b/tools/build.sh @@ -1,6 +1,16 @@ #!/bin/bash -set -e -set -v +set -eux + +ARCH=${1:-x86_64} + +case "$ARCH" in + (x86_64|arm-v6|arm-v7|all) + ;; + (*) + echo "Usage: $0 [x86_64|arm-v6|arm-v7|all]" + exit 1 + ;; +esac mkdir -p build/stencils mkdir -p build/runner @@ -10,34 +20,90 @@ DEST=src/copapy/obj python3 stencils/generate_stencils.py $SRC mkdir -p $DEST -gcc -fno-pic -ffunction-sections -c $SRC -O3 -o build/stencils/stencils.o -ld -r build/stencils/stencils.o build/musl/musl_objects_x86_64.o -o $DEST/stencils_x86_64_O3.o -objdump -d -x $DEST/stencils_x86_64_O3.o > build/stencils/stencils_x86_64_O3.asm +####################################### +# x86_64 +####################################### +if [[ "$ARCH" == "x86_64" || "$ARCH" == "all" ]]; then + echo "--------------x86_64----------------" -mkdir bin -p -gcc -Wall -Wextra -Wconversion -Wsign-conversion \ - -Wshadow -Wstrict-overflow -Werror -g -O3 \ - -DENABLE_LOGGING \ - src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun + gcc -fno-pic -ffunction-sections -c $SRC -O3 -o build/stencils/stencils.o + ld -r build/stencils/stencils.o build/musl/musl_objects_x86_64.o \ + -o $DEST/stencils_x86_64_O3.o + objdump -d -x $DEST/stencils_x86_64_O3.o \ + > build/stencils/stencils_x86_64_O3.asm + mkdir -p bin + gcc -Wall -Wextra -Wconversion 
-Wsign-conversion \ + -Wshadow -Wstrict-overflow -Werror -g -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun +fi -echo "--------------arm-v6 32 bit----------------" -LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) -#LIBM=$(arm-none-eabi-gcc -print-file-name=libm.a) -#LIBC=$(arm-none-eabi-gcc -print-file-name=libc.a) +####################################### +# ARM v6 +####################################### +if [[ "$ARCH" == "arm-v6" || "$ARCH" == "all" ]]; then + echo "--------------arm-v6 32 bit----------------" -arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm -c $SRC -O3 -o build/stencils/stencils.o -arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv6.o $LIBGCC -o $DEST/stencils_armv6_O3.o -arm-none-eabi-objdump -d -x $DEST/stencils_armv6_O3.o > build/stencils/stencils_armv6_O3.asm -arm-linux-gnueabihf-gcc -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv6 + LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) + arm-none-eabi-gcc -fno-pic -ffunction-sections \ + -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm \ + -c $SRC -O3 -o build/stencils/stencils.o -echo "--------------arm-v7 32 bit----------------" -LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) -#LIBM=$(arm-none-eabi-gcc -print-file-name=libm.a) -#LIBC=$(arm-none-eabi-gcc -print-file-name=libc.a) + arm-none-eabi-ld -r \ + build/stencils/stencils.o \ + build/musl/musl_objects_armv6.o \ + $LIBGCC \ + -o $DEST/stencils_armv6_O3.o -arm-none-eabi-gcc -fno-pic -ffunction-sections -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -c $SRC -O3 -o build/stencils/stencils.o -arm-none-eabi-ld -r build/stencils/stencils.o build/musl/musl_objects_armv7.o $LIBGCC -o 
$DEST/stencils_armv7_O3.o -arm-none-eabi-objdump -d -x $DEST/stencils_armv7_O3.o > build/stencils/stencils_armv7_O3.asm -arm-linux-gnueabihf-gcc -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static -Wall -Wextra -Wconversion -Wsign-conversion -Wshadow -Wstrict-overflow -O3 -DENABLE_LOGGING src/coparun/runmem.c src/coparun/coparun.c src/coparun/mem_man.c -o build/runner/coparun-armv7 + arm-none-eabi-objdump -d -x \ + $DEST/stencils_armv6_O3.o \ + > build/stencils/stencils_armv6_O3.asm + + arm-linux-gnueabihf-gcc \ + -march=armv6 -mfpu=vfp -mfloat-abi=hard -marm -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun-armv6 +fi + +####################################### +# ARM v7 +####################################### +if [[ "$ARCH" == "arm-v7" || "$ARCH" == "all" ]]; then + echo "--------------arm-v7 32 bit----------------" + + LIBGCC=$(arm-none-eabi-gcc -print-libgcc-file-name) + + arm-none-eabi-gcc -fno-pic -ffunction-sections \ + -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm \ + -c $SRC -O3 -o build/stencils/stencils.o + + arm-none-eabi-ld -r \ + build/stencils/stencils.o \ + build/musl/musl_objects_armv7.o \ + $LIBGCC \ + -o $DEST/stencils_armv7_O3.o + + arm-none-eabi-objdump -d -x \ + $DEST/stencils_armv7_O3.o \ + > build/stencils/stencils_armv7_O3.asm + + arm-linux-gnueabihf-gcc \ + -march=armv7-a -mfpu=neon-vfpv3 -mfloat-abi=hard -marm -static \ + -Wall -Wextra -Wconversion -Wsign-conversion \ + -Wshadow -Wstrict-overflow -O3 \ + -DENABLE_LOGGING \ + src/coparun/runmem.c \ + src/coparun/coparun.c \ + src/coparun/mem_man.c \ + -o build/runner/coparun-armv7 +fi diff --git a/tools/make_example.py b/tools/make_example.py index 57c203c..abac7ff 100644 --- a/tools/make_example.py +++ b/tools/make_example.py @@ -1,5 +1,5 @@ from copapy import value -from copapy.backend import Write, 
compile_to_dag, stencil_db_from_package +from copapy.backend import Store, compile_to_dag, stencil_db_from_package from copapy._binwrite import Command input = value(9.0) @@ -8,7 +8,7 @@ result = input ** 2 / 3.3 + 5 arch = 'native' sdb = stencil_db_from_package(arch) -dw, _ = compile_to_dag([Write(result)], sdb) +dw, _ = compile_to_dag([Store(result)], sdb) # Instruct runner to dump patched code to a file: dw.write_com(Command.DUMP_CODE)