Merge pull request #37 from Nonannet/dev

Dev
2026-06-17 10:29:54 +02:00 · 2026-06-17 10:29:54 +02:00 · 4483757b12
parent 345c89dd1c 5c21bf2287
commit 4483757b12
10 changed files with 202 additions and 51 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -299,7 +299,7 @@ jobs:
  build-windows:
    needs: [build_stencils]
-    runs-on: windows-latest
+    runs-on: windows-2022
    strategy:
      matrix:
--- a/README.md
+++ b/README.md
@ -39,7 +39,7 @@ Despite missing SIMD-optimization, benchmark performance shows promising numbers
 For the benchmark (`tests/benchmark.py`) the timing of 30000 iterations for calculating the therm `sum((v1 + i) @ v2 for i in range(10))` where measured on an Ryzen 5 3400G. Where the vectors `v1` and `v2` both have a lengths of `v_size` which was varied according to the chart from 10 to 500. For the NumPy case the "i in range(10)" loop was vectorized like this: `np.sum((v1 + i) @ v2)` with i being here a `NDArray` with a dimension of `[10, 1]`. The number of calculated scalar operations is the same for both contenders. Obviously Copapy profits from less overheat by calling a single function from python per iteration, where the NumPy variant requires 3. Interestingly there is no indication visible in the chart that for increasing `v_size` the calling overhead for NumPy will be compensated by using faster SIMD instructions. It is to note that in this benchmark the Copapy case does not move any data between python and the compiled code.
-Furthermore for many applications copypy will benefit by reducing the actual number of operations significantly compared to a NumPy implementation, by precompute constant values know at compile time and benefiting from sparcity. Multiplying by zero (e.g. in a diagonal matrix) eliminate a hole branch in the computation graph. Operations without effect, like multiplications by 1 oder additions with zero gets eliminated at compile time.
+Furthermore for many applications Copapy performance will benefit by reducing the actual number of operations significantly compared to a NumPy implementation, by precompute constant values know at compile time and benefiting from sparcity. Multiplying by zero (e.g. in a diagonal matrix) eliminate a hole branch in the computation graph. Operations without effect, like multiplications by 1 oder additions with zero gets eliminated at compile time.
 For Testing and using Copapy to speed up computations in conventional Python programs there is also the `@cp.jit` decorator available, to compile functions on first use and cache the compiled version for later calls:
@ -188,7 +188,7 @@ For more complex operations - where inlining is less useful - stencils call a no
 Unlike stencils, non-stencil functions like `sinf` are not stripped and do not need to be tail-call-optimizable. These functions can be provided as C code and compiled together with the stencils or can be object files like in the case of `sinf` compiled from C and assembly code and merged into the stencil object files. Math functions like `sinf` are currently provided by the MUSL C library, with architecture-specific optimizations. 
-Non-stencil functions and constants are stored together with the stencils in an ELF object file for each supported CPU architecture. The required non-stencil functions and constants are bundled during compilation. The compiler includes only the data and code required for a specific Copapy program.
+Non-stencil functions and constants used by them are stored together with the stencils in ELF object files for each supported CPU architecture. The required non-stencil functions and constants are bundled during compilation. The compiler includes only the data and code required for a specific Copapy program.
 The Copapy compilation process is independent of the actual instruction set. It relies purely on relocation entries and symbol metadata from the ELF file generated by the C compiler.
--- a/pyproject.toml
+++ b/pyproject.toml
@ -30,7 +30,7 @@ where = ["src"]
 copapy = ["obj/*.o", "py.typed"]
 [tool.setuptools_scm]
-version_scheme = "post-release"
+version_scheme = "no-guess-dev"
 local_scheme = "no-local-version"
 tag_regex = "^v(?P<version>\\d+\\.\\d+\\.\\d+(?:-beta)?)$"
 fallback_version = "0.0.0"
--- a/src/copapy/init.py
+++ b/src/copapy/init.py
@ -37,8 +37,9 @@ from ._target import Target, jit
 from ._basic_types import NumLike, value, generic_sdb, iif
 from ._vectors import vector, distance, scalar_projection, angle_between, rotate_vector, vector_projection
 from ._quaternion import quaternion
-from ._tensors import tensor, zeros, ones, arange, eye, identity, diagonal
+from ._tensors import tensor, zeros, ones, arange, eye, identity, diagonal, concat
-from ._math import sqrt, abs, sign, sin, cos, tan, asin, acos, atan, atan2, log, exp, pow, get_42, clamp, min, max, relu
+from ._math import sqrt, abs, sign, sin, cos, tan, asin, acos, atan, atan2, log, exp, pow, get_42, clamp, minimum, maximum
 from ._nn import relu, sigmoid
 from ._autograd import grad
 from ._tensors import tensor as matrix
 from ._version import __version__  # Run "pip install -e ." to generate _version.py
@ -74,8 +75,8 @@ __all__ = [
    "pow",
    "get_42",
    "clamp",
-    "min",
+    "minimum",
-    "max",
+    "maximum",
    "relu",
    "distance",
    "scalar_projection",
@ -85,5 +86,7 @@ __all__ = [
    "quaternion",
    "grad",
    "eye",
    "concat",
    "sigmoid",
    "jit"
 ]
--- a/src/copapy/_math.py
+++ b/src/copapy/_math.py
@ -393,12 +393,20 @@ def clamp(x: U | value[U] | vector[U], min_value: U | value[U], max_value:  U |
@overload
-def min(x: value[U], y: U | value[U]) -> value[U]: ...
+def minimum(x: U, y: U) -> U: ...
@overload
-def min(x: U | value[U], y: value[U]) -> value[U]: ...
+def minimum(x: value[U], y: U | value[U]) -> value[U]: ...
@overload
-def min(x: U, y: U) -> U: ...
+def minimum(x: U | value[U], y: value[U]) -> value[U]: ...
-def min(x: U | value[U], y: U | value[U]) -> Any:
+@overload
 def minimum(x: vector[U], y: U | value[U] | vector[U]) -> vector[U]: ...
@overload
 def minimum(x: U | value[U] | vector[U], y: vector[U]) -> vector[U]: ...
@overload
 def minimum(x: tensor[U], y: U | value[U] | tensor[U]) -> tensor[U]: ...
@overload
 def minimum(x: U | value[U] | tensor[U], y: tensor[U]) -> tensor[U]: ...
 def minimum(x: U | value[U] | vector[U] | tensor[U], y: U | value[U] | vector[U] | tensor[U]) -> Any:
    """Minimum function to get the smaller of two values.
    Arguments:
@ -408,22 +416,30 @@ def min(x: U | value[U], y: U | value[U]) -> Any:
    Returns:
        Minimum of x and y
    """
-    if isinstance(x, value):
+    if isinstance(x, tensor) or isinstance(y, tensor):
        return _map2_tensor(x, y, minimum)
    if isinstance(x, vector) or isinstance(y, vector):
        return _map2_vector(x, y, minimum)
    if isinstance(x, value) or isinstance(y, value):
        return add_op('min', [x, y])
    if isinstance(x, tensor):
        return _map2_tensor(x, y, min)
    if isinstance(x, vector):
        return _map2_vector(x, y, min)
    return x if x < y else y
@overload
-def max(x: value[U], y: U | value[U]) -> value[U]: ...
+def maximum(x: U, y: U) -> U: ...
@overload
-def max(x: U | value[U], y: value[U]) -> value[U]: ...
+def maximum(x: value[U], y: U | value[U]) -> value[U]: ...
@overload
-def max(x: U, y: U) -> U: ...
+def maximum(x: U | value[U], y: value[U]) -> value[U]: ...
-def max(x: U | value[U], y: U | value[U]) -> Any:
+@overload
 def maximum(x: vector[U], y: U | value[U] | vector[U]) -> vector[U]: ...
@overload
 def maximum(x: U | value[U] | vector[U], y: vector[U]) -> vector[U]: ...
@overload
 def maximum(x: tensor[U], y: U | value[U] | tensor[U]) -> tensor[U]: ...
@overload
 def maximum(x: U | value[U] | tensor[U], y: tensor[U]) -> tensor[U]: ...
 def maximum(x: U | value[U] | vector[U] | tensor[U], y: U | value[U] | vector[U] | tensor[U]) -> Any:
    """Maximum function to get the larger of two values.
    Arguments:
@ -433,12 +449,12 @@ def max(x: U | value[U], y: U | value[U]) -> Any:
    Returns:
        Maximum of x and y
    """
-    if isinstance(x, value):
+    if isinstance(x, tensor) or isinstance(y, tensor):
        return _map2_tensor(x, y, maximum)
    if isinstance(x, vector) or isinstance(y, vector):
        return _map2_vector(x, y, maximum)
    if isinstance(x, value) or isinstance(y, value):
        return add_op('max', [x, y])
    if isinstance(x, tensor):
        return _map2_tensor(x, y, max)
    if isinstance(x, vector):
        return _map2_vector(x, y, max)
    return x if x > y else y
@ -470,20 +486,6 @@ def lerp(v1: U | value[U] | vector[U], v2: U | value[U] | vector[U], t:  unifloa
    return v1 * (1 - t) + v2 * t
@overload
 def relu(x: U) -> U: ...
@overload
 def relu(x: value[U]) -> value[U]: ...
@overload
 def relu(x: vector[U]) -> vector[U]: ...
@overload
 def relu(x: tensor[U]) -> tensor[U]: ...
 def relu(x: U | value[U] | vector[U] | tensor[U]) -> Any:
    """Returns x for x > 0 and otherwise 0."""
    ret = x * (x > 0)
    return ret
 def _map2_vector(self: VecNumLike, other: VecNumLike, func: Callable[[Any, Any], value[U] | U]) -> vector[U]:
    """Applies a function to each element of the vector and a second vector or scalar."""
    if isinstance(self, vector) and isinstance(other, vector):
@ -499,9 +501,9 @@ def _map2_vector(self: VecNumLike, other: VecNumLike, func: Callable[[Any, Any],
 def _map2_tensor(self: TensorNumLike, other: TensorNumLike, func: Callable[[Any, Any], value[U] | U]) -> tensor[U]:
    """Applies a function to each element of the vector and a second vector or scalar."""
    if isinstance(self, vector):
-        self = tensor(self.values, (len(self.values),))
+        self = tensor(self)
    if isinstance(other, vector):
-        other = tensor(other.values, (len(other.values),))
+        other = tensor(other)
    if isinstance(self, tensor) and isinstance(other, tensor):
        assert self.shape == other.shape, "Tensors must have the same shape"
        return tensor([func(x, y) for x, y in zip(self.values, other.values)], self.shape)
--- a/src/copapy/_nn.py
+++ b/src/copapy/_nn.py
@ -0,0 +1,34 @@
 from . import vector
 from . import tensor 
 from . import value
 from typing import TypeVar, Any, overload
 import copapy as cp
 U = TypeVar("U", int, float)
@overload
 def relu(x: U) -> U: ...
@overload
 def relu(x: value[U]) -> value[U]: ...
@overload
 def relu(x: vector[U]) -> vector[U]: ...
@overload
 def relu(x: tensor[U]) -> tensor[U]: ...
 def relu(x: U | value[U] | vector[U] | tensor[U]) -> Any:
    """Returns x for x > 0 and otherwise 0."""
    ret = x * (x > 0)
    return ret
@overload
 def sigmoid(x: U) -> float: ...
@overload
 def sigmoid(x: value[U]) -> value[float]: ...
@overload
 def sigmoid(x: vector[U]) -> vector[float]: ...
@overload
 def sigmoid(x: tensor[U]) -> tensor[float]: ...
 def sigmoid(x: U | value[U] | vector[U] | tensor[U]) -> Any:
    """Sigmoid function to map any value to the range (0, 1)."""
    return 1 / (1 + cp.exp(-x))
--- a/src/copapy/_tensors.py
+++ b/src/copapy/_tensors.py
@ -32,7 +32,7 @@ class tensor(ArrayType[TNum]):
            self.shape: tuple[int, ...] = tuple(shape)
            assert (isinstance(values, Sequence) and
                    any(isinstance(v, (value, int, float)) for v in values)), \
-                    "Values must be a sequence of values if shape is provided"
+                    "Values must be a sequence of scalars if shape is provided"
            self.values: tuple[TNum | value[TNum], ...] = tuple(v for v in values if not isinstance(v, Sequence))
            self.ndim: int = len(shape)
        elif isinstance(values, (int, float)):
@ -856,6 +856,84 @@ def ones(shape: Sequence[int] | int) -> tensor[int]:
    return tensor([1] * size, tuple(shape))
@overload
 def concat(tensors: Sequence[vector[U]]) -> vector[U]: ...
@overload
 def concat(tensors: Sequence[tensor[U]], axis: int = 0) -> tensor[U]: ...
 def concat(tensors: Sequence[tensor[U] | vector[U]], axis: int = 0) -> tensor[U] | vector[U]:
    """Concatenate tensors or vectors along a specified axis.
    Arguments:
        tensors: Tensors or vectors to concatenate. Must have the same shape except for the specified axis.
        axis: Axis along which to concatenate (default 0).
    Returns:
        A new tensor or vector resulting from concatenation.
    """
    assert tensors, "At least one tensor must be provided"
    # Check if all inputs are vectors
    all_vectors = all(isinstance(item, vector) for item in tensors)
    # Convert vectors to tensors for uniform processing
    tensor_list: list[tensor[U]] = []
    for item in tensors:
        if isinstance(item, vector):
            tensor_list.append(tensor(item.values, item.shape))
        else:
            tensor_list.append(item)
    first_shape = tensor_list[0].shape
    ndim = len(first_shape)
    if axis < 0:
        axis += ndim
    assert 0 <= axis < ndim
    # Shape checks
    for t in tensor_list:
        assert len(t.shape) == ndim
        for i in range(ndim):
            if i != axis:
                assert t.shape[i] == first_shape[i]
    # Output shape
    new_shape = list(first_shape)
    new_shape[axis] = sum(t.shape[axis] for t in tensor_list)
    # Compute block sizes
    inner_block: int = 1
    for s in first_shape[axis + 1:]:
        inner_block *= s
    outer_block: int = 1
    for s in first_shape[:axis]:
        outer_block *= s
    new_values: list[value[U] | U] = []
    for outer in range(outer_block):
        for t in tensor_list:
            axis_size = t.shape[axis]
            start = outer * axis_size * inner_block
            end = start + axis_size * inner_block
            new_values.extend(t.values[start:end])
    result_tensor = tensor(new_values, tuple(new_shape))
    # If all inputs were vectors and result is 1D, return as vector
    if all_vectors and result_tensor.ndim == 1:
        return vector(result_tensor.values)
    return result_tensor
 def flatten(t: tensor[U]) -> tensor[U]:
    """Flatten a tensor to a 1D tensor."""
    return t.flatten()
 def arange(start: int | float, stop: int | float | None = None,
           step: int | float = 1) -> tensor[int] | tensor[float]:
    """Create a tensor with evenly spaced values.
@ -927,10 +1005,10 @@ def identity(size: int) -> tensor[int]:
@overload
-def diagonal(vec: 'tensor[int] | vector[int]') -> tensor[int]: ...
+def diagonal(vec: tensor[int] | vector[int]) -> tensor[int]: ...
@overload
-def diagonal(vec: 'tensor[float] | vector[float]') -> tensor[float]: ...
+def diagonal(vec: tensor[float] | vector[float]) -> tensor[float]: ...
-def diagonal(vec: 'tensor[Any] | vector[Any]') -> 'tensor[Any]':
+def diagonal(vec: tensor[Any] | vector[Any]) -> tensor[Any]:
    """Create a diagonal tensor from a 1D tensor.
    Arguments:
--- a/tests/test_math.py
+++ b/tests/test_math.py
@ -23,8 +23,8 @@ def test_fine():
                cp.abs(-c_f),
                cp.sign(c_i),
                cp.sign(-c_f),
-                cp.min(c_i, 5),
+                cp.minimum(c_i, 5),
-                cp.max(c_f, 5))
+                cp.maximum(c_f, 5))
    re2_test = (a_f ** 2,
                a_i ** -1,
@ -39,8 +39,8 @@ def test_fine():
                cp.abs(-a_f),
                cp.sign(a_i),
                cp.sign(-a_f),
-                cp.min(a_i, 5),
+                cp.minimum(a_i, 5),
-                cp.max(a_f, 5))
+                cp.maximum(a_f, 5))
    ret_refe = (a_f ** 2,
                a_i ** -1,
--- a/tests/test_tensor_basic.py
+++ b/tests/test_tensor_basic.py
@ -3,6 +3,7 @@
 import copapy as cp
 def test_tensor_basic():
    # Test 1: Create a scalar tensor
    print("Test 1: Scalar tensor")
@ -225,6 +226,7 @@ def test_tensor_basic():
    assert t.ndim == 3
    print()
 def test_tensor_slicing():
    print("Test Numpy-style slicing")
    t = cp.tensor([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
@ -251,6 +253,25 @@ def test_tensor_slicing():
    assert slice4[2] == 90
    print()
 def test_tensor_concat():
    print("Test tensor concatenation")
    t1 = cp.tensor([[1, 2], [3, 4]])
    t2 = cp.tensor([[5, 6], [7, 8]])
    t3 = cp.tensor([[9, 10], [11, 12]])
    concat_0 = cp.concat([t1, t2, t3], axis=0)
    print(f"Concatenate along axis 0: shape={concat_0.shape}")
    assert concat_0.shape == (6, 2)
    assert concat_0[4, 1] == 10
    concat_1 = cp.concat([t1, t2, t3], axis=1)
    print(f"Concatenate along axis 1: shape={concat_1.shape}")
    assert concat_1.shape == (2, 6)
    assert concat_1[1, 4] == 11
    print()
 if __name__ == "__main__":
    test_tensor_basic()
    print("All tests completed!")
--- a/tests/test_vector.py
+++ b/tests/test_vector.py
@ -114,6 +114,19 @@ def test_sort_vector():
    assert ref == result
 def test_vector_concat():
    print("Test vector concatenation")
    v1 = cp.vector([1, 2, 3])
    v2 = cp.vector([4, 5])
    v3 = cp.vector([6])
    concat_vec = cp.concat([v1, v2, v3])
    print(f"Concatenate vectors: shape={concat_vec.shape}")
    assert concat_vec.shape == (6,)
    assert concat_vec[4] == 5
    print()
 if __name__ == "__main__":
    #test_vectors_init()
    #test_compiled_vectors()