From decdbeb270b903d0646e383ec89dacb8fa4466dc Mon Sep 17 00:00:00 2001
From: Nicolas <Nicolas@nonan.net>
Date: Mon, 6 Apr 2026 12:22:38 +0200
Subject: [PATCH 1/5] Changes setuptools_scm version_scheme to no-guess-next to
 get correctly named pypi releases.

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pyproject.toml b/pyproject.toml
index ae57b25..f5b21d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ where = ["src"]
 copapy = ["obj/*.o", "py.typed"]
 
 [tool.setuptools_scm]
-version_scheme = "post-release"
+version_scheme = "no-guess-next"
 local_scheme = "no-local-version"
 tag_regex = "^v(?P<version>\\d+\\.\\d+\\.\\d+(?:-beta)?)$"
 fallback_version = "0.0.0"

From d6ff9599f4edff903a2df776153471dfd89a29b7 Mon Sep 17 00:00:00 2001
From: Nicolas <Nicolas@nonan.net>
Date: Tue, 19 May 2026 20:25:05 +0200
Subject: [PATCH 2/5] setuptools_scm version_scheme set to no-guess-next

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f5b21d5..8c1b845 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,7 +30,7 @@ where = ["src"]
 copapy = ["obj/*.o", "py.typed"]
 
 [tool.setuptools_scm]
-version_scheme = "no-guess-next"
+version_scheme = "no-guess-dev"
 local_scheme = "no-local-version"
 tag_regex = "^v(?P<version>\\d+\\.\\d+\\.\\d+(?:-beta)?)$"
 fallback_version = "0.0.0"

From 4a71db0e383353103189882d05d23fc2d6c3b43a Mon Sep 17 00:00:00 2001
From: Nicolas <Nicolas@nonan.net>
Date: Tue, 19 May 2026 20:27:42 +0200
Subject: [PATCH 3/5] concat and sigmoid function added, moved relu and sigmoid
 to _nn.py, renamed min and max to minimum and maximum like in numpy and
 updated functions to work with tensors

---
 src/copapy/__init__.py     | 13 +++---
 src/copapy/_math.py        | 70 ++++++++++++++++---------------
 src/copapy/_nn.py          | 34 +++++++++++++++
 src/copapy/_tensors.py     | 86 ++++++++++++++++++++++++++++++++++++--
 tests/test_math.py         |  8 ++--
 tests/test_tensor_basic.py | 21 ++++++++++
 tests/test_vector.py       | 13 ++++++
 7 files changed, 198 insertions(+), 47 deletions(-)
 create mode 100644 src/copapy/_nn.py

diff --git a/src/copapy/__init__.py b/src/copapy/__init__.py
index 680e6a4..808dd4e 100644
--- a/src/copapy/__init__.py
+++ b/src/copapy/__init__.py
@@ -37,8 +37,9 @@ from ._target import Target, jit
 from ._basic_types import NumLike, value, generic_sdb, iif
 from ._vectors import vector, distance, scalar_projection, angle_between, rotate_vector, vector_projection
 from ._quaternion import quaternion
-from ._tensors import tensor, zeros, ones, arange, eye, identity, diagonal
-from ._math import sqrt, abs, sign, sin, cos, tan, asin, acos, atan, atan2, log, exp, pow, get_42, clamp, min, max, relu
+from ._tensors import tensor, zeros, ones, arange, eye, identity, diagonal, concat
+from ._math import sqrt, abs, sign, sin, cos, tan, asin, acos, atan, atan2, log, exp, pow, get_42, clamp, minimum, maximum
+from ._nn import relu, sigmoid
 from ._autograd import grad
 from ._tensors import tensor as matrix
 from ._version import __version__  # Run "pip install -e ." to generate _version.py
@@ -74,16 +75,18 @@ __all__ = [
     "pow",
     "get_42",
     "clamp",
-    "min",
-    "max",
+    "minimum",
+    "maximum",
     "relu",
     "distance",
     "scalar_projection",
     "angle_between",
     "rotate_vector",
-"vector_projection",
+    "vector_projection",
     "quaternion",
     "grad",
     "eye",
+    "concat",
+    "sigmoid",
     "jit"
 ]
diff --git a/src/copapy/_math.py b/src/copapy/_math.py
index fcd14c9..4ad6204 100644
--- a/src/copapy/_math.py
+++ b/src/copapy/_math.py
@@ -393,12 +393,20 @@ def clamp(x: U | value[U] | vector[U], min_value: U | value[U], max_value:  U |
 
 
 @overload
-def min(x: value[U], y: U | value[U]) -> value[U]: ...
+def minimum(x: U, y: U) -> U: ...
 @overload
-def min(x: U | value[U], y: value[U]) -> value[U]: ...
+def minimum(x: value[U], y: U | value[U]) -> value[U]: ...
 @overload
-def min(x: U, y: U) -> U: ...
-def min(x: U | value[U], y: U | value[U]) -> Any:
+def minimum(x: U | value[U], y: value[U]) -> value[U]: ...
+@overload
+def minimum(x: vector[U], y: U | value[U] | vector[U]) -> vector[U]: ...
+@overload
+def minimum(x: U | value[U] | vector[U], y: vector[U]) -> vector[U]: ...
+@overload
+def minimum(x: tensor[U], y: U | value[U] | tensor[U]) -> tensor[U]: ...
+@overload
+def minimum(x: U | value[U] | tensor[U], y: tensor[U]) -> tensor[U]: ...
+def minimum(x: U | value[U] | vector[U] | tensor[U], y: U | value[U] | vector[U] | tensor[U]) -> Any:
     """Minimum function to get the smaller of two values.
 
     Arguments:
@@ -408,22 +416,30 @@ def min(x: U | value[U], y: U | value[U]) -> Any:
     Returns:
         Minimum of x and y
     """
-    if isinstance(x, value):
+    if isinstance(x, tensor) or isinstance(y, tensor):
+        return _map2_tensor(x, y, minimum)
+    if isinstance(x, vector) or isinstance(y, vector):
+        return _map2_vector(x, y, minimum)
+    if isinstance(x, value) or isinstance(y, value):
         return add_op('min', [x, y])
-    if isinstance(x, tensor):
-        return _map2_tensor(x, y, min)
-    if isinstance(x, vector):
-        return _map2_vector(x, y, min)
     return x if x < y else y
 
 
 @overload
-def max(x: value[U], y: U | value[U]) -> value[U]: ...
+def maximum(x: U, y: U) -> U: ...
 @overload
-def max(x: U | value[U], y: value[U]) -> value[U]: ...
+def maximum(x: value[U], y: U | value[U]) -> value[U]: ...
 @overload
-def max(x: U, y: U) -> U: ...
-def max(x: U | value[U], y: U | value[U]) -> Any:
+def maximum(x: U | value[U], y: value[U]) -> value[U]: ...
+@overload
+def maximum(x: vector[U], y: U | value[U] | vector[U]) -> vector[U]: ...
+@overload
+def maximum(x: U | value[U] | vector[U], y: vector[U]) -> vector[U]: ...
+@overload
+def maximum(x: tensor[U], y: U | value[U] | tensor[U]) -> tensor[U]: ...
+@overload
+def maximum(x: U | value[U] | tensor[U], y: tensor[U]) -> tensor[U]: ...
+def maximum(x: U | value[U] | vector[U] | tensor[U], y: U | value[U] | vector[U] | tensor[U]) -> Any:
     """Maximum function to get the larger of two values.
 
     Arguments:
@@ -433,12 +449,12 @@ def max(x: U | value[U], y: U | value[U]) -> Any:
     Returns:
         Maximum of x and y
     """
-    if isinstance(x, value):
+    if isinstance(x, tensor) or isinstance(y, tensor):
+        return _map2_tensor(x, y, maximum)
+    if isinstance(x, vector) or isinstance(y, vector):
+        return _map2_vector(x, y, maximum)
+    if isinstance(x, value) or isinstance(y, value):
         return add_op('max', [x, y])
-    if isinstance(x, tensor):
-        return _map2_tensor(x, y, max)
-    if isinstance(x, vector):
-        return _map2_vector(x, y, max)
     return x if x > y else y
 
 
@@ -470,20 +486,6 @@ def lerp(v1: U | value[U] | vector[U], v2: U | value[U] | vector[U], t:  unifloa
     return v1 * (1 - t) + v2 * t
 
 
-@overload
-def relu(x: U) -> U: ...
-@overload
-def relu(x: value[U]) -> value[U]: ...
-@overload
-def relu(x: vector[U]) -> vector[U]: ...
-@overload
-def relu(x: tensor[U]) -> tensor[U]: ...
-def relu(x: U | value[U] | vector[U] | tensor[U]) -> Any:
-    """Returns x for x > 0 and otherwise 0."""
-    ret = x * (x > 0)
-    return ret
-
-
 def _map2_vector(self: VecNumLike, other: VecNumLike, func: Callable[[Any, Any], value[U] | U]) -> vector[U]:
     """Applies a function to each element of the vector and a second vector or scalar."""
     if isinstance(self, vector) and isinstance(other, vector):
@@ -499,9 +501,9 @@ def _map2_vector(self: VecNumLike, other: VecNumLike, func: Callable[[Any, Any],
 def _map2_tensor(self: TensorNumLike, other: TensorNumLike, func: Callable[[Any, Any], value[U] | U]) -> tensor[U]:
     """Applies a function to each element of the vector and a second vector or scalar."""
     if isinstance(self, vector):
-        self = tensor(self.values, (len(self.values),))
+        self = tensor(self)
     if isinstance(other, vector):
-        other = tensor(other.values, (len(other.values),))
+        other = tensor(other)
     if isinstance(self, tensor) and isinstance(other, tensor):
         assert self.shape == other.shape, "Tensors must have the same shape"
         return tensor([func(x, y) for x, y in zip(self.values, other.values)], self.shape)
diff --git a/src/copapy/_nn.py b/src/copapy/_nn.py
new file mode 100644
index 0000000..7ff1c96
--- /dev/null
+++ b/src/copapy/_nn.py
@@ -0,0 +1,34 @@
+from . import vector
+from . import tensor 
+from . import value
+from typing import TypeVar, Any, overload
+import copapy as cp
+
+U = TypeVar("U", int, float)
+
+
+@overload
+def relu(x: U) -> U: ...
+@overload
+def relu(x: value[U]) -> value[U]: ...
+@overload
+def relu(x: vector[U]) -> vector[U]: ...
+@overload
+def relu(x: tensor[U]) -> tensor[U]: ...
+def relu(x: U | value[U] | vector[U] | tensor[U]) -> Any:
+    """Returns x for x > 0 and otherwise 0."""
+    ret = x * (x > 0)
+    return ret
+
+
+@overload
+def sigmoid(x: U) -> float: ...
+@overload
+def sigmoid(x: value[U]) -> value[float]: ...
+@overload
+def sigmoid(x: vector[U]) -> vector[float]: ...
+@overload
+def sigmoid(x: tensor[U]) -> tensor[float]: ...
+def sigmoid(x: U | value[U] | vector[U] | tensor[U]) -> Any:
+    """Sigmoid function to map any value to the range (0, 1)."""
+    return 1 / (1 + cp.exp(-x))
diff --git a/src/copapy/_tensors.py b/src/copapy/_tensors.py
index 740d302..522b7e4 100644
--- a/src/copapy/_tensors.py
+++ b/src/copapy/_tensors.py
@@ -32,7 +32,7 @@ class tensor(ArrayType[TNum]):
             self.shape: tuple[int, ...] = tuple(shape)
             assert (isinstance(values, Sequence) and
                     any(isinstance(v, (value, int, float)) for v in values)), \
-                    "Values must be a sequence of values if shape is provided"
+                    "Values must be a sequence of scalars if shape is provided"
             self.values: tuple[TNum | value[TNum], ...] = tuple(v for v in values if not isinstance(v, Sequence))
             self.ndim: int = len(shape)
         elif isinstance(values, (int, float)):
@@ -856,6 +856,84 @@ def ones(shape: Sequence[int] | int) -> tensor[int]:
     return tensor([1] * size, tuple(shape))
 
 
+@overload
+def concat(tensors: Sequence[vector[U]]) -> vector[U]: ...
+@overload
+def concat(tensors: Sequence[tensor[U]], axis: int = 0) -> tensor[U]: ...
+def concat(tensors: Sequence[tensor[U] | vector[U]], axis: int = 0) -> tensor[U] | vector[U]:
+    """Concatenate tensors or vectors along a specified axis.
+
+    Arguments:
+        tensors: Tensors or vectors to concatenate. Must have the same shape except for the specified axis.
+        axis: Axis along which to concatenate (default 0).
+
+    Returns:
+        A new tensor or vector resulting from concatenation.
+    """
+    assert tensors, "At least one tensor must be provided"
+
+    # Check if all inputs are vectors
+    all_vectors = all(isinstance(item, vector) for item in tensors)
+
+    # Convert vectors to tensors for uniform processing
+    tensor_list: list[tensor[U]] = []
+    for item in tensors:
+        if isinstance(item, vector):
+            tensor_list.append(tensor(item.values, item.shape))
+        else:
+            tensor_list.append(item)
+
+    first_shape = tensor_list[0].shape
+    ndim = len(first_shape)
+
+    if axis < 0:
+        axis += ndim
+
+    assert 0 <= axis < ndim
+
+    # Shape checks
+    for t in tensor_list:
+        assert len(t.shape) == ndim
+        for i in range(ndim):
+            if i != axis:
+                assert t.shape[i] == first_shape[i]
+
+    # Output shape
+    new_shape = list(first_shape)
+    new_shape[axis] = sum(t.shape[axis] for t in tensor_list)
+
+    # Compute block sizes
+    inner_block: int = 1
+    for s in first_shape[axis + 1:]:
+        inner_block *= s
+
+    outer_block: int = 1
+    for s in first_shape[:axis]:
+        outer_block *= s
+
+    new_values: list[value[U] | U] = []
+
+    for outer in range(outer_block):
+        for t in tensor_list:
+            axis_size = t.shape[axis]
+            start = outer * axis_size * inner_block
+            end = start + axis_size * inner_block
+            new_values.extend(t.values[start:end])
+
+    result_tensor = tensor(new_values, tuple(new_shape))
+
+    # If all inputs were vectors and result is 1D, return as vector
+    if all_vectors and result_tensor.ndim == 1:
+        return vector(result_tensor.values)
+
+    return result_tensor
+
+
+def flatten(t: tensor[U]) -> tensor[U]:
+    """Flatten a tensor to a 1D tensor."""
+    return t.flatten()
+
+
 def arange(start: int | float, stop: int | float | None = None,
            step: int | float = 1) -> tensor[int] | tensor[float]:
     """Create a tensor with evenly spaced values.
@@ -927,10 +1005,10 @@ def identity(size: int) -> tensor[int]:
 
 
 @overload
-def diagonal(vec: 'tensor[int] | vector[int]') -> tensor[int]: ...
+def diagonal(vec: tensor[int] | vector[int]) -> tensor[int]: ...
 @overload
-def diagonal(vec: 'tensor[float] | vector[float]') -> tensor[float]: ...
-def diagonal(vec: 'tensor[Any] | vector[Any]') -> 'tensor[Any]':
+def diagonal(vec: tensor[float] | vector[float]) -> tensor[float]: ...
+def diagonal(vec: tensor[Any] | vector[Any]) -> tensor[Any]:
     """Create a diagonal tensor from a 1D tensor.
 
     Arguments:
diff --git a/tests/test_math.py b/tests/test_math.py
index a6361ce..e5d83a5 100644
--- a/tests/test_math.py
+++ b/tests/test_math.py
@@ -23,8 +23,8 @@ def test_fine():
                 cp.abs(-c_f),
                 cp.sign(c_i),
                 cp.sign(-c_f),
-                cp.min(c_i, 5),
-                cp.max(c_f, 5))
+                cp.minimum(c_i, 5),
+                cp.maximum(c_f, 5))
 
     re2_test = (a_f ** 2,
                 a_i ** -1,
@@ -39,8 +39,8 @@ def test_fine():
                 cp.abs(-a_f),
                 cp.sign(a_i),
                 cp.sign(-a_f),
-                cp.min(a_i, 5),
-                cp.max(a_f, 5))
+                cp.minimum(a_i, 5),
+                cp.maximum(a_f, 5))
 
     ret_refe = (a_f ** 2,
                 a_i ** -1,
diff --git a/tests/test_tensor_basic.py b/tests/test_tensor_basic.py
index 9ddf85e..cdde7ef 100644
--- a/tests/test_tensor_basic.py
+++ b/tests/test_tensor_basic.py
@@ -3,6 +3,7 @@
 
 import copapy as cp
 
+
 def test_tensor_basic():
     # Test 1: Create a scalar tensor
     print("Test 1: Scalar tensor")
@@ -225,6 +226,7 @@ def test_tensor_basic():
     assert t.ndim == 3
     print()
 
+
 def test_tensor_slicing():
     print("Test Numpy-style slicing")
     t = cp.tensor([[10, 20, 30], [40, 50, 60], [70, 80, 90]])
@@ -251,6 +253,25 @@ def test_tensor_slicing():
     assert slice4[2] == 90
     print()
 
+
+def test_tensor_concat():
+    print("Test tensor concatenation")
+    t1 = cp.tensor([[1, 2], [3, 4]])
+    t2 = cp.tensor([[5, 6], [7, 8]])
+    t3 = cp.tensor([[9, 10], [11, 12]])
+
+    concat_0 = cp.concat([t1, t2, t3], axis=0)
+    print(f"Concatenate along axis 0: shape={concat_0.shape}")
+    assert concat_0.shape == (6, 2)
+    assert concat_0[4, 1] == 10
+
+    concat_1 = cp.concat([t1, t2, t3], axis=1)
+    print(f"Concatenate along axis 1: shape={concat_1.shape}")
+    assert concat_1.shape == (2, 6)
+    assert concat_1[1, 4] == 11
+    print()
+
+
 if __name__ == "__main__":
     test_tensor_basic()
     print("All tests completed!")
diff --git a/tests/test_vector.py b/tests/test_vector.py
index b7ce95c..849da43 100644
--- a/tests/test_vector.py
+++ b/tests/test_vector.py
@@ -114,6 +114,19 @@ def test_sort_vector():
     assert ref == result
 
 
+def test_vector_concat():
+    print("Test vector concatenation")
+    v1 = cp.vector([1, 2, 3])
+    v2 = cp.vector([4, 5])
+    v3 = cp.vector([6])
+
+    concat_vec = cp.concat([v1, v2, v3])
+    print(f"Concatenate vectors: shape={concat_vec.shape}")
+    assert concat_vec.shape == (6,)
+    assert concat_vec[4] == 5
+    print()
+
+
 if __name__ == "__main__":
     #test_vectors_init()
     #test_compiled_vectors()

From bbb8eca67839cb83d9ec31ad2dff8f6563106674 Mon Sep 17 00:00:00 2001
From: Nicolas <Nicolas@nonan.net>
Date: Wed, 17 Jun 2026 09:48:29 +0200
Subject: [PATCH 4/5] some fixes in the readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index e68f4c5..eae753b 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ Despite missing SIMD-optimization, benchmark performance shows promising numbers
 
 For the benchmark (`tests/benchmark.py`) the timing of 30000 iterations for calculating the therm `sum((v1 + i) @ v2 for i in range(10))` where measured on an Ryzen 5 3400G. Where the vectors `v1` and `v2` both have a lengths of `v_size` which was varied according to the chart from 10 to 500. For the NumPy case the "i in range(10)" loop was vectorized like this: `np.sum((v1 + i) @ v2)` with i being here a `NDArray` with a dimension of `[10, 1]`. The number of calculated scalar operations is the same for both contenders. Obviously Copapy profits from less overheat by calling a single function from python per iteration, where the NumPy variant requires 3. Interestingly there is no indication visible in the chart that for increasing `v_size` the calling overhead for NumPy will be compensated by using faster SIMD instructions. It is to note that in this benchmark the Copapy case does not move any data between python and the compiled code.
 
-Furthermore for many applications copypy will benefit by reducing the actual number of operations significantly compared to a NumPy implementation, by precompute constant values know at compile time and benefiting from sparcity. Multiplying by zero (e.g. in a diagonal matrix) eliminate a hole branch in the computation graph. Operations without effect, like multiplications by 1 oder additions with zero gets eliminated at compile time.
+Furthermore for many applications Copapy performance will benefit by reducing the actual number of operations significantly compared to a NumPy implementation, by precompute constant values know at compile time and benefiting from sparcity. Multiplying by zero (e.g. in a diagonal matrix) eliminate a hole branch in the computation graph. Operations without effect, like multiplications by 1 oder additions with zero gets eliminated at compile time.
 
 For Testing and using Copapy to speed up computations in conventional Python programs there is also the `@cp.jit` decorator available, to compile functions on first use and cache the compiled version for later calls:
 
@@ -188,7 +188,7 @@ For more complex operations - where inlining is less useful - stencils call a no
 
 Unlike stencils, non-stencil functions like `sinf` are not stripped and do not need to be tail-call-optimizable. These functions can be provided as C code and compiled together with the stencils or can be object files like in the case of `sinf` compiled from C and assembly code and merged into the stencil object files. Math functions like `sinf` are currently provided by the MUSL C library, with architecture-specific optimizations. 
 
-Non-stencil functions and constants are stored together with the stencils in an ELF object file for each supported CPU architecture. The required non-stencil functions and constants are bundled during compilation. The compiler includes only the data and code required for a specific Copapy program.
+Non-stencil functions and constants used by them are stored together with the stencils in ELF object files for each supported CPU architecture. The required non-stencil functions and constants are bundled during compilation. The compiler includes only the data and code required for a specific Copapy program.
 
 The Copapy compilation process is independent of the actual instruction set. It relies purely on relocation entries and symbol metadata from the ELF file generated by the C compiler.
 

From 5c21bf22872138ab8040ce02d5812b1131f7e6fe Mon Sep 17 00:00:00 2001
From: Nicolas <Nicolas@nonan.net>
Date: Wed, 17 Jun 2026 10:13:58 +0200
Subject: [PATCH 5/5] Image version for the windows image set to windows-2022
 to fix the issue with the path of VsDevCmd.bat for switching the architecture

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5b91bb1..9228d31 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -299,7 +299,7 @@ jobs:
 
   build-windows:
     needs: [build_stencils]
-    runs-on: windows-latest
+    runs-on: windows-2022
 
     strategy:
       matrix: