mirror of https://github.com/Nonannet/copapy.git
general tensor type introduced replacing "matrix"
This commit is contained in:
parent 27c40e036b
commit 0d0e675207
@@ -36,9 +36,11 @@ Example usage:
 from ._target import Target, jit
 from ._basic_types import NumLike, value, generic_sdb, iif
 from ._vectors import vector, distance, scalar_projection, angle_between, rotate_vector, vector_projection
-from ._matrices import matrix, identity, zeros, ones, diagonal, eye
+from ._tensors import tensor, zeros, ones, arange, eye, identity, diagonal
 from ._math import sqrt, abs, sign, sin, cos, tan, asin, acos, atan, atan2, log, exp, pow, get_42, clamp, min, max, relu
 from ._autograd import grad
+from ._tensors import tensor as matrix
+
 
 __all__ = [
     "Target",
@@ -47,11 +49,13 @@ __all__ = [
     "generic_sdb",
     "iif",
     "vector",
+    "tensor",
     "matrix",
     "identity",
     "zeros",
     "ones",
     "diagonal",
+    "arange",
     "sqrt",
     "abs",
     "sin",
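
A minimal sketch of the re-exported surface after these two hunks (assuming the package is importable as copapy; the nested-list constructor follows the new _tensors module further down):

    import copapy as cp

    t = cp.tensor([[1, 2], [3, 4]])   # the new general n-dimensional type
    m = cp.matrix([[1, 2], [3, 4]])   # "matrix" is kept as an alias of tensor
    assert type(t) is type(m)         # same class, old name still importable
    assert t.shape == (2, 2)          # shape inferred from the nested lists
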
@@ -1,4 +1,4 @@
-from . import value, vector, matrix
+from . import value, vector, tensor
 import copapy.backend as cpb
 from typing import Any, Sequence, overload
 import copapy as cp
@@ -10,10 +10,10 @@ def grad(x: Any, y: value[Any]) -> unifloat: ...
 @overload
 def grad(x: Any, y: vector[Any]) -> vector[float]: ...
 @overload
-def grad(x: Any, y: Sequence[value[Any]]) -> list[unifloat]: ...
+def grad(x: Any, y: tensor[Any]) -> tensor[float]: ...
 @overload
-def grad(x: Any, y: matrix[Any]) -> matrix[float]: ...
-def grad(x: Any, y: value[Any] | Sequence[value[Any]] | vector[Any] | matrix[Any]) -> Any:
+def grad(x: Any, y: Sequence[value[Any]]) -> list[unifloat]: ...
+def grad(x: Any, y: value[Any] | Sequence[value[Any]] | vector[Any] | tensor[Any]) -> Any:
     """Returns the partial derivative dx/dy where x needs to be a scalar
     and y might be a scalar, a list of scalars, a vector or matrix. It
     uses automatic differentiation in reverse-mode.
@@ -29,11 +29,11 @@ def grad(x: Any, y: value[Any] | Sequence[value[Any]] | vector[Any] | matrix[Any]) -> Any:
 
     if isinstance(y, value):
         y_set = {y}
-    elif isinstance(y, matrix):
-        y_set = {v for row in y for v in row}
+    elif isinstance(y, tensor):
+        y_set = {v.get_scalar(0) for v in y.flatten()}
     else:
         assert isinstance(y, Sequence) or isinstance(y, vector)
-        y_set = {v for v in y}
+        y_set = set(y)
 
     edges = cpb.get_all_dag_edges_between([x.net.source], (v.net.source for v in y_set if isinstance(v, value)))
     ordered_ops = cpb.stable_toposort(edges)
@@ -121,7 +121,7 @@ def grad(x: Any, y: value[Any] | Sequence[value[Any]] | vector[Any] | matrix[Any]) -> Any:
     if isinstance(y, value):
         return grad_dict[y.net]
     if isinstance(y, vector):
-        return vector(grad_dict[yi.net] if isinstance(yi, value) else 0.0 for yi in y)
-    if isinstance(y, matrix):
-        return matrix((grad_dict[yi.net] if isinstance(yi, value) else 0.0 for yi in row) for row in y)
+        return vector(grad_dict[yi.net] if isinstance(yi, value) else 0.0 for yi in y.values)
+    if isinstance(y, tensor):
+        return tensor([grad_dict[yi.net] if isinstance(yi, value) else 0.0 for yi in y.values], y.shape)
     return [grad_dict[yi.net] for yi in y]
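
A hedged sketch of the reworked grad overloads in use; the variable names are illustrative, and it assumes the tensor code path behaves as the new overload advertises (grad over a tensor returns a tensor of the same shape):

    import copapy as cp

    w = cp.tensor([[cp.value(1.0), cp.value(2.0)],
                   [cp.value(3.0), cp.value(4.0)]])
    loss = (w * w).sum()     # scalar copapy value built from w
    g = cp.grad(loss, w)     # tensor[float] with g.shape == w.shape
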
@@ -1,5 +1,5 @@
 import pkgutil
-from typing import Any, Sequence, TypeVar, overload, TypeAlias, Generic, cast
+from typing import Any, Sequence, TypeVar, overload, TypeAlias, Generic, cast, Callable
 from ._stencils import stencil_database, detect_process_arch
 import copapy as cp
 from ._helper_types import TNum
@@ -75,7 +75,7 @@ class Net:
 
     def __hash__(self) -> int:
         return self.source.node_hash
 
     def __eq__(self, other: object) -> bool:
         return isinstance(other, Net) and self.source == other.source
 
@@ -396,7 +396,7 @@ class Op(Node):
             return True
         if not isinstance(other, Op):
             return NotImplemented
 
         # Traverse graph for both nodes. Return false on first difference.
         # A false inequality result in rare cases is ok, whereas a false
         # equality result leads to wrong computation results.
@@ -427,10 +427,19 @@ class Op(Node):
                 return False
             seen.add(key)
         return True
 
     def __hash__(self) -> int:
         return self.node_hash
 
+
+# Interface for vector and tensor types
+class ArrayType(Generic[TNum]):
+    def __init__(self, shape: tuple[int, ...]) -> None:
+        self.shape = shape
+        self.values: tuple[TNum | value[TNum], ...] = ()
+
+    def map(self, func: Callable[[TNum | value[TNum]], Any]) -> 'ArrayType[Any]':
+        return self
+
 
 def value_from_number(val: Any) -> value[Any]:
     # Create anonymous constant that can be removed during optimization
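
ArrayType is deliberately minimal: a shape, a flat values tuple, and a map hook that subclasses override (the base returns self as a no-op). A toy subclass, purely hypothetical, to show the contract that tensor and Target rely on:

    class pair(ArrayType[float]):           # hypothetical example subclass
        def __init__(self, a: float, b: float) -> None:
            super().__init__((2,))
            self.values = (a, b)

        def map(self, func: Callable[[Any], Any]) -> 'pair':
            # apply func element-wise, as Target.read_value expects
            return pair(func(self.values[0]), func(self.values[1]))

    p = pair(1.0, 2.0).map(lambda v: v * 10)   # values become (10.0, 20.0)
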
@@ -454,7 +463,7 @@ def iif(expression: float | int | value[Any], true_result: TNum | value[TNum], f
 def iif(expression: Any, true_result: Any, false_result: Any) -> Any:
     """Inline if-else operation. Returns true_result if expression is non-zero,
     else returns false_result.
 
     Arguments:
         expression: The condition to evaluate.
         true_result: The result if expression is non-zero.
@@ -1,359 +0,0 @@
-from . import value
-from ._vectors import vector
-from ._mixed import mixed_sum
-from typing import TypeVar, Iterable, Any, overload, TypeAlias, Callable, Iterator, Generic
-from ._helper_types import TNum
-
-MatNumLike: TypeAlias = 'matrix[int] | matrix[float] | value[int] | value[float] | int | float'
-MatIntLike: TypeAlias = 'matrix[int] | value[int] | int'
-MatFloatLike: TypeAlias = 'matrix[float] | value[float] | float'
-U = TypeVar("U", int, float)
-
-
-class matrix(Generic[TNum]):
-    """Mathematical matrix class supporting basic operations and interactions with values.
-    """
-    def __init__(self, values: Iterable[Iterable[TNum | value[TNum]]] | vector[TNum]):
-        """Create a matrix with given values.
-
-        Arguments:
-            values: iterable of iterable of constant values
-        """
-        if isinstance(values, vector):
-            rows = [values.values]
-        else:
-            rows = [tuple(row) for row in values]
-
-        if rows:
-            row_len = len(rows[0])
-            assert all(len(row) == row_len for row in rows), "All rows must have the same length"
-        self.values: tuple[tuple[value[TNum] | TNum, ...], ...] = tuple(rows)
-        self.rows = len(self.values)
-        self.cols = len(self.values[0]) if self.values else 0
-
-    def __repr__(self) -> str:
-        return f"matrix({self.values})"
-
-    def __len__(self) -> int:
-        """Return the number of rows in the matrix."""
-        return self.rows
-
-    @overload
-    def __getitem__(self, key: int) -> vector[TNum]: ...
-    @overload
-    def __getitem__(self, key: tuple[int, int]) -> value[TNum] | TNum: ...
-    def __getitem__(self, key: int | tuple[int, int]) -> Any:
-        """Get a row as a vector or a specific element.
-
-        Arguments:
-            key: row index or (row, col) tuple
-
-        Returns:
-            vector if row index is given, else the element at (row, col)
-        """
-        if isinstance(key, tuple):
-            assert len(key) == 2
-            return self.values[key[0]][key[1]]
-        else:
-            return vector(self.values[key])
-
-    def __iter__(self) -> Iterator[tuple[value[TNum] | TNum, ...]]:
-        return iter(self.values)
-
-    def __neg__(self) -> 'matrix[TNum]':
-        return matrix((-a for a in row) for row in self.values)
-
-    @overload
-    def __add__(self: 'matrix[int]', other: MatFloatLike) -> 'matrix[float]': ...
-    @overload
-    def __add__(self: 'matrix[int]', other: MatIntLike) -> 'matrix[int]': ...
-    @overload
-    def __add__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __add__(self, other: MatNumLike) -> 'matrix[int] | matrix[float]': ...
-    def __add__(self, other: MatNumLike) -> Any:
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(a + b for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(a + other for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(a + o if isinstance(a, value) else a + other for a in row)
-            for row in self.values
-        )
-
-    @overload
-    def __radd__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __radd__(self: 'matrix[int]', other: value[int] | int) -> 'matrix[int]': ...
-    def __radd__(self, other: Any) -> Any:
-        return self + other
-
-    @overload
-    def __sub__(self: 'matrix[int]', other: MatFloatLike) -> 'matrix[float]': ...
-    @overload
-    def __sub__(self: 'matrix[int]', other: MatIntLike) -> 'matrix[int]': ...
-    @overload
-    def __sub__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __sub__(self, other: MatNumLike) -> 'matrix[int] | matrix[float]': ...
-    def __sub__(self, other: MatNumLike) -> Any:
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(a - b for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(a - other for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(a - o if isinstance(a, value) else a - other for a in row)
-            for row in self.values
-        )
-
-    @overload
-    def __rsub__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __rsub__(self: 'matrix[int]', other: value[int] | int) -> 'matrix[int]': ...
-    def __rsub__(self, other: MatNumLike) -> Any:
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(b - a for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(other - a for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(o - a if isinstance(a, value) else other - a for a in row)
-            for row in self.values
-        )
-
-    @overload
-    def __mul__(self: 'matrix[int]', other: MatFloatLike) -> 'matrix[float]': ...
-    @overload
-    def __mul__(self: 'matrix[int]', other: MatIntLike) -> 'matrix[int]': ...
-    @overload
-    def __mul__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __mul__(self, other: MatNumLike) -> 'matrix[int] | matrix[float]': ...
-    def __mul__(self, other: MatNumLike) -> Any:
-        """Element-wise multiplication"""
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(a * b for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(a * other for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(a * o if isinstance(a, value) else a * other for a in row)
-            for row in self.values
-        )
-
-    @overload
-    def __rmul__(self: 'matrix[float]', other: MatNumLike) -> 'matrix[float]': ...
-    @overload
-    def __rmul__(self: 'matrix[int]', other: value[int] | int) -> 'matrix[int]': ...
-    def __rmul__(self, other: MatNumLike) -> Any:
-        return self * other
-
-    def __truediv__(self, other: MatNumLike) -> 'matrix[float]':
-        """Element-wise division"""
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(a / b for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(a / other for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(a / o if isinstance(a, value) else a / other for a in row)
-            for row in self.values
-        )
-
-    def __rtruediv__(self, other: MatNumLike) -> 'matrix[float]':
-        if isinstance(other, matrix):
-            assert self.rows == other.rows and self.cols == other.cols, \
-                "Matrices must have the same dimensions"
-            return matrix(
-                tuple(b / a for a, b in zip(row1, row2))
-                for row1, row2 in zip(self.values, other.values)
-            )
-        if isinstance(other, value):
-            return matrix(
-                tuple(other / a for a in row)
-                for row in self.values
-            )
-        o = value(other)  # Make sure a single constant is allocated
-        return matrix(
-            tuple(o / a if isinstance(a, value) else other / a for a in row)
-            for row in self.values
-        )
-
-    @overload
-    def __matmul__(self: 'matrix[TNum]', other: 'vector[TNum]') -> 'vector[TNum]': ...
-    @overload
-    def __matmul__(self: 'matrix[TNum]', other: 'matrix[TNum]') -> 'matrix[TNum]': ...
-    def __matmul__(self: 'matrix[TNum]', other: 'matrix[TNum] | vector[TNum]') -> 'matrix[TNum] | vector[TNum]':
-        """Matrix multiplication using @ operator"""
-        if isinstance(other, vector):
-            assert self.cols == len(other.values), \
-                f"Matrix columns ({self.cols}) must match vector length ({len(other.values)})"
-            vec_result = (mixed_sum(a * b for a, b in zip(row, other.values)) for row in self.values)
-            return vector(vec_result)
-        else:
-            assert isinstance(other, matrix), f"Cannot multiply matrix with {type(other)}"
-            assert self.cols == other.rows, \
-                f"Matrix columns ({self.cols}) must match other matrix rows ({other.rows})"
-            result: list[list[TNum | value[TNum]]] = []
-            for row in self.values:
-                new_row: list[TNum | value[TNum]] = []
-                for col_idx in range(other.cols):
-                    col = tuple(other.values[i][col_idx] for i in range(other.rows))
-                    element = sum(a * b for a, b in zip(row, col))
-                    new_row.append(element)
-                result.append(new_row)
-            return matrix(result)
-
-    def transpose(self) -> 'matrix[TNum]':
-        """Return the transpose of the matrix."""
-        if not self.values:
-            return matrix([])
-        return matrix(
-            tuple(self.values[i][j] for i in range(self.rows))
-            for j in range(self.cols)
-        )
-
-    @property
-    def shape(self) -> tuple[int, int]:
-        """Return the shape of the matrix as (rows, cols)."""
-        return (self.rows, self.cols)
-
-    @property
-    def T(self) -> 'matrix[TNum]':
-        return self.transpose()
-
-    def row(self, index: int) -> vector[TNum]:
-        """Get a row as a vector."""
-        assert 0 <= index < self.rows, f"Row index {index} out of bounds"
-        return vector(self.values[index])
-
-    def col(self, index: int) -> vector[TNum]:
-        """Get a column as a vector."""
-        assert 0 <= index < self.cols, f"Column index {index} out of bounds"
-        return vector(self.values[i][index] for i in range(self.rows))
-
-    @overload
-    def trace(self: 'matrix[TNum]') -> TNum | value[TNum]: ...
-    @overload
-    def trace(self: 'matrix[int]') -> int | value[int]: ...
-    @overload
-    def trace(self: 'matrix[float]') -> float | value[float]: ...
-    def trace(self) -> Any:
-        """Calculate the trace (sum of diagonal elements)."""
-        assert self.rows == self.cols, "Trace is only defined for square matrices"
-        return mixed_sum(self.values[i][i] for i in range(self.rows))
-
-    @overload
-    def sum(self: 'matrix[TNum]') -> TNum | value[TNum]: ...
-    @overload
-    def sum(self: 'matrix[int]') -> int | value[int]: ...
-    @overload
-    def sum(self: 'matrix[float]') -> float | value[float]: ...
-    def sum(self) -> Any:
-        """Calculate the sum of all elements."""
-        return mixed_sum(a for row in self.values for a in row)
-
-    def map(self, func: Callable[[Any], value[U] | U]) -> 'matrix[U]':
-        """Applies a function to each element of the matrix and returns a new matrix."""
-        return matrix(
-            tuple(func(a) for a in row)
-            for row in self.values
-        )
-
-    def homogenize(self) -> 'matrix[TNum]':
-        """Convert all elements to copapy values if any element is a copapy value."""
-        if any(isinstance(val, value) for row in self.values for val in row):
-            return matrix(
-                tuple(value(val) if not isinstance(val, value) else val for val in row)
-                for row in self.values
-            )
-        else:
-            return self
-
-
-def identity(size: int) -> matrix[int]:
-    """Create an identity matrix of given size."""
-    return matrix(
-        tuple(1 if i == j else 0 for j in range(size))
-        for i in range(size)
-    )
-
-
-def zeros(rows: int, cols: int) -> matrix[int]:
-    """Create a zero matrix of given dimensions."""
-    return matrix(
-        tuple(0 for _ in range(cols))
-        for _ in range(rows)
-    )
-
-
-def ones(rows: int, cols: int) -> matrix[int]:
-    """Create a matrix of ones with given dimensions."""
-    return matrix(
-        tuple(1 for _ in range(cols))
-        for _ in range(rows)
-    )
-
-
-def eye(rows: int, cols: int | None = None) -> matrix[int]:
-    """Create a matrix with ones on the diagonal and zeros elsewhere."""
-    cols = cols if cols else rows
-    return matrix(
-        tuple(1 if i == j else 0 for j in range(cols))
-        for i in range(rows)
-    )
-
-
-@overload
-def diagonal(vec: 'vector[int]') -> matrix[int]: ...
-@overload
-def diagonal(vec: 'vector[float]') -> matrix[float]: ...
-def diagonal(vec: vector[Any]) -> matrix[Any]:
-    """Create a diagonal matrix from a vector."""
-    size = len(vec)
-
-    return matrix(
-        tuple(vec[i] if i == j else 0 for j in range(size))
-        for i in range(size)
-    )
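
Rough migration sketch for code that used the deleted module; the left side follows the _matrices API above, the right side the _tensors API introduced below (hedged: a T property is not visible in the shown part of the new file, so transpose() is used instead):

    m = matrix([[1, 2], [3, 4]])      ->  t = tensor([[1, 2], [3, 4]])
    m[0, 1]        # element          ->  t.get_scalar(0, 1)
    m[0]           # row as vector    ->  t[0]            # rank-1 sub-tensor
    m.T                               ->  t.transpose()
    m.shape        # (rows, cols)     ->  t.shape         # (2, 2)
    m @ m                             ->  t @ t
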
@@ -2,8 +2,7 @@ from typing import Iterable, overload, TypeVar, Any, Callable, TypeAlias
 from . import _binwrite as binw
 from coparun_module import coparun, read_data_mem, create_target, clear_target
 import struct
-from ._basic_types import stencil_db_from_package
-from ._basic_types import value, Net, Node, Write, NumLike
+from ._basic_types import value, Net, Node, Write, NumLike, ArrayType, stencil_db_from_package
 from ._compiler import compile_to_dag
 
 T = TypeVar("T", int, float)
@@ -66,7 +65,7 @@ class Target():
     def __del__(self) -> None:
         clear_target(self._context)
 
-    def compile(self, *values: int | float | value[Any] | Iterable[int | float | value[Any]]) -> None:
+    def compile(self, *values: NumLike | value[T] | ArrayType[T] | Iterable[T | value[T]]) -> None:
         """Compiles the code to compute the given values.
 
         Arguments:
@@ -74,13 +73,16 @@ class Target():
         """
        nodes: list[Node] = []
        for input in values:
-            if isinstance(input, Iterable):
+            if isinstance(input, ArrayType):
+                for v in input.values:
+                    if isinstance(v, value):
+                        nodes.append(Write(v))
+            elif isinstance(input, Iterable):
                 for v in input:
                     if isinstance(v, value):
                         nodes.append(Write(v))
-            else:
-                if isinstance(input, value):
-                    nodes.append(Write(input))
+            elif isinstance(input, value):
+                nodes.append(Write(input))
 
        dw, self._values = compile_to_dag(nodes, self.sdb)
        dw.write_com(binw.Command.END_COM)
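
With the new ArrayType branch, a tensor can be passed straight to Target.compile; a minimal hedged sketch (the Target constructor arguments are elided here and assumed to match the surrounding class):

    x = cp.value(2.0)
    t = cp.tensor([x, x * 3.0])
    tg.compile(t)    # iterates t.values and emits a Write node per copapy value
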
@@ -100,7 +102,9 @@ class Target():
     def read_value(self, variables: NumLike) -> float | int | bool: ...
     @overload
     def read_value(self, variables: Iterable[T | value[T]]) -> list[T]: ...
-    def read_value(self, variables: NumLike | value[T] | Iterable[T | value[T]]) -> Any:
+    @overload
+    def read_value(self, variables: ArrayType[T]) -> ArrayType[T]: ...
+    def read_value(self, variables: NumLike | value[T] | ArrayType[T] | Iterable[T | value[T]]) -> Any:
         """Reads the numeric value of a copapy type.
 
         Arguments:
@@ -109,6 +113,9 @@ class Target():
         Returns:
             Numeric value or values
         """
+        if isinstance(variables, ArrayType):
+            return variables.map(lambda v: self.read_value(v))
+
         if isinstance(variables, Iterable):
             return [self.read_value(ni) if isinstance(ni, value) else ni for ni in variables]
 
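
Since read_value now recurses through ArrayType via its map hook, reading back a compiled tensor yields a tensor of plain numbers; continuing the hypothetical setup from the compile sketch above:

    result = tg.read_value(t)      # ArrayType path: t.map(...) with numeric leaves
    nums = tg.read_value([x, x])   # plain Iterable path, unchanged
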
@@ -142,7 +149,7 @@ class Target():
             return val
         else:
             raise ValueError(f"Unsupported value type: {var_type}")
 
     def write_value(self, variables: value[Any] | Iterable[value[Any]], data: int | float | Iterable[int | float]) -> None:
         """Write to a copapy value on the target.
 
@@ -155,7 +162,7 @@ class Target():
             for ni, vi in zip(variables, data):
                 self.write_value(ni, vi)
             return
 
         assert not isinstance(data, Iterable), "If net is not iterable, value must not be iterable"
         assert isinstance(variables, value), "Argument must be a copapy value"
 
@@ -174,7 +181,7 @@ class Target():
             dw.write_value(int(data), lengths)
         else:
             raise ValueError(f"Unsupported value type: {var_type}")
 
         dw.write_com(binw.Command.END_COM)
         assert coparun(self._context, dw.get_data()) > 0
 
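
The new module below implements the array machinery as a general n-dimensional tensor with numpy-style shape inference, broadcasting, slicing and matmul, all in pure Python. A short usage sketch of behaviors defined in this file:

    a = tensor([[1, 2, 3], [4, 5, 6]])   # shape inferred as (2, 3)
    b = tensor([10, 20, 30])             # shape (3,)
    c = a + b                            # broadcast (3,) -> (2, 3)
    assert c.get_scalar(1, 2) == 36      # 6 + 30
    row = a[0]                           # rank-1 sub-tensor, shape (3,)
    assert a.reshape(3, 2).transpose().shape == (2, 3)
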
@ -0,0 +1,938 @@
|
||||||
|
from copapy._basic_types import NumLike, ArrayType
|
||||||
|
from . import value
|
||||||
|
from ._vectors import vector
|
||||||
|
from ._mixed import mixed_sum
|
||||||
|
from typing import TypeVar, Any, overload, TypeAlias, Callable, Iterator, Sequence
|
||||||
|
from ._helper_types import TNum
|
||||||
|
|
||||||
|
TensorNumLike: TypeAlias = 'tensor[Any] | vector[Any] | value[Any] | int | float | bool'
|
||||||
|
TensorIntLike: TypeAlias = 'tensor[int] | value[int] | int'
|
||||||
|
TensorFloatLike: TypeAlias = 'tensor[float] | value[float] | float'
|
||||||
|
TensorSequence: TypeAlias = 'Sequence[TNum | value[TNum]] | Sequence[Sequence[TNum | value[TNum]]] | Sequence[Sequence[Sequence[TNum | value[TNum]]]]'
|
||||||
|
U = TypeVar("U", int, float)
|
||||||
|
|
||||||
|
|
||||||
|
class tensor(ArrayType[TNum]):
|
||||||
|
"""Generalized n-dimensional tensor class supporting numpy-style operations.
|
||||||
|
|
||||||
|
A tensor can have any number of dimensions and supports element-wise operations,
|
||||||
|
reshaping, transposition, and various reduction operations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, values: 'TNum | value[TNum] | vector[TNum] | tensor[TNum] | TensorSequence[TNum]', shape: Sequence[int] | None = None):
|
||||||
|
"""Create a tensor with given values.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
values: Nested iterables of constant values or copapy values.
|
||||||
|
Can be a scalar, 1D iterable (vector),
|
||||||
|
or n-dimensional nested structure.
|
||||||
|
"""
|
||||||
|
if shape:
|
||||||
|
self.shape: tuple[int, ...] = tuple(shape)
|
||||||
|
assert (isinstance(values, Sequence) and
|
||||||
|
any(isinstance(v, (value, int, float)) for v in values)), \
|
||||||
|
"Values must be a sequence of values if shape is provided"
|
||||||
|
self.values: tuple[TNum | value[TNum], ...] = tuple(v for v in values if not isinstance(v, Sequence))
|
||||||
|
self.ndim: int = len(shape)
|
||||||
|
elif isinstance(values, (int, float)):
|
||||||
|
# Scalar case: 0-dimensional tensor
|
||||||
|
self.shape = ()
|
||||||
|
self.values = (values,)
|
||||||
|
self.ndim = 0
|
||||||
|
elif isinstance(values, value):
|
||||||
|
# Scalar value case
|
||||||
|
self.shape = ()
|
||||||
|
self.values = (values,)
|
||||||
|
self.ndim = 0
|
||||||
|
elif isinstance(values, vector):
|
||||||
|
# 1D case from vector
|
||||||
|
self.shape = (len(values),)
|
||||||
|
self.values = values.values
|
||||||
|
self.ndim = 1
|
||||||
|
elif isinstance(values, tensor):
|
||||||
|
# Copy constructor
|
||||||
|
self.shape = values.shape
|
||||||
|
self.values = values.values
|
||||||
|
self.ndim = values.ndim
|
||||||
|
else:
|
||||||
|
# General n-dimensional case
|
||||||
|
self.values, self.shape = self._infer_shape_and_flatten(values)
|
||||||
|
self.ndim = len(self.shape)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _infer_shape_and_flatten(values: Sequence[Any]) -> tuple[tuple[Any, ...], tuple[int, ...]]:
|
||||||
|
"""Infer the shape of a nested iterable and validate consistency."""
|
||||||
|
def get_shape(val: int | float | value[Any] | Sequence[Any]) -> list[int]:
|
||||||
|
if isinstance(val, int | float):
|
||||||
|
return []
|
||||||
|
if isinstance(val, value):
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
if not val:
|
||||||
|
return [0]
|
||||||
|
sub_shape = get_shape(val[0])
|
||||||
|
if any(get_shape(item) != sub_shape for item in val[1:]):
|
||||||
|
raise ValueError("All elements must have consistent shape")
|
||||||
|
return [len(val)] + sub_shape
|
||||||
|
return []
|
||||||
|
|
||||||
|
shape = tuple(get_shape(values))
|
||||||
|
if not shape:
|
||||||
|
# Scalar
|
||||||
|
return (values,), ()
|
||||||
|
|
||||||
|
# Flatten nested structure
|
||||||
|
def flatten_recursive(val: Any) -> list[Any]:
|
||||||
|
if isinstance(val, int | float | value):
|
||||||
|
return [val]
|
||||||
|
else:
|
||||||
|
result: list[value[Any]] = []
|
||||||
|
for item in val:
|
||||||
|
if isinstance(item, int | float | value | Sequence):
|
||||||
|
result.extend(flatten_recursive(item))
|
||||||
|
return result
|
||||||
|
|
||||||
|
flattened = flatten_recursive(values)
|
||||||
|
return tuple(flattened), shape
|
||||||
|
|
||||||
|
def _get_flat_index(self, indices: Sequence[int]) -> int:
|
||||||
|
"""Convert multi-dimensional indices to flat index."""
|
||||||
|
if len(indices) != len(self.shape):
|
||||||
|
raise IndexError(f"Expected {len(self.shape)} indices, got {len(indices)}")
|
||||||
|
|
||||||
|
flat_idx = 0
|
||||||
|
stride = 1
|
||||||
|
for i in range(len(self.shape) - 1, -1, -1):
|
||||||
|
if not (0 <= indices[i] < self.shape[i]):
|
||||||
|
raise IndexError(f"Index {indices[i]} out of bounds for dimension {i} with size {self.shape[i]}")
|
||||||
|
flat_idx += indices[i] * stride
|
||||||
|
stride *= self.shape[i]
|
||||||
|
return flat_idx
|
||||||
|
|
||||||
|
def _get_indices_from_flat(self, flat_idx: int) -> tuple[int, ...]:
|
||||||
|
"""Convert flat index to multi-dimensional indices."""
|
||||||
|
indices: list[int] = []
|
||||||
|
for dim_size in reversed(self.shape):
|
||||||
|
indices.append(flat_idx % dim_size)
|
||||||
|
flat_idx //= dim_size
|
||||||
|
return tuple(reversed(indices))
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"tensor(shape={self.shape}, values={self.values if self.ndim == 0 else '...'})"
|
||||||
|
|
||||||
|
def __len__(self) -> int:
|
||||||
|
"""Return the size of the first dimension."""
|
||||||
|
if self.ndim == 0:
|
||||||
|
raise TypeError("len() of a 0-d tensor")
|
||||||
|
return self.shape[0]
|
||||||
|
|
||||||
|
def get_scalar(self: 'tensor[TNum]', *key: int) -> TNum | value[TNum]:
|
||||||
|
"""Get a single scalar value from the tensor given multi-dimensional indices."""
|
||||||
|
assert len(key) == self.ndim, f"Expected {self.ndim} indices, got {len(key)}"
|
||||||
|
flat_idx = self._get_flat_index(key)
|
||||||
|
return self.values[flat_idx]
|
||||||
|
|
||||||
|
def __getitem__(self, key: int | slice | Sequence[int | slice]) -> 'tensor[TNum]':
|
||||||
|
"""Get a sub-tensor or element.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
key: Integer index (returns tensor of rank n-1),
|
||||||
|
slice object (returns tensor with same rank),
|
||||||
|
tuple of indices/slices (returns sub-tensor or element),
|
||||||
|
or tuple of indices (returns single element).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Sub-tensor or element value.
|
||||||
|
"""
|
||||||
|
if self.ndim == 0:
|
||||||
|
raise TypeError("Cannot index a 0-d tensor")
|
||||||
|
|
||||||
|
# Handle single slice
|
||||||
|
if isinstance(key, slice):
|
||||||
|
return self._handle_slice((key,))
|
||||||
|
|
||||||
|
# Handle tuple of indices/slices
|
||||||
|
if isinstance(key, Sequence):
|
||||||
|
return self._handle_slice(key)
|
||||||
|
|
||||||
|
# Handle single integer index
|
||||||
|
assert isinstance(key, int), f"indices must be integers, slices, or tuples thereof, not {type(key)}"
|
||||||
|
# Return a sub-tensor of rank n-1
|
||||||
|
if not (-self.shape[0] <= key < self.shape[0]):
|
||||||
|
raise IndexError(f"Index {key} out of bounds for dimension 0 with size {self.shape[0]}")
|
||||||
|
|
||||||
|
if key < 0:
|
||||||
|
key += self.shape[0]
|
||||||
|
|
||||||
|
# Calculate which elements belong to this slice
|
||||||
|
sub_shape = self.shape[1:]
|
||||||
|
sub_size = 1
|
||||||
|
for s in sub_shape:
|
||||||
|
sub_size *= s
|
||||||
|
|
||||||
|
start_idx = key * sub_size
|
||||||
|
end_idx = start_idx + sub_size
|
||||||
|
|
||||||
|
sub_values = self.values[start_idx:end_idx]
|
||||||
|
|
||||||
|
if not sub_shape:
|
||||||
|
#assert False, (sub_shape, len(sub_shape), sub_values[0])
|
||||||
|
return tensor(sub_values[0])
|
||||||
|
|
||||||
|
return tensor(sub_values, sub_shape)
|
||||||
|
|
||||||
|
def _handle_slice(self, keys: Sequence[int | slice]) -> 'tensor[TNum]':
|
||||||
|
"""Handle slicing operations on the tensor."""
|
||||||
|
# Process all keys and identify ranges for each dimension
|
||||||
|
ranges: list[range] = []
|
||||||
|
|
||||||
|
for i, key in enumerate(keys):
|
||||||
|
if i >= self.ndim:
|
||||||
|
raise IndexError(f"Too many indices for tensor of rank {self.ndim}")
|
||||||
|
|
||||||
|
dim_size = self.shape[i]
|
||||||
|
|
||||||
|
if isinstance(key, int):
|
||||||
|
if not (-dim_size <= key < dim_size):
|
||||||
|
raise IndexError(f"Index {key} out of bounds for dimension {i} with size {dim_size}")
|
||||||
|
if key < 0:
|
||||||
|
key += dim_size
|
||||||
|
ranges.append(range(key, key + 1))
|
||||||
|
else:
|
||||||
|
assert isinstance(key, slice), f"indices must be integers or slices, not {type(key)}"
|
||||||
|
start, stop, step = key.indices(dim_size)
|
||||||
|
ranges.append(range(start, stop, step))
|
||||||
|
|
||||||
|
# Handle remaining dimensions (full ranges)
|
||||||
|
for i in range(len(keys), self.ndim):
|
||||||
|
ranges.append(range(self.shape[i]))
|
||||||
|
|
||||||
|
# Collect elements matching the ranges
|
||||||
|
selected_values: list[TNum | value[TNum]] = []
|
||||||
|
new_shape: list[int] = []
|
||||||
|
|
||||||
|
# Calculate new shape (only include dimensions that weren't single integers)
|
||||||
|
for i, key in enumerate(keys):
|
||||||
|
if not isinstance(key, int):
|
||||||
|
new_shape.append(len(ranges[i]))
|
||||||
|
|
||||||
|
# Add remaining dimensions
|
||||||
|
for i in range(len(keys), self.ndim):
|
||||||
|
new_shape.append(self.shape[i])
|
||||||
|
|
||||||
|
# Iterate through all combinations of indices in the ranges
|
||||||
|
def iterate_ranges(range_list: list[range], current_indices: list[int]) -> None:
|
||||||
|
if len(current_indices) == len(range_list):
|
||||||
|
# Compute flat index
|
||||||
|
flat_idx = 0
|
||||||
|
stride = 1
|
||||||
|
for i in range(len(self.shape) - 1, -1, -1):
|
||||||
|
flat_idx += current_indices[i] * stride
|
||||||
|
stride *= self.shape[i]
|
||||||
|
selected_values.append(self.values[flat_idx])
|
||||||
|
else:
|
||||||
|
dim = len(current_indices)
|
||||||
|
for idx in range_list[dim]:
|
||||||
|
iterate_ranges(range_list, current_indices + [idx])
|
||||||
|
|
||||||
|
iterate_ranges(ranges, [])
|
||||||
|
|
||||||
|
# Return based on result shape
|
||||||
|
if not new_shape:
|
||||||
|
# Single element (all were integers)
|
||||||
|
return tensor(selected_values[0])
|
||||||
|
|
||||||
|
return tensor(tuple(selected_values), tuple(new_shape))
|
||||||
|
|
||||||
|
def __iter__(self) -> Iterator['tensor[TNum]']:
|
||||||
|
"""Iterate over the first dimension."""
|
||||||
|
if self.ndim == 0:
|
||||||
|
raise TypeError("Cannot iterate over a 0-d tensor")
|
||||||
|
|
||||||
|
for i in range(self.shape[0]):
|
||||||
|
yield self[i]
|
||||||
|
|
||||||
|
def __neg__(self) -> 'tensor[TNum]':
|
||||||
|
"""Negate all elements."""
|
||||||
|
negated_values: tuple[Any, ...] = tuple(-v for v in self.values)
|
||||||
|
return tensor(negated_values, self.shape)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __add__(self: 'tensor[int]', other: TensorFloatLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __add__(self: 'tensor[int]', other: TensorIntLike) -> 'tensor[int]': ...
|
||||||
|
@overload
|
||||||
|
def __add__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __add__(self, other: TensorNumLike) -> 'tensor[int] | tensor[float]': ...
|
||||||
|
def __add__(self, other: TensorNumLike) -> Any:
|
||||||
|
"""Element-wise addition."""
|
||||||
|
return self._binary_op(other, lambda a, b: a + b)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __radd__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __radd__(self: 'tensor[int]', other: value[int] | int) -> 'tensor[int]': ...
|
||||||
|
def __radd__(self, other: Any) -> Any:
|
||||||
|
return self + other
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __sub__(self: 'tensor[int]', other: TensorFloatLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __sub__(self: 'tensor[int]', other: TensorIntLike) -> 'tensor[int]': ...
|
||||||
|
@overload
|
||||||
|
def __sub__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __sub__(self, other: TensorNumLike) -> 'tensor[int] | tensor[float]': ...
|
||||||
|
def __sub__(self, other: TensorNumLike) -> Any:
|
||||||
|
"""Element-wise subtraction."""
|
||||||
|
return self._binary_op(other, lambda a, b: a - b, commutative=False)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __rsub__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __rsub__(self: 'tensor[int]', other: value[int] | int) -> 'tensor[int]': ...
|
||||||
|
def __rsub__(self, other: TensorNumLike) -> Any:
|
||||||
|
return self._binary_op(other, lambda a, b: b - a, commutative=False, reversed=True)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __mul__(self: 'tensor[int]', other: TensorFloatLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __mul__(self: 'tensor[int]', other: TensorIntLike) -> 'tensor[int]': ...
|
||||||
|
@overload
|
||||||
|
def __mul__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __mul__(self, other: TensorNumLike) -> 'tensor[int] | tensor[float]': ...
|
||||||
|
def __mul__(self, other: TensorNumLike) -> Any:
|
||||||
|
"""Element-wise multiplication."""
|
||||||
|
return self._binary_op(other, lambda a, b: a * b)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __rmul__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __rmul__(self: 'tensor[int]', other: value[int] | int) -> 'tensor[int]': ...
|
||||||
|
def __rmul__(self, other: TensorNumLike) -> Any:
|
||||||
|
return self * other
|
||||||
|
|
||||||
|
def __truediv__(self, other: TensorNumLike) -> 'tensor[float]':
|
||||||
|
"""Element-wise division."""
|
||||||
|
return self._binary_op(other, lambda a, b: a / b, commutative=False)
|
||||||
|
|
||||||
|
def __rtruediv__(self, other: TensorNumLike) -> 'tensor[float]':
|
||||||
|
"""Element-wise right division."""
|
||||||
|
return self._binary_op(other, lambda a, b: b / a, commutative=False, reversed=True)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __pow__(self: 'tensor[int]', other: TensorFloatLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __pow__(self: 'tensor[int]', other: TensorIntLike) -> 'tensor[int]': ...
|
||||||
|
@overload
|
||||||
|
def __pow__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __pow__(self, other: TensorNumLike) -> 'tensor[int] | tensor[float]': ...
|
||||||
|
def __pow__(self, other: TensorNumLike) -> Any:
|
||||||
|
"""Element-wise power."""
|
||||||
|
return self._binary_op(other, lambda a, b: a ** b, commutative=False)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def __rpow__(self: 'tensor[float]', other: TensorNumLike) -> 'tensor[float]': ...
|
||||||
|
@overload
|
||||||
|
def __rpow__(self: 'tensor[int]', other: value[int] | int) -> 'tensor[int]': ...
|
||||||
|
def __rpow__(self, other: TensorNumLike) -> Any:
|
||||||
|
return self._binary_op(other, lambda a, b: b ** a, commutative=False, reversed=True)
|
||||||
|
|
||||||
|
def __gt__(self, other: TensorNumLike) -> 'tensor[int]':
|
||||||
|
"""Element-wise greater than."""
|
||||||
|
return self._binary_op(other, lambda a, b: a > b, commutative=False)
|
||||||
|
|
||||||
|
def __lt__(self, other: TensorNumLike) -> 'tensor[int]':
|
||||||
|
"""Element-wise less than."""
|
||||||
|
return self._binary_op(other, lambda a, b: a < b, commutative=False)
|
||||||
|
|
||||||
|
def __ge__(self, other: TensorNumLike) -> 'tensor[int]':
|
||||||
|
"""Element-wise greater than or equal."""
|
||||||
|
return self._binary_op(other, lambda a, b: a >= b, commutative=False)
|
||||||
|
|
||||||
|
def __le__(self, other: TensorNumLike) -> 'tensor[int]':
|
||||||
|
"""Element-wise less than or equal."""
|
||||||
|
return self._binary_op(other, lambda a, b: a <= b, commutative=False)
|
||||||
|
|
||||||
|
def __eq__(self, other: TensorNumLike) -> 'tensor[int]': # type: ignore
|
||||||
|
"""Element-wise equality."""
|
||||||
|
return self._binary_op(other, lambda a, b: a == b)
|
||||||
|
|
||||||
|
def __ne__(self, other: TensorNumLike) -> 'tensor[int]': # type: ignore
|
||||||
|
"""Element-wise inequality."""
|
||||||
|
return self._binary_op(other, lambda a, b: a != b)
|
||||||
|
|
||||||
|
def _binary_op(self, other: TensorNumLike, op: Callable[[Any, Any], 'tensor[TNum]'],
|
||||||
|
commutative: bool = True, reversed: bool = False) -> 'tensor[Any]':
|
||||||
|
"""Perform binary operation with broadcasting support.
|
||||||
|
"""
|
||||||
|
seen_consts: dict[NumLike, NumLike] = {}
|
||||||
|
|
||||||
|
def call_op(a: TNum | value[TNum], b: NumLike) -> Any:
|
||||||
|
if isinstance(b, value) or not isinstance(a, value):
|
||||||
|
b_trans = b
|
||||||
|
else:
|
||||||
|
if b in seen_consts:
|
||||||
|
b_trans = seen_consts[b]
|
||||||
|
else:
|
||||||
|
b_trans = value(b)
|
||||||
|
seen_consts[b] = b_trans
|
||||||
|
if reversed:
|
||||||
|
return op(b_trans, a)
|
||||||
|
else:
|
||||||
|
return op(a, b_trans)
|
||||||
|
|
||||||
|
if isinstance(other, Sequence | vector):
|
||||||
|
other_tensor: tensor[Any] = tensor(other)
|
||||||
|
return self._binary_op(other_tensor, op, commutative, reversed)
|
||||||
|
|
||||||
|
elif isinstance(other, tensor):
|
||||||
|
self_shape = self.shape
|
||||||
|
other_shape = other.shape
|
||||||
|
|
||||||
|
# Check if shapes are identical
|
||||||
|
if self_shape == other_shape:
|
||||||
|
result_vals = tuple(call_op(a, b) for a, b in zip(self.values, other.values))
|
||||||
|
return tensor(result_vals, self_shape)
|
||||||
|
|
||||||
|
# Broadcast shapes using numpy-style broadcasting rules
|
||||||
|
result_shape = self._broadcast_shapes(self_shape, other_shape)
|
||||||
|
|
||||||
|
# Expand both tensors to the broadcast shape
|
||||||
|
self_expanded = self._expand_to_shape(result_shape)
|
||||||
|
other_expanded = other._expand_to_shape(result_shape)
|
||||||
|
|
||||||
|
# Apply operation element-wise
|
||||||
|
result_vals = tuple(call_op(a, b) for a, b in zip(self_expanded.values, other_expanded.values))
|
||||||
|
return tensor(result_vals, result_shape)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Broadcast scalar
|
||||||
|
result_vals = tuple(call_op(v, other) for v in self.values)
|
||||||
|
return tensor(result_vals, self.shape)
|
||||||
|
|
||||||
|
def _broadcast_shapes(self, shape1: tuple[int, ...], shape2: tuple[int, ...]) -> tuple[int, ...]:
|
||||||
|
"""Compute the broadcast shape of two shapes following numpy rules.
|
||||||
|
|
||||||
|
Rules:
|
||||||
|
- Dimensions are compared from right to left
|
||||||
|
- Dimensions must either be equal or one must be 1
|
||||||
|
- Missing dimensions are treated as 1
|
||||||
|
"""
|
||||||
|
# Align from the right
|
||||||
|
max_ndim = max(len(shape1), len(shape2))
|
||||||
|
|
||||||
|
# Pad with 1s on the left
|
||||||
|
padded_shape1 = (1,) * (max_ndim - len(shape1)) + shape1
|
||||||
|
padded_shape2 = (1,) * (max_ndim - len(shape2)) + shape2
|
||||||
|
|
||||||
|
result_shape: list[int] = []
|
||||||
|
for dim1, dim2 in zip(padded_shape1, padded_shape2):
|
||||||
|
if dim1 == dim2:
|
||||||
|
result_shape.append(dim1)
|
||||||
|
elif dim1 == 1:
|
||||||
|
result_shape.append(dim2)
|
||||||
|
elif dim2 == 1:
|
||||||
|
result_shape.append(dim1)
|
||||||
|
else:
|
||||||
|
raise ValueError(f"Incompatible shapes for broadcasting: {shape1} vs {shape2}")
|
||||||
|
|
||||||
|
return tuple(result_shape)
|
||||||
|
|
||||||
|
def _expand_to_shape(self, target_shape: tuple[int, ...]) -> 'tensor[TNum]':
|
||||||
|
"""Expand tensor to target shape using broadcasting (repeating dimensions of size 1).
|
||||||
|
"""
|
||||||
|
if self.shape == target_shape:
|
||||||
|
return self
|
||||||
|
|
||||||
|
# Pad self.shape with 1s on the left to match target_shape length
|
||||||
|
padded_self_shape = (1,) * (len(target_shape) - len(self.shape)) + self.shape
|
||||||
|
|
||||||
|
# Validate broadcasting is possible
|
||||||
|
for s, t in zip(padded_self_shape, target_shape):
|
||||||
|
if s != t and s != 1:
|
||||||
|
raise ValueError(f"Cannot broadcast shape {self.shape} to {target_shape}")
|
||||||
|
|
||||||
|
# Expand step by step from left to right
|
||||||
|
current_tensor = self
|
||||||
|
current_shape = self.shape
|
||||||
|
|
||||||
|
# Add missing dimensions on the left
|
||||||
|
if len(self.shape) < len(target_shape):
|
||||||
|
diff = len(target_shape) - len(self.shape)
|
||||||
|
for _ in range(diff):
|
||||||
|
# Reshape to add dimension of size 1 on the left
|
||||||
|
current_tensor = current_tensor.reshape(1, *current_tensor.shape)
|
||||||
|
current_shape = (1,) + current_shape
|
||||||
|
|
||||||
|
# Expand each dimension that is 1 to the target size
|
||||||
|
for i, (curr_dim, target_dim) in enumerate(zip(current_shape, target_shape)):
|
||||||
|
if curr_dim == 1 and target_dim != 1:
|
||||||
|
current_tensor = current_tensor._repeat_along_axis(i, target_dim)
|
||||||
|
current_shape = current_tensor.shape
|
||||||
|
|
||||||
|
return current_tensor
|
||||||
|
|
||||||
|
def _repeat_along_axis(self, axis: int, repetitions: int) -> 'tensor[TNum]':
|
||||||
|
"""Repeat tensor along a specific axis.
|
||||||
|
"""
|
||||||
|
if self.shape[axis] != 1:
|
||||||
|
raise ValueError(f"Can only repeat dimensions of size 1, got {self.shape[axis]}")
|
||||||
|
|
||||||
|
# Create list of indices to select (repeat the single index)
|
||||||
|
new_shape = list(self.shape)
|
||||||
|
new_shape[axis] = repetitions
|
||||||
|
new_values: list[TNum | value[TNum]] = []
|
||||||
|
|
||||||
|
# Iterate through all positions in the new tensor
|
||||||
|
def iterate_and_repeat(current_indices: list[int], depth: int) -> None:
|
||||||
|
if depth == len(new_shape):
|
||||||
|
# Get the source index (with 0 for the repeated dimension)
|
||||||
|
source_indices = list(current_indices)
|
||||||
|
source_indices[axis] = 0
|
||||||
|
source_idx = self._get_flat_index(tuple(source_indices))
|
||||||
|
new_values.append(self.values[source_idx])
|
||||||
|
else:
|
||||||
|
for i in range(new_shape[depth]):
|
||||||
|
iterate_and_repeat(current_indices + [i], depth + 1)
|
||||||
|
|
||||||
|
iterate_and_repeat([], 0)
|
||||||
|
return tensor(tuple(new_values), tuple(new_shape))
|
||||||
|
|
||||||
|
def reshape(self, *new_shape: int) -> 'tensor[TNum]':
|
||||||
|
"""Reshape the tensor to a new shape.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
*new_shape: New shape dimensions. Use -1 for one dimension to infer automatically.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A new tensor with the specified shape.
|
||||||
|
"""
|
||||||
|
shape_arg: tuple[int, ...] | int = new_shape if len(new_shape) != 1 else new_shape[0]
|
||||||
|
if isinstance(shape_arg, int):
|
||||||
|
new_shape = (shape_arg,)
|
||||||
|
else:
|
||||||
|
new_shape = shape_arg
|
||||||
|
|
||||||
|
# Handle -1 in shape (automatic dimension inference)
|
||||||
|
neg_one_count = sum(1 for d in new_shape if d == -1)
|
||||||
|
if neg_one_count > 1:
|
||||||
|
raise ValueError("Only one dimension can be -1")
|
||||||
|
|
||||||
|
if neg_one_count == 1:
|
||||||
|
known_size = 1
|
||||||
|
for dim in new_shape:
|
||||||
|
if dim != -1:
|
||||||
|
known_size *= dim
|
||||||
|
|
||||||
|
if self.size() % known_size != 0:
|
||||||
|
raise ValueError(f"Cannot infer dimension from size {self.size()} with shape {new_shape}")
|
||||||
|
|
||||||
|
inferred_dim = self.size() // known_size
|
||||||
|
new_shape = tuple(inferred_dim if d == -1 else d for d in new_shape)
|
||||||
|
|
||||||
|
total_size = 1
|
||||||
|
for dim in new_shape:
|
||||||
|
total_size *= dim
|
||||||
|
|
||||||
|
if total_size != self.size():
|
||||||
|
raise ValueError(f"Cannot reshape tensor of size {self.size()} into shape {new_shape}")
|
||||||
|
|
||||||
|
return tensor(self.values, new_shape)
|
||||||
|
|
||||||
|
@overload
|
||||||
|
def trace(self: 'tensor[TNum]') -> TNum | value[TNum]: ...
|
||||||
|
@overload
|
||||||
|
def trace(self: 'tensor[int]') -> int | value[int]: ...
|
||||||
|
@overload
|
||||||
|
def trace(self: 'tensor[float]') -> float | value[float]: ...
|
||||||
|
def trace(self) -> Any:
|
||||||
|
"""Calculate the trace (sum of diagonal elements)."""
|
||||||
|
assert self.ndim == 2 and self.shape[0] == self.shape[1], "Trace is only defined for square matrices"
|
||||||
|
return mixed_sum(self.get_scalar(i, i) for i in range(self.shape[0]))
|
||||||
|
|
||||||
|
def transpose(self, *axes: int) -> 'tensor[TNum]':
|
||||||
|
"""Transpose the tensor.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
*axes: Permutation of axes. If not provided, reverses all axes.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A transposed tensor.
|
||||||
|
"""
|
||||||
|
if not axes:
|
||||||
|
axes = tuple(range(self.ndim - 1, -1, -1))
|
||||||
|
|
||||||
|
if len(axes) != self.ndim:
|
||||||
|
raise ValueError("axes don't match tensor")
|
||||||
|
|
||||||
|
if any(not (0 <= ax < self.ndim) for ax in axes):
|
||||||
|
raise ValueError(f"Invalid axes for tensor of rank {self.ndim}")
|
||||||
|
|
||||||
|
new_shape = tuple(self.shape[ax] for ax in axes)
|
||||||
|
new_values: list[Any] = [None] * len(self.values)
|
||||||
|
|
||||||
|
for old_idx in range(len(self.values)):
|
||||||
|
old_indices = self._get_indices_from_flat(old_idx)
|
||||||
|
new_indices = tuple(old_indices[ax] for ax in axes)
|
||||||
|
|
||||||
|
new_flat_idx = 0
|
||||||
|
stride = 1
|
||||||
|
for i in range(len(new_shape) - 1, -1, -1):
|
||||||
|
new_flat_idx += new_indices[i] * stride
|
||||||
|
stride *= new_shape[i]
|
||||||
|
|
||||||
|
new_values[new_flat_idx] = self.values[old_idx]
|
||||||
|
|
||||||
|
return tensor(new_values, new_shape)
|
||||||
|
|
||||||
|
def flatten(self) -> 'tensor[TNum]':
|
||||||
|
"""Flatten the tensor to 1D.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A flattened 1D tensor.
|
||||||
|
"""
|
||||||
|
return self.reshape(-1)
|
||||||
|
|
||||||
|
def size(self) -> int:
|
||||||
|
"""Return total number of elements."""
|
||||||
|
size = 1
|
||||||
|
for dim in self.shape:
|
||||||
|
size *= dim
|
||||||
|
return size
|
||||||
|
|
||||||
|
    def matmul(self, other: 'tensor[TNum] | vector[TNum]') -> 'TNum | value[TNum] | tensor[TNum]':
        """Matrix multiplication (@ operator).

        Arguments:
            other: Another tensor or vector to multiply with.

        Returns:
            Result of the matrix multiplication: a scalar for 1D @ 1D,
            otherwise a tensor of the reduced shape.

        Raises:
            ValueError: If shapes are incompatible for matrix multiplication.
        """
        if self.ndim < 1 or other.ndim < 1:
            raise ValueError("matmul requires tensors with at least 1 dimension")

        # 1D @ 1D: dot product (returns a scalar)
        if self.ndim == 1 and other.ndim == 1:
            if self.shape[0] != other.shape[0]:
                raise ValueError(f"Shape mismatch: ({self.shape[0]},) @ ({other.shape[0]},)")
            return mixed_sum(a * b for a, b in zip(self.values, other.values))

        # 2D @ 2D: standard matrix multiplication
        if self.ndim == 2 and other.ndim == 2 and isinstance(other, tensor):
            if self.shape[1] != other.shape[0]:
                raise ValueError(f"Shape mismatch: {self.shape} @ {other.shape}")

            result_values: list[Any] = []
            for i in range(self.shape[0]):
                for j in range(other.shape[1]):
                    dot_sum = mixed_sum(self.get_scalar(i, k) * other.get_scalar(k, j) for k in range(self.shape[1]))
                    result_values.append(dot_sum)

            return tensor(tuple(result_values), (self.shape[0], other.shape[1]))

        # 1D @ 2D: treat the 1D operand as a row vector
        if self.ndim == 1 and other.ndim == 2 and isinstance(other, tensor):
            if self.shape[0] != other.shape[0]:
                raise ValueError(f"Shape mismatch: ({self.shape[0]},) @ {other.shape}")

            result_values = []
            for j in range(other.shape[1]):
                dot_sum = mixed_sum(self.get_scalar(k) * other.get_scalar(k, j) for k in range(self.shape[0]))
                result_values.append(dot_sum)

            return tensor(tuple(result_values), (other.shape[1],))

        # 2D @ 1D: treat the 1D operand as a column vector; a single loop
        # covers both tensor and vector operands, since both provide get_scalar.
        if self.ndim == 2 and other.ndim == 1:
            if self.shape[1] != other.shape[0]:
                raise ValueError(f"Shape mismatch: {self.shape} @ ({other.shape[0]},)")

            result_values = []
            for i in range(self.shape[0]):
                dot_sum = mixed_sum(self.get_scalar(i, k) * other.get_scalar(k) for k in range(self.shape[1]))
                result_values.append(dot_sum)

            return tensor(tuple(result_values), (self.shape[0],))

        raise NotImplementedError(f"matmul not implemented for shapes {self.ndim}D @ {other.ndim}D")
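    # Usage sketch (illustrative, not part of the commit): the four supported
    # branches follow NumPy's @ conventions.
    #
    #   a = tensor((1, 2, 3, 4), (2, 2))
    #   b = tensor((5, 6, 7, 8), (2, 2))
    #   a @ b            # 2D @ 2D -> tensor of shape (2, 2)
    #   v = tensor((1, 0), (2,))
    #   a @ v            # 2D @ 1D -> tensor of shape (2,)
    #   v @ a            # 1D @ 2D -> tensor of shape (2,)
    #   v @ v            # 1D @ 1D -> scalar dot product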
    def __matmul__(self, other: 'tensor[TNum] | vector[TNum]') -> 'TNum | value[TNum] | tensor[TNum]':
        """Matrix multiplication operator (@)."""
        return self.matmul(other)
    def __rmatmul__(self, other: 'tensor[TNum] | vector[TNum]') -> 'TNum | value[TNum] | tensor[TNum]':
        """Right matrix multiplication operator (other @ self)."""
        if isinstance(other, tensor):
            return other.matmul(self)
        return NotImplemented
    def sum(self, axis: int | Sequence[int] | None = None, keepdims: bool = False) -> TNum | value[TNum] | 'tensor[TNum]':
        """Sum all elements or sum along the specified axis or axes.

        Arguments:
            axis: Axis or tuple of axes along which to sum. If None, sums all elements.
            keepdims: If True, keep reduced dimensions as size 1.

        Returns:
            Scalar or tensor with reduced dimension(s).
        """
        if axis is None:
            result = mixed_sum(self.values)
            if keepdims:
                # Return a tensor with all dimensions reduced to size 1
                return tensor((result,), tuple(1 for _ in self.shape))
            return result

        # Handle a single axis (convert to tuple for uniform processing)
        if isinstance(axis, int):
            axes: tuple[int, ...] = (axis,)
        else:
            axes = tuple(axis)

        # Validate and deduplicate axes
        normalized_axes: list[int] = []
        for ax in axes:
            if not (0 <= ax < self.ndim):
                raise ValueError(f"Axis {ax} is out of bounds for tensor of rank {self.ndim}")
            if ax not in normalized_axes:
                normalized_axes.append(ax)

        # Sort axes in descending order so dimensions can be removed in place
        normalized_axes = sorted(normalized_axes, reverse=True)

        # Shape after removing the summed axes
        new_shape = list(self.shape)
        for ax in normalized_axes:
            new_shape.pop(ax)

        if not new_shape:
            # All axes summed - return a scalar (or an all-ones shape if
            # keepdims is requested, consistent with the axis=None case)
            result = mixed_sum(self.values)
            if keepdims:
                return tensor((result,), tuple(1 for _ in self.shape))
            return result

        new_size = 1
        for dim in new_shape:
            new_size *= dim

        # Accumulate values into the reduced layout; the mask tracks which
        # output slots have been written, avoiding a spurious initial value.
        new_values: list[TNum | value[TNum]] = [self.values[0]] * new_size
        new_v_mask: list[bool] = [False] * new_size

        for old_idx in range(len(self.values)):
            old_indices = self._get_indices_from_flat(old_idx)

            # Build new indices by dropping the summed axes
            new_indices = [idx for i, idx in enumerate(old_indices) if i not in normalized_axes]

            # Compute the flat index in the reduced shape
            new_flat_idx = 0
            stride = 1
            for i in range(len(new_shape) - 1, -1, -1):
                new_flat_idx += new_indices[i] * stride
                stride *= new_shape[i]

            if new_v_mask[new_flat_idx]:
                new_values[new_flat_idx] = new_values[new_flat_idx] + self.values[old_idx]
            else:
                new_values[new_flat_idx] = self.values[old_idx]
                new_v_mask[new_flat_idx] = True

        if keepdims:
            # Restore reduced dimensions as size 1
            full_shape = list(self.shape)
            for ax in normalized_axes:
                full_shape[ax] = 1
            return tensor(new_values, tuple(full_shape))

        return tensor(new_values, tuple(new_shape))
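    # Usage sketch (illustrative, not part of the commit): reductions over
    # all elements, a single axis, or several axes, optionally keeping dims.
    #
    #   t = tensor((1, 2, 3, 4, 5, 6), (2, 3))
    #   t.sum()                       # 21 (scalar)
    #   t.sum(axis=0)                 # shape (3,): column sums
    #   t.sum(axis=(0, 1))            # 21 (scalar)
    #   t.sum(axis=0, keepdims=True)  # shape (1, 3)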
    def mean(self, axis: int | None = None) -> Any:
        """Calculate the mean along an axis or over all elements.

        Arguments:
            axis: Axis along which to compute the mean. If None, computes the overall mean.

        Returns:
            Scalar or tensor with reduced dimension.
        """
        if axis is None:
            total_sum: Any = mixed_sum(self.values)
            return total_sum / self.size()

        # Both scalar and tensor sums support division by the axis size
        return self.sum(axis) / self.shape[axis]
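    # Usage sketch (illustrative): mean() divides the matching sum by the
    # element count overall, or by the size of the reduced axis.
    #
    #   t = tensor((1, 2, 3, 4, 5, 6), (2, 3))
    #   t.mean()        # 21 / 6 = 3.5
    #   t.mean(axis=1)  # shape (2,): row means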
    def map(self, func: Callable[[Any], value[U] | U]) -> 'tensor[U]':
        """Apply a function to each element.

        Arguments:
            func: Function to apply to each element.

        Returns:
            A new tensor with the function applied element-wise.
        """
        result_vals = tuple(func(v) for v in self.values)
        return tensor(result_vals, self.shape)
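    # Usage sketch (illustrative): any unary callable works element-wise.
    #
    #   t = tensor((1, 2, 3, 4), (2, 2))
    #   t.map(lambda x: x * x)   # element-wise squares, shape (2, 2)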
    def homogenize(self) -> 'tensor[TNum]':
        """Convert all elements to copapy values if any element is a copapy value."""
        if any(isinstance(val, value) for val in self.values):
            homogenized: tuple[value[Any], ...] = tuple(val if isinstance(val, value) else value(val) for val in self.values)
            return tensor(homogenized, self.shape)
        return self
    @property
    def T(self) -> 'tensor[TNum]':
        """Transposed tensor with all axes reversed (equivalent to transpose() with no arguments)."""
        return self.transpose()
def zeros(shape: Sequence[int] | int) -> tensor[int]:
    """Create a zero tensor of the given shape."""
    if isinstance(shape, int):
        shape = (shape,)

    size = 1
    for dim in shape:
        size *= dim

    return tensor([0] * size, tuple(shape))
def ones(shape: Sequence[int] | int) -> tensor[int]:
    """Create a tensor of ones with the given shape."""
    if isinstance(shape, int):
        shape = (shape,)

    size = 1
    for dim in shape:
        size *= dim

    return tensor([1] * size, tuple(shape))
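# Usage sketch (illustrative, not part of the commit): both factories
# accept either a plain int or a shape sequence.
#
#   zeros((2, 3))   # 2x3 tensor of zeros
#   ones(4)         # 1D tensor of four ones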
def arange(start: int | float, stop: int | float | None = None,
           step: int | float = 1) -> tensor[int] | tensor[float]:
    """Create a 1D tensor with evenly spaced values.

    Arguments:
        start: Start value. If stop is None, start is used as the stop value and the range starts at 0.
        stop: Stop value (exclusive).
        step: Step between values; must not be zero.

    Returns:
        A 1D tensor.
    """
    if stop is None:
        stop = start
        start = 0

    values_list: list[value[Any]] = []
    current = start
    if step > 0:
        while current < stop:
            values_list.append(value(current))
            current += step
    elif step < 0:
        while current > stop:
            values_list.append(value(current))
            current += step
    else:
        raise ValueError("step cannot be zero")

    return tensor(tuple(values_list), (len(values_list),))
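# Usage sketch (illustrative): a single argument is the exclusive stop,
# and negative steps count down.
#
#   arange(4)          # values 0, 1, 2, 3
#   arange(1, 2, 0.5)  # values 1.0, 1.5 (stop is exclusive)
#   arange(3, 0, -1)   # values 3, 2, 1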
def eye(rows: int, cols: int | None = None) -> tensor[int]:
    """Create an identity tensor with ones on the main diagonal.

    Arguments:
        rows: Number of rows.
        cols: Number of columns (defaults to rows).

    Returns:
        A 2D identity tensor.
    """
    if cols is None:
        cols = rows

    values_list: list[value[int]] = []
    for i in range(rows):
        for j in range(cols):
            values_list.append(value(1) if i == j else value(0))

    return tensor(tuple(values_list), (rows, cols))
def identity(size: int) -> tensor[int]:
    """Create a square identity tensor.

    Arguments:
        size: Size of the square tensor.

    Returns:
        A square 2D identity tensor.
    """
    return eye(size, size)
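# Usage sketch (illustrative): eye supports rectangular shapes; identity
# is the square special case.
#
#   eye(2, 3)     # shape (2, 3), ones on the main diagonal
#   identity(3)   # same as eye(3, 3)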
@overload
def diagonal(vec: 'tensor[int] | vector[int]') -> tensor[int]: ...
@overload
def diagonal(vec: 'tensor[float] | vector[float]') -> tensor[float]: ...
def diagonal(vec: 'tensor[Any] | vector[Any]') -> 'tensor[Any]':
    """Create a diagonal tensor from a 1D tensor or vector.

    Arguments:
        vec: A 1D tensor or vector with the values to place on the diagonal.

    Returns:
        A 2D tensor with the input values on the diagonal and zeros elsewhere.
    """
    if vec.ndim != 1:
        raise ValueError(f"Input must be 1D, got {vec.ndim}D")

    size = len(vec)
    values_list: list[Any] = []
    for i in range(size):
        for j in range(size):
            values_list.append(vec[i] if i == j else value(0))

    return tensor(tuple(values_list), (size, size))
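# Usage sketch (illustrative): build a diagonal matrix from a 1D tensor
# or vector.
#
#   diagonal(arange(1, 4))   # 3x3 tensor with 1, 2, 3 on the diagonal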
@@ -1,8 +1,9 @@
 from . import value
 from ._mixed import mixed_sum, mixed_homogenize
-from typing import Sequence, TypeVar, Iterable, Any, overload, TypeAlias, Callable, Iterator, Generic
+from typing import Sequence, TypeVar, Iterable, Any, overload, TypeAlias, Callable, Iterator
 import copapy as cp
 from ._helper_types import TNum
+from ._basic_types import ArrayType
 
 #VecNumLike: TypeAlias = 'vector[int] | vector[float] | value[int] | value[float] | int | float | bool'
 VecNumLike: TypeAlias = 'vector[Any] | value[Any] | int | float | bool'
@@ -13,8 +14,13 @@ U = TypeVar("U", int, float)
 epsilon = 1e-20
 
 
-class vector(Generic[TNum]):
+class vector(ArrayType[TNum]):
     """Mathematical vector class supporting basic operations and interactions with values.
+
+    Attributes:
+        values (tuple[value[TNum] | TNum, ...]): The elements of the vector.
+        ndim (int): Number of dimensions (always 1 for vector).
+        shape (tuple[int, ...]): Shape of the vector as a tuple.
     """
     def __init__(self, values: Iterable[TNum | value[TNum]]):
         """Create a vector with given values.
@@ -23,6 +29,8 @@ class vector(Generic[TNum]):
             values: iterable of constant values
         """
         self.values: tuple[value[TNum] | TNum, ...] = tuple(values)
+        self.ndim: int = 1
+        self.shape: tuple[int, ...] = (len(self.values),)
 
     def __repr__(self) -> str:
         return f"vector({self.values})"
@@ -45,6 +53,10 @@ class vector(Generic[TNum]):
     def __iter__(self) -> Iterator[value[TNum] | TNum]:
         return iter(self.values)
 
+    def get_scalar(self, index: int) -> TNum | value[TNum]:
+        """Get a single scalar value from the vector."""
+        return self.values[index]
+
     @overload
     def __add__(self: 'vector[int]', other: VecFloatLike) -> 'vector[float]': ...
     @overload
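# Usage sketch for the additions above (illustrative, not part of the
# commit): vectors now report ndim/shape like tensors and expose get_scalar.
#
#   v = vector([1.0, 2.0, 3.0])
#   v.ndim            # 1
#   v.shape           # (3,)
#   v.get_scalar(0)   # 1.0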
@@ -213,7 +225,7 @@ class vector(Generic[TNum]):
             a3 * b1 - a1 * b3,
             a1 * b2 - a2 * b1
         ])
 
     def __gt__(self, other: VecNumLike) -> 'vector[int]':
         if isinstance(other, vector):
             assert len(self.values) == len(other.values)
@@ -267,11 +279,7 @@ class vector(Generic[TNum]):
             return vector(a != other for a in self.values)
         o = value(other)  # Make sure a single constant is allocated
         return vector(a != o if isinstance(a, value) else a != other for a in self.values)
 
-    @property
-    def shape(self) -> tuple[int]:
-        """Return the shape of the vector as (length,)."""
-        return (len(self.values),)
 
     @overload
     def sum(self: 'vector[int]') -> int | value[int]: ...
@@ -299,15 +307,15 @@ class vector(Generic[TNum]):
 
     def map(self, func: Callable[[Any], value[U] | U]) -> 'vector[U]':
         """Applies a function to each element of the vector and returns a new vector.
 
         Arguments:
             func: A function that takes a single argument.
 
         Returns:
             A new vector with the function applied to each element.
         """
         return vector(func(x) for x in self.values)
 
     def _map2(self, other: VecNumLike, func: Callable[[Any, Any], value[int] | value[float]]) -> 'vector[Any]':
         if isinstance(other, vector):
             assert len(self.values) == len(other.values)
@@ -320,11 +328,11 @@ class vector(Generic[TNum]):
 
 def cross_product(v1: vector[float], v2: vector[float]) -> vector[float]:
     """Calculate the cross product of two 3D vectors.
 
     Arguments:
         v1: First 3D vector.
         v2: Second 3D vector.
 
     Returns:
         The cross product vector.
     """
@@ -333,11 +341,11 @@ def cross_product(v1: vector[float], v2: vector[float]) -> vector[float]:
 
 def dot_product(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
     """Calculate the dot product of two vectors.
 
     Arguments:
         v1: First vector.
         v2: Second vector.
 
     Returns:
         The dot product.
     """
@@ -346,11 +354,11 @@ def dot_product(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
 
 def distance(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
     """Calculate the Euclidean distance between two vectors.
 
     Arguments:
         v1: First vector.
         v2: Second vector.
 
     Returns:
         The Euclidean distance.
     """
@@ -360,11 +368,11 @@ def distance(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
 
 def scalar_projection(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
     """Calculate the scalar projection of v1 onto v2.
 
     Arguments:
         v1: First vector.
         v2: Second vector.
 
     Returns:
         The scalar projection.
     """
@@ -375,11 +383,11 @@ def scalar_projection(v1: vector[float], v2: vector[float]) -> 'float | value[fl
 
 def vector_projection(v1: vector[float], v2: vector[float]) -> vector[float]:
     """Calculate the vector projection of v1 onto v2.
 
     Arguments:
         v1: First vector.
         v2: Second vector.
 
     Returns:
         The projected vector.
     """
@@ -391,11 +399,11 @@ def vector_projection(v1: vector[float], v2: vector[float]) -> vector[float]:
 
 def angle_between(v1: vector[float], v2: vector[float]) -> 'float | value[float]':
     """Calculate the angle in radians between two vectors.
 
     Arguments:
         v1: First vector.
         v2: Second vector.
 
     Returns:
         The angle in radians.
     """
@@ -408,12 +416,12 @@ def angle_between(v1: vector[float], v2: vector[float]) -> 'float | value[float]
 
 def rotate_vector(v: vector[float], axis: vector[float], angle: 'float | value[float]') -> vector[float]:
     """Rotate vector v around a given axis by a specified angle using Rodrigues' rotation formula.
 
     Arguments:
         v: The 3D vector to be rotated.
         axis: A 3D vector defining the axis of rotation.
         angle: The angle of rotation in radians.
 
     Returns:
         The rotated vector.
     """
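# Usage sketch (illustrative, not part of the commit): rotating the x unit
# vector a quarter turn around z yields approximately the y unit vector.
#
#   import math
#   v = rotate_vector(vector([1.0, 0.0, 0.0]),
#                     vector([0.0, 0.0, 1.0]), math.pi / 2)
#   # v is approximately vector([0.0, 1.0, 0.0])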