"""Benchmarks comparing copapy against pure Python and NumPy.

Runs a sum-of-(vector @ vector) workload (dense and sparse/diagonal
variants) over a grid of vector sizes, writes the raw timings to JSON,
and plots the median elapsed time per vector size.

Usage:
    python tests/benchmark.py            # run dense + sparse benchmarks, then plot
    python tests/benchmark.py no_simd    # re-run in a child process with NumPy SIMD disabled
    python tests/benchmark.py plot       # only re-plot existing result files
"""

import json
import os
import subprocess
import sys
import time

import numpy as np
# NOTE(review): private NumPy API — exposes the runtime-detected CPU feature
# map; there is no public equivalent, so this may break on a NumPy upgrade.
from numpy.core._multiarray_umath import __cpu_features__

import copapy as cp

# SIMD features reported before a run and disabled wholesale by `no_simd` mode
# (value format required by NumPy's NPY_DISABLE_CPU_FEATURES env var).
CPU_SIMD_FEATURES = "SSE SSE2 SSE3 SSSE3 SSE41 SSE42 AVX AVX2 AVX512F FMA3"

# Benchmark grid: terms in the summed expression, timing iterations, vector
# sizes, and number of passes over the grid (plotting takes the median).
SUM_SIZE = 10
ITER_SIZE = 30000
V_SIZES = [10, 20, 40, 60] + list(range(100, 500, 50))
REPEATS = 7


def _time_loop(fn, iterations: int) -> float:
    """Return the wall-clock seconds for `iterations` calls of `fn`.

    Sleeps briefly first so the preceding work settles (matches the
    original measurement procedure).
    """
    time.sleep(0.1)
    t0 = time.perf_counter()
    for _ in range(iterations):
        fn()
    return time.perf_counter() - t0


def _record(results: list, benchmark: str, iter_size: int,
            elapsed: float, v_size: int) -> None:
    """Append one timing record in the schema `plot_results()` consumes."""
    results.append({'benchmark': benchmark, 'iter_size': iter_size,
                    'elapsed_time': elapsed, 'sum_size': SUM_SIZE,
                    'v_size': v_size})


def _print_simd_status() -> None:
    """Print the NumPy SIMD feature map (and whether features were disabled)."""
    disabled = os.environ.get("NPY_DISABLE_CPU_FEATURES")  # was read-and-discarded before
    if disabled:
        print(f"NPY_DISABLE_CPU_FEATURES={disabled}")
    simd = CPU_SIMD_FEATURES.split(' ')
    print('\n'.join(f"> {k}: {v}" for k, v in __cpu_features__.items() if k in simd))


def cp_vs_python(path: str) -> None:
    """Benchmark the dense expression sum_i (v1 + i) @ v2 and write JSON to `path`.

    Compares: copapy (expression compiled once, then executed), pure Python
    over copapy vectors (1/10th of the iterations), and NumPy (additions
    broadcast into one (SUM_SIZE, v_size) matmul).
    """
    _print_simd_status()

    results: list[dict[str, str | float | int]] = []

    for _ in range(REPEATS):
        for v_size in V_SIZES:
            # --- copapy: compile the expression graph once, time tg.run() ---
            v1 = cp.vector(cp.variable(float(v)) for v in range(v_size))
            v2 = cp.vector(cp.variable(float(v)) for v in [5] * v_size)
            expr = sum((v1 + i) @ v2 for i in range(SUM_SIZE))
            tg = cp.Target()
            tg.compile(expr)
            elapsed_cp = _time_loop(tg.run, ITER_SIZE)
            _record(results, 'Copapy', ITER_SIZE, elapsed_cp, v_size)

            # --- pure Python over copapy vectors (too slow for full ITER_SIZE) ---
            p1 = cp.vector(float(v) for v in range(v_size))
            p2 = cp.vector(float(v) for v in [5] * v_size)
            elapsed_python = _time_loop(
                lambda: sum((p1 + i) @ p2 for i in range(SUM_SIZE)),
                ITER_SIZE // 10)
            _record(results, 'Python', ITER_SIZE // 10, elapsed_python, v_size)

            # --- NumPy: offsets reshaped to (SUM_SIZE, 1) so v1 + offs broadcasts ---
            n1 = np.array(list(range(v_size)), dtype=np.float32)
            n2 = np.array([5] * v_size, dtype=np.float32)
            offs = np.array(list(range(SUM_SIZE)), dtype=np.int32).reshape([SUM_SIZE, 1])
            elapsed_np = _time_loop(lambda: np.sum((n1 + offs) @ n2), ITER_SIZE)
            _record(results, 'NumPy', ITER_SIZE, elapsed_np, v_size)

            print(f"{v_size} {elapsed_cp}, {elapsed_python}, {elapsed_np}")

    with open(path, 'w') as f:
        json.dump(results, f)


def cp_vs_python_sparse(path: str = 'benchmark_results_001_sparse.json') -> None:
    """Benchmark the sparse expression sum_i (diag(v1) + i) @ v2, writing JSON to `path`.

    Same three contenders as `cp_vs_python`; copapy/Python use cp.diagonal,
    NumPy builds the dense equivalent with np.eye.
    """
    results: list[dict[str, str | float | int]] = []

    for _ in range(REPEATS):
        for v_size in V_SIZES:
            # --- copapy: compiled diagonal-matrix expression ---
            v1 = cp.vector(cp.variable(float(v)) for v in range(v_size))
            v2 = cp.vector(cp.variable(float(v)) for v in [5] * v_size)
            expr = sum((cp.diagonal(v1) + i) @ v2 for i in range(SUM_SIZE))
            tg = cp.Target()
            tg.compile(expr)
            elapsed_cp = _time_loop(tg.run, ITER_SIZE)
            _record(results, 'Copapy', ITER_SIZE, elapsed_cp, v_size)

            # --- pure Python over copapy vectors (reduced iteration count) ---
            p1 = cp.vector(float(v) for v in range(v_size))
            p2 = cp.vector(float(v) for v in [5] * v_size)
            elapsed_python = _time_loop(
                lambda: sum((cp.diagonal(p1) + i) @ p2 for i in range(SUM_SIZE)),
                ITER_SIZE // 10)
            _record(results, 'Python', ITER_SIZE // 10, elapsed_python, v_size)

            # --- NumPy: dense diagonal via np.eye; offsets shaped (SUM_SIZE, 1, 1)
            # so the sum over i broadcasts against the (v_size, v_size) matrix.
            # NOTE(review): np.eye defaults to float64 while the vectors are
            # float32, so this contender pays a dtype-promotion cost — confirm
            # whether that is intended for the comparison.
            n1 = np.array(list(range(v_size)), dtype=np.float32)
            n2 = np.array([5] * v_size, dtype=np.float32)
            offs = np.array(list(range(SUM_SIZE)), dtype=np.int32).reshape([SUM_SIZE, 1, 1])
            elapsed_np = _time_loop(
                lambda: np.sum((n1 * np.eye(v_size) + offs) @ n2), ITER_SIZE)
            _record(results, 'NumPy', ITER_SIZE, elapsed_np, v_size)

            print(f"{v_size} {elapsed_cp}, {elapsed_python}, {elapsed_np}")

    with open(path, 'w') as f:
        json.dump(results, f)


def plot_results(path: str) -> None:
    """Plot median elapsed time vs vector size from the JSON results at `path`.

    Saves the figure next to the JSON file as `<path-without-.json>.png`.
    The pure-Python series is omitted (it is orders of magnitude slower and
    would flatten the other curves).
    """
    # matplotlib is imported lazily so the benchmark itself has no hard
    # dependency on it.
    import matplotlib.pyplot as plt
    from collections import defaultdict

    with open(path, 'r') as f:
        results = json.load(f)

    # Group elapsed times by (benchmark, v_size), then take the median over
    # the repeated runs.
    data_by_benchmark: dict = defaultdict(lambda: defaultdict(list))
    for entry in results:
        data_by_benchmark[entry['benchmark']][entry['v_size']].append(entry['elapsed_time'])

    medians_by_benchmark = {
        benchmark: {v_size: np.median(times) for v_size, times in v_sizes.items()}
        for benchmark, v_sizes in data_by_benchmark.items()
    }

    plt.figure(figsize=(10, 6))
    for benchmark in sorted(medians_by_benchmark):
        if benchmark == 'Python':
            continue  # excluded from the plot (see docstring)
        v_sizes = sorted(medians_by_benchmark[benchmark])
        elapsed_times = [medians_by_benchmark[benchmark][v] for v in v_sizes]
        plt.plot(v_sizes, elapsed_times, '.', label=benchmark)

    plt.xlabel('Vector Size (v_size)')
    plt.ylabel('Elapsed Time (seconds)')
    plt.legend()
    plt.ylim(bottom=0)
    plt.tight_layout()

    plt.savefig(path.replace('.json', '') + '.png', dpi=300)
    print("Plot saved")


if __name__ == "__main__":
    path1 = 'benchmark_results_001.json'
    path2 = 'benchmark_results_001_sparse.json'

    if 'no_simd' in sys.argv[1:]:
        # Re-run this script in a child process with NumPy SIMD disabled: the
        # env var must be set before the child interpreter imports numpy.
        # Use __file__ instead of a hard-coded relative path so this works
        # regardless of the current working directory.
        os.environ["NPY_DISABLE_CPU_FEATURES"] = CPU_SIMD_FEATURES
        subprocess.run([sys.executable, os.path.abspath(__file__)], check=True)
    elif 'plot' in sys.argv[1:]:
        plot_results(path1)
        plot_results(path2)
    else:
        cp_vs_python(path1)
        plot_results(path1)

        cp_vs_python_sparse(path2)
        plot_results(path2)