From 9e7c9a47b36052ad314fd1b4258ade2dce09a420 Mon Sep 17 00:00:00 2001 From: Nicolas Kruse Date: Tue, 17 Jun 2025 22:56:43 +0200 Subject: [PATCH] db_reader memory usage reduced by using an iterator instead of split --- src/gaspype/_phys_data.py | 54 ++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 21 deletions(-) diff --git a/src/gaspype/_phys_data.py b/src/gaspype/_phys_data.py index f8ce196..0299b11 100644 --- a/src/gaspype/_phys_data.py +++ b/src/gaspype/_phys_data.py @@ -1,32 +1,44 @@ import struct from typing import Generator, Iterator +from dataclasses import dataclass -class SpeciesData(): +def split_on_space(data: bytes, offset: int, end: int) -> Iterator[str]: + """Splits a byte array into ASCII strings based on spaces. + Args: + data: The byte array to split. + offset: The starting index in the byte array. + end: The ending index in the byte array. + Yields: + str: ASCII strings found in the byte array. + """ + start = offset + for i in range(offset, end): + if data[i] == 0x20: # ASCII space character + if start < i: + yield data[start:i].decode('ascii') + start = i + 1 + if start < end: + yield data[start:end].decode('ascii') + + +@dataclass +class SpeciesData: """Class to hold the physical data for a species. Attributes: - comp: Dictionary of species composition with element symbols as keys and their counts as values. + name: Name of the species. + composition: Dictionary of species composition with element symbols as keys and their counts as values. model: Number of polynomial coefficients used in the model. ref_string: Reference string for the data source. t_range: List of temperatures nodes marking intervals. data: List of lists containing physical data for each temperature interval. """ - - def __init__(self, name: str, comp: dict[str, int], model: int, ref: str, t_range: list[float], data: list[list[float]]): - self.name = name - self.composition: dict[str, int] = comp - self.model: int = model - self.ref_string: str = ref - self.t_range: list[float] = t_range - self.data: list[list[float]] = data - - def __repr__(self) -> str: - return (f"Name: {self.name}\n" + - f"Composition: {self.composition}\n" + - f"Model: {self.model}\n" + - f"Reference: {self.ref_string}\n" + - f"Temperatures: {self.t_range}\n" + - f"Data: {self.data}".replace('),', '),\n')) + name: str + composition: dict[str, int] + model: int + ref_string: str + t_range: list[float] + data: list[list[float]] class db_reader(): @@ -45,8 +57,8 @@ class db_reader(): """ assert inp_data[:4] == b'gapy', 'Unknown data format' self._bin_data = inp_data - self._name_count = struct.unpack('