added binary thermo db feature to the package; additional test added

2025-06-02 16:26:44 +02:00 · 2025-06-02 16:26:44 +02:00 · b6ed6c0bf7
parent 3665487733
commit b6ed6c0bf7
3 changed files with 178 additions and 27 deletions
--- a/src/gaspype/init.py
+++ b/src/gaspype/init.py
@ -2,9 +2,9 @@ import numpy as np
 from numpy.typing import NDArray
 from typing import Literal, Sequence, Callable, Any, TypeVar, Iterator, overload
 from math import log as ln, ceil, exp
 import yaml
 from scipy.optimize import minimize, root
 from scipy.linalg import null_space
 from gaspype._phys_data import atomic_weights, db_reader
 import re
 import pkgutil
@ -14,15 +14,9 @@ FloatArray = NDArray[NDFloat]
 _T = TypeVar('_T', 'fluid', 'elements')
-
+data = pkgutil.get_data(__name__, 'data/therm_data.bin')
-def _get_data(path: str) -> Any:
+assert data is not None, 'Could not load thermodynamic data'
-    data = pkgutil.get_data(__name__, path)
+species_db = db_reader(data)
    assert data is not None
    return yaml.safe_load(data)
 _atomic_weights = _get_data('data/atomic_prop.yaml')['atomic_weights']['data']
 _species_data = {s['name']: s for s in _get_data('data/therm_prop.yaml')['species']}
 kB = 1.380649e-23  # J/K
 NA = 6.02214076e23  # 1/mol
@ -82,21 +76,22 @@ def species(pattern: str = '*', element_names: str | list[str] = [], use_regex:
        elements = set(element_names)
    for el in elements:
-        assert el in _atomic_weights, f'element {el} unknown'
+        assert el in atomic_weights, f'element {el} unknown'
    if not use_regex:
-        el_pattern = '|'.join([el for el in _atomic_weights.keys()])
+        el_pattern = '|'.join([el for el in atomic_weights.keys()])
        pattern = pattern.replace('*', '.*')
        pattern = pattern.replace('#', '\\d*')
        pattern = pattern.replace('$', '(' + el_pattern + ')')
        pattern = '^' + pattern + '(,.*)?$'
-    return sorted(list(
+    if element_names == []:
-        [
+        return [sn for sn in species_db.names if re.fullmatch(pattern, sn)]
-            s['name'] for s in _species_data.values()
+    else:
-            if re.fullmatch(pattern, s['name']) and
+        return [
-            (len(elements) == 0 or set(s['composition'].keys()).issubset(elements))
+            s.name for s in species_db
-        ]))
+            if re.fullmatch(pattern, s.name) and
            (len(elements) == 0 or set(s.composition.keys()).issubset(elements))]
 def set_solver(solver: Literal['gibs minimization', 'system of equations']) -> None:
@ -162,17 +157,17 @@ class fluid_system:
        self._t_offset = int(t_min)
        self.species = species
        self.active_species = species
-        element_compositions: list[dict[str, float]] = list()
+        element_compositions: list[dict[str, int]] = list()
        for i, s in enumerate(species):
-            if s not in _species_data:
+            species_data = species_db.read(s)
            if not species_data:
                raise Exception(f'Species {s} not found')
-            element_compositions.append(_species_data[s]['composition'])
+            element_compositions.append(species_data.composition)
-            data = _species_data[s]['thermo']
+            assert species_data.model == 9, 'Only NASA9 polynomials are supported'
            assert data['model'] == 'NASA9'
-            for t1, t2, a in zip(data['temperature-ranges'][:-1], data['temperature-ranges'][1:], data['data']):
+            for t1, t2, a in zip(species_data.t_range[:-1], species_data.t_range[1:], species_data.data):
                for j, T in enumerate(temperature_base_points):
                    if t2 >= T >= t1:
@ -193,7 +188,7 @@ class fluid_system:
        self.elements: list[str] = sorted(list(set(k for ac in element_compositions for k in ac.keys())))
        self.array_species_elements = np.array([[ec[el] if el in ec else 0.0 for el in self.elements] for ec in element_compositions])
-        self.array_atomic_mass = np.array([_atomic_weights[el] for el in self.elements]) * 1e-3  # kg/mol
+        self.array_atomic_mass = np.array([atomic_weights[el] for el in self.elements]) * 1e-3  # kg/mol
        self.array_molar_mass: FloatArray = np.sum(self.array_atomic_mass * self.array_species_elements, axis=-1)  # kg/mol
        self.array_stoichiometric_coefficients: FloatArray = np.array(null_space(self.array_species_elements.T), dtype=NDFloat).T
@ -315,7 +310,7 @@ class fluid:
        self.array_composition: FloatArray = comp_array
        self.total: FloatArray | float = np.sum(self.array_composition, axis=-1, dtype=NDFloat)
-        self.array_fractions = self.array_composition / (np.expand_dims(self.total, -1) + _epsy)
+        self.array_fractions: FloatArray = self.array_composition / (np.expand_dims(self.total, -1) + _epsy)
        self.shape: _Shape = self.array_composition.shape[:-1]
        self.fs = fs
        self.array_elemental_composition: FloatArray = np.dot(self.array_composition, fs.array_species_elements)
@ -899,7 +894,7 @@ def _equilibrium_eq(fs: fluid_system, element_composition: FloatArray, t: float,
        return (np.hstack([eq_resid, ab_resid]), np.concatenate([j_eq, j_ab], axis=0))
-    ret = root(residuals, logn_start, jac=True, tol=1e-30)  # type: ignore
+    ret = root(residuals, logn_start, jac=True, tol=1e-30)
    n = np.exp(np.array(ret['x'], dtype=NDFloat))
    return n * el_max
--- a/src/gaspype/_phys_data.py
+++ b/src/gaspype/_phys_data.py
@ -0,0 +1,137 @@
 import struct
 from typing import Generator, Iterator
 class SpeciesData():
    """Class to hold the physical data for a species.
    Attributes:
        comp: Dictionary of species composition with element symbols as keys and their counts as values.
        model: Number of polynomial coefficients used in the model.
        ref_string: Reference string for the data source.
        t_range: List of temperatures nodes marking intervals.
        data: List of lists containing physical data for each temperature interval.
    """
    def __init__(self, name: str, comp: dict[str, int], model: int, ref: str, t_range: list[float], data: list[list[float]]):
        self.name = name
        self.composition: dict[str, int] = comp
        self.model: int = model
        self.ref_string: str = ref
        self.t_range: list[float] = t_range
        self.data: list[list[float]] = data
    def __repr__(self) -> str:
        return (f"Name: {self.name}\n" +
                f"Composition: {self.composition}\n" +
                f"Model: {self.model}\n" +
                f"Reference: {self.ref_string}\n" +
                f"Temperatures: {self.t_range}\n" +
                f"Data: {self.data}".replace('),', '),\n'))
 class db_reader():
    """Class to read and parse the binary gas phase species database.
    Attributes:
        names: An iterator over the names of species in the database.
    """
    header_len = 8
    def __init__(self, inp_data: bytes):
        """ Initializes the database reader with binary data.
        Args:
            inp_data: The binary data of the gas phase species database.
        """
        assert inp_data[:4] == b'gapy', 'Unknown data format'
        self._bin_data = inp_data
        self._name_count = struct.unpack('<I', self._bin_data[4:8])[0]
        species_names = self._bin_data[db_reader.header_len:(db_reader.header_len + self._name_count)].decode('ASCII').split(' ')
        self._index = {s: i for i, s in enumerate(species_names)}
    @property
    def names(self) -> Iterator[str]:
        return iter(self._index.keys())
    def __iter__(self) -> Generator[SpeciesData, None, None]:
        return (d for d in (self.read(species) for species in self._index.keys()) if d is not None)
    def __contains__(self, name: str) -> bool:
        """ Checks if a species name is present in the database.
        Args:
            name: The name of the species to check.
        Returns:
            bool: True if the species name is present
        """
        return name in self._index
    def __getitem__(self, name: str) -> SpeciesData:
        species_data = self.read(name)
        assert species_data, f"Species '{name}' not found in the database."
        return species_data
    def read(self, name: str) -> SpeciesData | None:
        """ Reads the physical data for a given species name from the binary data.
        Args:
            name (str): The name of the species to read data for.
        Returns:
            phys_data: An instance of the phys_data class containing the physical data.
        """
        if name not in self._index:
            return None
        head_offset = self._name_count + db_reader.header_len + self._index[name] * db_reader.header_len
        head = struct.unpack('<I4B', self._bin_data[head_offset:head_offset + db_reader.header_len])
        offset = head[0]
        composition_count = head[1]
        model = head[2]
        temperature_count = head[3]
        ref_string_len = head[4]
        td_data_num = (temperature_count - 1) * model
        data_len = composition_count * 3 + (temperature_count + td_data_num) * 4 + ref_string_len
        format_string = '<' + '2sB' * composition_count + f'{temperature_count}f{td_data_num}f{ref_string_len}s'
        bindat = struct.unpack(format_string, self._bin_data[offset:offset + data_len])
        comp = {bindat[i * 2].strip().decode('ASCII'): bindat[i * 2 + 1] for i in range(composition_count)}
        noffs = composition_count * 2
        t_range = list(bindat[noffs:noffs + temperature_count])
        noffs += temperature_count
        data = [list(bindat[(noffs + i * model):(noffs + (i + 1) * model)]) for i in range(temperature_count - 1)]
        ref = bindat[-1].decode('utf-8')
        return SpeciesData(name, comp, model, ref, t_range, data)
 """
 Atomic weights values are used from CIAAW.
 when a single value is given. Available online at
 http://www.ciaaw.org/atomic-weights.htm
 When a range of values is given in the CIAAW table, the "conventional
 atomic weight" from the IUPAC Periodic Table is used. Available
 online at https://iupac.org/wp-content/uploads/2018/12/IUPAC_Periodic_Table-01Dec18.pdf
 """
 atomic_weights = {'H': 1.008, 'He': 4.002602, 'Li': 6.94, 'Be': 9.0121831, 'B': 10.81, 'C': 12.011,
                  'N': 14.007, 'O': 15.999, 'F': 18.998403163, 'Ne': 20.1797, 'Na': 22.98976928,
                  'Mg': 24.305, 'Al': 26.9815384, 'Si': 28.085, 'P': 30.973761998, 'S': 32.06,
                  'Cl': 35.45, 'Ar': 39.95, 'K': 39.0983, 'Ca': 40.078, 'Sc': 44.955908,
                  'Ti': 47.867, 'V': 50.9415, 'Cr': 51.9961, 'Mn': 54.938043, 'Fe': 55.845,
                  'Co': 58.933194, 'Ni': 58.6934, 'Cu': 63.546, 'Zn': 65.38, 'Ga': 69.723,
                  'Ge': 72.63, 'As': 74.921595, 'Se': 78.971, 'Br': 79.904, 'Kr': 83.798,
                  'Rb': 85.4678, 'Sr': 87.62, 'Y': 88.90584, 'Zr': 91.224, 'Nb': 92.90637,
                  'Mo': 95.95, 'Ru': 101.07, 'Rh': 102.90549, 'Pd': 106.42, 'Ag': 107.8682,
                  'Cd': 112.414, 'In': 114.818, 'Sn': 118.71, 'Sb': 121.76, 'Te': 127.6,
                  'I': 126.90447, 'Xe': 131.293, 'Cs': 132.90545196, 'Ba': 137.327, 'La': 138.90547,
                  'Ce': 140.116, 'Pr': 140.90766, 'Nd': 144.242, 'Sm': 150.36, 'Eu': 151.964,
                  'Gd': 157.25, 'Tb': 158.925354, 'Dy': 162.5, 'Ho': 164.930328, 'Er': 167.259,
                  'Tm': 168.934218, 'Yb': 173.045, 'Lu': 174.9668, 'Hf': 178.49, 'Ta': 180.94788,
                  'W': 183.84, 'Re': 186.207, 'Os': 190.23, 'Ir': 192.217, 'Pt': 195.084,
                  'Au': 196.96657, 'Hg': 200.592, 'Tl': 204.38, 'Pb': 207.2, 'Bi': 208.9804,
                  'Th': 232.0377, 'Pa': 231.03588, 'U': 238.02891}
--- a/tests/test_db_reader.py
+++ b/tests/test_db_reader.py
@ -0,0 +1,19 @@
 from gaspype._phys_data import db_reader
 def test_db_reader():
    with open('src/gaspype/data/therm_data.bin', 'rb') as f:
        db = db_reader(f.read())
    assert 'HCl' in db
    assert 'TEST' not in db
    assert db['HCl'].name == 'HCl'
    assert db['CH4'].composition == {'C': 1, 'H': 4}
    assert db['H2O'].model == 9
    for species in db:
        print(species.name)
        assert species.model == 9
        assert len(species.name) > 0
        assert len(species.composition) > 0
        assert any(el in species.name for el in species.composition.keys())