diff --git a/src/gaspype/__init__.py b/src/gaspype/__init__.py
index d4bf323..4e76e32 100644
--- a/src/gaspype/__init__.py
+++ b/src/gaspype/__init__.py
@@ -2,9 +2,9 @@ import numpy as np
 from numpy.typing import NDArray
 from typing import Literal, Sequence, Callable, Any, TypeVar, Iterator, overload
 from math import log as ln, ceil, exp
-import yaml
 from scipy.optimize import minimize, root
 from scipy.linalg import null_space
+from gaspype._phys_data import atomic_weights, db_reader
 import re
 import pkgutil
 
@@ -14,15 +14,9 @@ FloatArray = NDArray[NDFloat]
 
 _T = TypeVar('_T', 'fluid', 'elements')
 
-
-def _get_data(path: str) -> Any:
-    data = pkgutil.get_data(__name__, path)
-    assert data is not None
-    return yaml.safe_load(data)
-
-
-_atomic_weights = _get_data('data/atomic_prop.yaml')['atomic_weights']['data']
-_species_data = {s['name']: s for s in _get_data('data/therm_prop.yaml')['species']}
+data = pkgutil.get_data(__name__, 'data/therm_data.bin')
+assert data is not None, 'Could not load thermodynamic data'
+species_db = db_reader(data)
 
 kB = 1.380649e-23  # J/K
 NA = 6.02214076e23  # 1/mol
@@ -82,21 +76,22 @@ def species(pattern: str = '*', element_names: str | list[str] = [], use_regex:
         elements = set(element_names)
 
     for el in elements:
-        assert el in _atomic_weights, f'element {el} unknown'
+        assert el in atomic_weights, f'element {el} unknown'
 
     if not use_regex:
-        el_pattern = '|'.join([el for el in _atomic_weights.keys()])
+        el_pattern = '|'.join([el for el in atomic_weights.keys()])
         pattern = pattern.replace('*', '.*')
         pattern = pattern.replace('#', '\\d*')
         pattern = pattern.replace('$', '(' + el_pattern + ')')
         pattern = '^' + pattern + '(,.*)?$'
 
-    return sorted(list(
-        [
-            s['name'] for s in _species_data.values()
-            if re.fullmatch(pattern, s['name']) and
-            (len(elements) == 0 or set(s['composition'].keys()).issubset(elements))
-        ]))
+    if element_names == []:
+        return [sn for sn in species_db.names if re.fullmatch(pattern, sn)]
+    else:
+        return [
+            s.name for s in species_db
+            if re.fullmatch(pattern, s.name) and
+            (len(elements) == 0 or set(s.composition.keys()).issubset(elements))]
 
 
 def set_solver(solver: Literal['gibs minimization', 'system of equations']) -> None:
@@ -162,17 +157,17 @@ class fluid_system:
         self._t_offset = int(t_min)
         self.species = species
         self.active_species = species
-        element_compositions: list[dict[str, float]] = list()
+        element_compositions: list[dict[str, int]] = list()
 
         for i, s in enumerate(species):
-            if s not in _species_data:
+            species_data = species_db.read(s)
+            if not species_data:
                 raise Exception(f'Species {s} not found')
-            element_compositions.append(_species_data[s]['composition'])
+            element_compositions.append(species_data.composition)
 
-            data = _species_data[s]['thermo']
-            assert data['model'] == 'NASA9'
+            assert species_data.model == 9, 'Only NASA9 polynomials are supported'
 
-            for t1, t2, a in zip(data['temperature-ranges'][:-1], data['temperature-ranges'][1:], data['data']):
+            for t1, t2, a in zip(species_data.t_range[:-1], species_data.t_range[1:], species_data.data):
 
                 for j, T in enumerate(temperature_base_points):
                     if t2 >= T >= t1:
@@ -193,7 +188,7 @@ class fluid_system:
         self.elements: list[str] = sorted(list(set(k for ac in element_compositions for k in ac.keys())))
         self.array_species_elements = np.array([[ec[el] if el in ec else 0.0 for el in self.elements] for ec in element_compositions])
 
-        self.array_atomic_mass = np.array([_atomic_weights[el] for el in self.elements]) * 1e-3  # kg/mol
+        self.array_atomic_mass = np.array([atomic_weights[el] for el in self.elements]) * 1e-3  # kg/mol
         self.array_molar_mass: FloatArray = np.sum(self.array_atomic_mass * self.array_species_elements, axis=-1)  # kg/mol
 
         self.array_stoichiometric_coefficients: FloatArray = np.array(null_space(self.array_species_elements.T), dtype=NDFloat).T
@@ -315,7 +310,7 @@ class fluid:
 
         self.array_composition: FloatArray = comp_array
         self.total: FloatArray | float = np.sum(self.array_composition, axis=-1, dtype=NDFloat)
-        self.array_fractions = self.array_composition / (np.expand_dims(self.total, -1) + _epsy)
+        self.array_fractions: FloatArray = self.array_composition / (np.expand_dims(self.total, -1) + _epsy)
         self.shape: _Shape = self.array_composition.shape[:-1]
         self.fs = fs
         self.array_elemental_composition: FloatArray = np.dot(self.array_composition, fs.array_species_elements)
@@ -899,7 +894,7 @@ def _equilibrium_eq(fs: fluid_system, element_composition: FloatArray, t: float,
 
         return (np.hstack([eq_resid, ab_resid]), np.concatenate([j_eq, j_ab], axis=0))
 
-    ret = root(residuals, logn_start, jac=True, tol=1e-30)  # type: ignore
+    ret = root(residuals, logn_start, jac=True, tol=1e-30)
     n = np.exp(np.array(ret['x'], dtype=NDFloat))
 
     return n * el_max
diff --git a/src/gaspype/_phys_data.py b/src/gaspype/_phys_data.py
new file mode 100644
index 0000000..f8ce196
--- /dev/null
+++ b/src/gaspype/_phys_data.py
@@ -0,0 +1,137 @@
+import struct
+from typing import Generator, Iterator
+
+
+class SpeciesData():
+    """Class to hold the physical data for a species.
+    Attributes:
+        comp: Dictionary of species composition with element symbols as keys and their counts as values.
+        model: Number of polynomial coefficients used in the model.
+        ref_string: Reference string for the data source.
+        t_range: List of temperatures nodes marking intervals.
+        data: List of lists containing physical data for each temperature interval.
+    """
+
+    def __init__(self, name: str, comp: dict[str, int], model: int, ref: str, t_range: list[float], data: list[list[float]]):
+        self.name = name
+        self.composition: dict[str, int] = comp
+        self.model: int = model
+        self.ref_string: str = ref
+        self.t_range: list[float] = t_range
+        self.data: list[list[float]] = data
+
+    def __repr__(self) -> str:
+        return (f"Name: {self.name}\n" +
+                f"Composition: {self.composition}\n" +
+                f"Model: {self.model}\n" +
+                f"Reference: {self.ref_string}\n" +
+                f"Temperatures: {self.t_range}\n" +
+                f"Data: {self.data}".replace('),', '),\n'))
+
+
+class db_reader():
+    """Class to read and parse the binary gas phase species database.
+
+    Attributes:
+        names: An iterator over the names of species in the database.
+    """
+
+    header_len = 8
+
+    def __init__(self, inp_data: bytes):
+        """ Initializes the database reader with binary data.
+        Args:
+            inp_data: The binary data of the gas phase species database.
+        """
+        assert inp_data[:4] == b'gapy', 'Unknown data format'
+        self._bin_data = inp_data
+        self._name_count = struct.unpack('<I', self._bin_data[4:8])[0]
+        species_names = self._bin_data[db_reader.header_len:(db_reader.header_len + self._name_count)].decode('ASCII').split(' ')
+        self._index = {s: i for i, s in enumerate(species_names)}
+
+    @property
+    def names(self) -> Iterator[str]:
+        return iter(self._index.keys())
+
+    def __iter__(self) -> Generator[SpeciesData, None, None]:
+        return (d for d in (self.read(species) for species in self._index.keys()) if d is not None)
+
+    def __contains__(self, name: str) -> bool:
+        """ Checks if a species name is present in the database.
+        Args:
+            name: The name of the species to check.
+
+        Returns:
+            bool: True if the species name is present
+        """
+        return name in self._index
+
+    def __getitem__(self, name: str) -> SpeciesData:
+        species_data = self.read(name)
+        assert species_data, f"Species '{name}' not found in the database."
+        return species_data
+
+    def read(self, name: str) -> SpeciesData | None:
+        """ Reads the physical data for a given species name from the binary data.
+        Args:
+            name (str): The name of the species to read data for.
+
+        Returns:
+            phys_data: An instance of the phys_data class containing the physical data.
+        """
+        if name not in self._index:
+            return None
+
+        head_offset = self._name_count + db_reader.header_len + self._index[name] * db_reader.header_len
+
+        head = struct.unpack('<I4B', self._bin_data[head_offset:head_offset + db_reader.header_len])
+
+        offset = head[0]
+        composition_count = head[1]
+        model = head[2]
+        temperature_count = head[3]
+        ref_string_len = head[4]
+
+        td_data_num = (temperature_count - 1) * model
+        data_len = composition_count * 3 + (temperature_count + td_data_num) * 4 + ref_string_len
+
+        format_string = '<' + '2sB' * composition_count + f'{temperature_count}f{td_data_num}f{ref_string_len}s'
+
+        bindat = struct.unpack(format_string, self._bin_data[offset:offset + data_len])
+        comp = {bindat[i * 2].strip().decode('ASCII'): bindat[i * 2 + 1] for i in range(composition_count)}
+
+        noffs = composition_count * 2
+        t_range = list(bindat[noffs:noffs + temperature_count])
+
+        noffs += temperature_count
+        data = [list(bindat[(noffs + i * model):(noffs + (i + 1) * model)]) for i in range(temperature_count - 1)]
+        ref = bindat[-1].decode('utf-8')
+
+        return SpeciesData(name, comp, model, ref, t_range, data)
+
+
+"""
+Atomic weights values are used from CIAAW.
+when a single value is given. Available online at
+http://www.ciaaw.org/atomic-weights.htm
+When a range of values is given in the CIAAW table, the "conventional
+atomic weight" from the IUPAC Periodic Table is used. Available
+online at https://iupac.org/wp-content/uploads/2018/12/IUPAC_Periodic_Table-01Dec18.pdf
+"""
+atomic_weights = {'H': 1.008, 'He': 4.002602, 'Li': 6.94, 'Be': 9.0121831, 'B': 10.81, 'C': 12.011,
+                  'N': 14.007, 'O': 15.999, 'F': 18.998403163, 'Ne': 20.1797, 'Na': 22.98976928,
+                  'Mg': 24.305, 'Al': 26.9815384, 'Si': 28.085, 'P': 30.973761998, 'S': 32.06,
+                  'Cl': 35.45, 'Ar': 39.95, 'K': 39.0983, 'Ca': 40.078, 'Sc': 44.955908,
+                  'Ti': 47.867, 'V': 50.9415, 'Cr': 51.9961, 'Mn': 54.938043, 'Fe': 55.845,
+                  'Co': 58.933194, 'Ni': 58.6934, 'Cu': 63.546, 'Zn': 65.38, 'Ga': 69.723,
+                  'Ge': 72.63, 'As': 74.921595, 'Se': 78.971, 'Br': 79.904, 'Kr': 83.798,
+                  'Rb': 85.4678, 'Sr': 87.62, 'Y': 88.90584, 'Zr': 91.224, 'Nb': 92.90637,
+                  'Mo': 95.95, 'Ru': 101.07, 'Rh': 102.90549, 'Pd': 106.42, 'Ag': 107.8682,
+                  'Cd': 112.414, 'In': 114.818, 'Sn': 118.71, 'Sb': 121.76, 'Te': 127.6,
+                  'I': 126.90447, 'Xe': 131.293, 'Cs': 132.90545196, 'Ba': 137.327, 'La': 138.90547,
+                  'Ce': 140.116, 'Pr': 140.90766, 'Nd': 144.242, 'Sm': 150.36, 'Eu': 151.964,
+                  'Gd': 157.25, 'Tb': 158.925354, 'Dy': 162.5, 'Ho': 164.930328, 'Er': 167.259,
+                  'Tm': 168.934218, 'Yb': 173.045, 'Lu': 174.9668, 'Hf': 178.49, 'Ta': 180.94788,
+                  'W': 183.84, 'Re': 186.207, 'Os': 190.23, 'Ir': 192.217, 'Pt': 195.084,
+                  'Au': 196.96657, 'Hg': 200.592, 'Tl': 204.38, 'Pb': 207.2, 'Bi': 208.9804,
+                  'Th': 232.0377, 'Pa': 231.03588, 'U': 238.02891}
diff --git a/tests/test_db_reader.py b/tests/test_db_reader.py
new file mode 100644
index 0000000..29e19bd
--- /dev/null
+++ b/tests/test_db_reader.py
@@ -0,0 +1,19 @@
+from gaspype._phys_data import db_reader
+
+
+def test_db_reader():
+    with open('src/gaspype/data/therm_data.bin', 'rb') as f:
+        db = db_reader(f.read())
+
+    assert 'HCl' in db
+    assert 'TEST' not in db
+    assert db['HCl'].name == 'HCl'
+    assert db['CH4'].composition == {'C': 1, 'H': 4}
+    assert db['H2O'].model == 9
+
+    for species in db:
+        print(species.name)
+        assert species.model == 9
+        assert len(species.name) > 0
+        assert len(species.composition) > 0
+        assert any(el in species.name for el in species.composition.keys())