docstrings added

This commit is contained in:
Nicolas Kruse 2025-02-27 22:47:50 +01:00
parent e8c6c366ff
commit a39cff3a5b
1 changed files with 277 additions and 33 deletions

View File

@ -5,18 +5,53 @@ from typing import TypeVar, Literal, Iterable, Generic
_T = TypeVar('_T') _T = TypeVar('_T')
def open_elf_file(file_path: str): def open_elf_file(file_path: str) -> 'elf_file':
"""Reads ELF data from file
Args:
file_path: path of the ELF file
Returns:
elf_file object
"""
with open(file_path, mode='rb') as f: with open(file_path, mode='rb') as f:
return elf_file(f.read()) return elf_file(f.read())
class elf_symbol(): class elf_symbol():
"""A class for representing data of an ELF symbol
Args:
file: ELF file object
fields: symbol header fields
index: Absolute index in the symbol table
Attributes:
file: Points to the parent ELF file object.
name: Name of the symbol
index: Absolute index in the symbol table
info: Type of the symbol
description: Description of the symbol type
stb: Binding of the symbol (local, global, etc.), taken from the upper 4 bits of st_info
stb_description: Description of the symbol binding
fields: All symbol header fields as dict
"""
def __init__(self, file: 'elf_file', fields: dict[str, int], index: int): def __init__(self, file: 'elf_file', fields: dict[str, int], index: int):
self.fields = fields self.fields = fields
self.file = file self.file = file
if file.string_table: if file.string_table_section:
self.name = file.read_string(file.string_table['sh_offset'] + fields['st_name']) self.name = file.read_string(file.string_table_section['sh_offset'] + fields['st_name'])
else: else:
self.name = '' self.name = ''
@ -26,13 +61,28 @@ class elf_symbol():
self.stb, self.stb_description = fdat.stb_values[fields['st_info'] >> 4] self.stb, self.stb_description = fdat.stb_values[fields['st_info'] >> 4]
def read_data(self) -> bytes: def read_data(self) -> bytes:
"""Returns the binary data the symbol is pointing to.
The offset in the ELF file is calculated by:
sections[symbol.st_shndx].sh_offset + symbol.st_value
Returns:
Symbol data
"""
offset = self.file.sections[self['st_shndx']]['sh_offset'] + self['st_value'] offset = self.file.sections[self['st_shndx']]['sh_offset'] + self['st_value']
return self.file.read_bytes(offset, self['st_size']) return self.file.read_bytes(offset, self['st_size'])
def read_data_hex(self): def read_data_hex(self) -> str:
return ' '.join(f'{d:02X}' for d in self.read_data()) return ' '.join(f'{d:02X}' for d in self.read_data())
def get_relocations(self) -> 'relocation_list': def get_relocations(self) -> 'relocation_list':
"""List all relocations that are pointing to this symbol.
The symbol must be located in a section of type SHT_PROGBITS
(program code). Therefore this function typically lists all
relocations that will be applied to the function represented by the symbol.
Returns:
List of relocations
"""
ret: list[elf_relocation] = list() ret: list[elf_relocation] = list()
section = self.file.sections[self.fields['st_shndx']] section = self.file.sections[self.fields['st_shndx']]
assert section.type == 'SHT_PROGBITS' assert section.type == 'SHT_PROGBITS'
@ -59,14 +109,38 @@ class elf_symbol():
class elf_section(): class elf_section():
"""A class for representing data of an ELF section
Args:
file: ELF file object
fields: Section header fields
name: Name of the section
index: Absolute index of the section
Attributes:
file: Points to the parent ELF file object.
name: Name of the section
index: Absolute index of the section
type: Type of the section
description: Description of the section type
fields: All section header fields as dict
"""
def __init__(self, file: 'elf_file', fields: dict[str, int], name: str, index: int): def __init__(self, file: 'elf_file', fields: dict[str, int], name: str, index: int):
self.fields = fields self.fields = fields
self.file = file self.file = file
self.index = index self.index = index
self.name = name self.name = name
self.data = self.file.read_bytes(self['sh_offset'], self['sh_size'])
if fields['sh_type'] > 0x60000000: if fields['sh_type'] > 0x60000000:
# Range for OS, compiler and application specific types
self.description = [v for k, v in fdat.section_header_types_ex.items() if k >= fields['sh_type']][0] self.description = [v for k, v in fdat.section_header_types_ex.items() if k >= fields['sh_type']][0]
self.type = str(hex(fields['sh_type'])) self.type = str(hex(fields['sh_type']))
elif fields['sh_type'] in fdat.section_header_types: elif fields['sh_type'] in fdat.section_header_types:
@ -75,8 +149,18 @@ class elf_section():
self.description = '' self.description = ''
self.type = str(hex(fields['sh_type'])) self.type = str(hex(fields['sh_type']))
def get_data_hex(self): def read_data(self) -> bytes:
return ' '.join(f'{d:02X}' for d in self.data) """Returns the binary data from the section.
The offset in the ELF file is given by: section.sh_offset
Returns:
Data of the section
"""
return self.file.read_bytes(self['sh_offset'], self['sh_size'])
def get_data_hex(self) -> str:
data = self.file.read_bytes(self['sh_offset'], self['sh_size'])
return ' '.join(f'{d:02X}' for d in data)
def __getitem__(self, key: str | int): def __getitem__(self, key: str | int):
if isinstance(key, str): if isinstance(key, str):
@ -93,7 +177,36 @@ class elf_section():
class elf_relocation(): class elf_relocation():
def __init__(self, file: 'elf_file', fields: dict[str, int], symbol_index: int, relocation_type: int, sh_info: int, index: int): """A class for representing data of a relocation
Args:
file: ELF file object
fields: Relocation header fields
symbol_index: Index of the symbol to relocate in the symbol table
relocation_type: Type of the relocation (numeric)
sh_info: Index of the section this relocation applies to
index: Absolute index of the relocation in the associated relocation section
Attributes:
file: Points to the parent ELF file object.
index: Absolute index of the relocation in the associated relocation section
type: Type of the relocation
calculation: Description of the relocation calculation
target_section: Points to the section that this relocation applies to
fields: All relocation header fields as dict
"""
def __init__(self, file: 'elf_file', fields: dict[str, int], symbol_index: int,
relocation_type: int, sh_info: int, index: int):
self.fields = fields self.fields = fields
self.file = file self.file = file
self.index = index self.index = index
@ -122,6 +235,11 @@ class elf_relocation():
class elf_list(Generic[_T]): class elf_list(Generic[_T]):
"""A generic class for representing a list of ELF data items
Args:
data: Iterable of ELF data items
"""
def __init__(self, data: Iterable[_T]): def __init__(self, data: Iterable[_T]):
self._data = list(data) self._data = list(data)
@ -139,13 +257,43 @@ class elf_list(Generic[_T]):
def __iter__(self): def __iter__(self):
return iter(self._data) return iter(self._data)
def _repr_table(self, format: output_formatter.table_format) -> str: def _compact_table(self) -> tuple[list[str], list[list[str | int]], list[str]]:
return 'not implemented' return [], [[]], []
def to_html(self): def _repr_table(self, format: output_formatter.table_format, raw_data: bool = False):
if raw_data and len(self):
table_dict: list[dict[str, int]] = [el.__dict__.get('fields', {' ': 0}) for el in self]
columns = list(table_dict[0].keys())
data: list[list[str | int]] = [list(el.values()) for el in table_dict]
radj = columns
else:
columns, data, radj = self._compact_table()
return output_formatter.generate_table(data, columns, right_adj_col=radj, format=format)
def to_dict_list(self) -> list[dict[str, str | int]]:
"""Exports the ELF item data table to a list of dicts. It can be used with pandas:
df = pandas.DataFrame(elements.to_dict_list())
Returns:
Table data
"""
columns, data, _ = self._compact_table()
return [{k: v for k, v in zip(columns, row)} for row in data]
def to_html(self) -> str:
"""Exports the ELF item data table to HTML.
Returns:
HTML table
"""
return self._repr_table('html') return self._repr_table('html')
def to_markdown(self): def to_markdown(self) -> str:
"""Exports the ELF item data table to markdown.
Returns:
Markdown table
"""
return self._repr_table('markdown') return self._repr_table('markdown')
def __repr__(self): def __repr__(self):
@ -156,35 +304,74 @@ class elf_list(Generic[_T]):
class section_list(elf_list[elf_section]): class section_list(elf_list[elf_section]):
def _repr_table(self, format: output_formatter.table_format): """A class for representing a list of ELF section
"""
def _compact_table(self):
columns = ['index', 'name', 'type', 'description'] columns = ['index', 'name', 'type', 'description']
data: list[list[str | int]] = [[item.index, item.name, item.type, data: list[list[str | int]] = [[item.index, item.name, item.type,
item.description] for item in self] item.description] for item in self]
return output_formatter.generate_table(data, columns, ['index'], format) return columns, data, ['index']
class symbol_list(elf_list[elf_symbol]): class symbol_list(elf_list[elf_symbol]):
def _repr_table(self, format: output_formatter.table_format): """A class for representing a list of ELF symbols
"""
def _compact_table(self):
columns = ['index', 'name', 'info', 'size', 'stb', 'description'] columns = ['index', 'name', 'info', 'size', 'stb', 'description']
data: list[list[str | int]] = [[item.index, item.name, item.info, item.fields['st_size'], data: list[list[str | int]] = [[item.index, item.name, item.info, item.fields['st_size'],
item.stb, item.description] for item in self] item.stb, item.description] for item in self]
return output_formatter.generate_table(data, columns, ['index', 'size'], format) return columns, data, ['index', 'size']
class relocation_list(elf_list[elf_relocation]): class relocation_list(elf_list[elf_relocation]):
def _repr_table(self, format: output_formatter.table_format): """A class for representing a list of ELF relocations
"""
def _compact_table(self):
columns = ['index', 'symbol name', 'type', 'calculation'] columns = ['index', 'symbol name', 'type', 'calculation']
data: list[list[str | int]] = [[item.index, item.symbol.name, item.type, item.calculation] for item in self] data: list[list[str | int]] = [[item.index, item.symbol.name, item.type,
return output_formatter.generate_table(data, columns, format=format) item.calculation] for item in self]
return columns, data, ['index']
class elf_file: class elf_file:
"""A class for representing data of an ELF file in a structured form
Args:
data: binary ELF data
Attributes:
byteorder: Byte order of the architecture: 'little' or 'big'
(based on e_ident[EI_DATA])
bit_width: Bit width of the architecture: 32 or 64 (based on
e_ident[EI_CLASS])
architecture: Name of the architecture (based on e_machine)
fields: All ELF header fields as dict
sections: A list of all ELF sections
symbols: A list of all ELF symbols
functions: A list of all function symbols (STT_FUNC)
objects: A list of all variable/object symbols (STT_OBJECT)
code_relocations: A list of all code relocations (.rela.text and .rel.text)
symbol_table_section: The symbol table section (first section with
the type SHT_SYMTAB)
string_table_section: The string table section (first section with
the name .strtab)
"""
def __init__(self, data: bytes): def __init__(self, data: bytes):
self._data = data self._data = data
# Defaults required for function _read_int_from_elf_field # Defaults required for function _read_int_from_elf_field
self.bit_width = 32 self.bit_width = 32
self.byteorder = 'little' self.byteorder: Literal['little', 'big'] = 'little'
assert self._read_bytes_from_elf_field('e_ident[EI_MAG]') == bytes([0x7F, 0x45, 0x4c, 0x46]), 'Not an ELF file' assert self._read_bytes_from_elf_field('e_ident[EI_MAG]') == bytes([0x7F, 0x45, 0x4c, 0x46]), 'Not an ELF file'
@ -192,7 +379,7 @@ class elf_file:
byte_order = self._read_int_from_elf_field('e_ident[EI_DATA]') byte_order = self._read_int_from_elf_field('e_ident[EI_DATA]')
assert byte_order in [1, 2], 'Invalid byte order value e_ident[EI_DATA]' assert byte_order in [1, 2], 'Invalid byte order value e_ident[EI_DATA]'
self.byteorder: Literal['little', 'big'] = 'little' if byte_order == 1 else 'big' self.byteorder = 'little' if byte_order == 1 else 'big'
self.fields = {fn: self._read_int_from_elf_field(fn) for fn in fdat.elf_header_field.keys()} self.fields = {fn: self._read_int_from_elf_field(fn) for fn in fdat.elf_header_field.keys()}
@ -207,10 +394,10 @@ class elf_file:
for i, (sd, sn) in enumerate(zip(section_data, section_names))) for i, (sd, sn) in enumerate(zip(section_data, section_names)))
ret_sections = [sh for sh in self.sections if sh.type == 'SHT_SYMTAB'] ret_sections = [sh for sh in self.sections if sh.type == 'SHT_SYMTAB']
self.symbol_table = ret_sections[0] if ret_sections else None self.symbol_table_section = ret_sections[0] if ret_sections else None
ret_sections = [sh for sh in self.sections if sh.name == '.strtab'] ret_sections = [sh for sh in self.sections if sh.name == '.strtab']
self.string_table = ret_sections[0] if ret_sections else None self.string_table_section = ret_sections[0] if ret_sections else None
self.symbols = symbol_list(self._list_symbols()) self.symbols = symbol_list(self._list_symbols())
@ -225,10 +412,10 @@ class elf_file:
yield {fn: self._read_from_sh_field(offs, fn) for fn in fdat.section_header.keys()} yield {fn: self._read_from_sh_field(offs, fn) for fn in fdat.section_header.keys()}
def _list_symbols(self): def _list_symbols(self):
if self.symbol_table: if self.symbol_table_section:
offs = self.symbol_table['sh_offset'] offs = self.symbol_table_section['sh_offset']
for j, i in enumerate(range(offs, self.symbol_table['sh_size'] + offs, self.symbol_table['sh_entsize'])): for j, i in enumerate(range(offs, self.symbol_table_section['sh_size'] + offs, self.symbol_table_section['sh_entsize'])):
ret = {'st_name': self.read_int(i, 4)} ret = {'st_name': self.read_int(i, 4)}
if self.bit_width == 32: if self.bit_width == 32:
@ -247,6 +434,18 @@ class elf_file:
yield elf_symbol(self, ret, j) yield elf_symbol(self, ret, j)
def get_relocations(self, reloc_section: elf_section | str | list[str] | None = None) -> relocation_list: def get_relocations(self, reloc_section: elf_section | str | list[str] | None = None) -> relocation_list:
"""List relocations.
Args:
reloc_section: Specifies the relocation section from which the
relocations should be listed. It can be provided as
elf_section object or by its name. If not provided
(reloc_section=None) relocations from all relocation
sections are returned.
Returns:
List of relocations
"""
if isinstance(reloc_section, elf_section): if isinstance(reloc_section, elf_section):
assert reloc_section.type in ('SHT_REL', 'SHT_RELA'), f'{reloc_section.name} is not a relocation section' assert reloc_section.type in ('SHT_REL', 'SHT_RELA'), f'{reloc_section.name} is not a relocation section'
return relocation_list(self._list_relocations(reloc_section)) return relocation_list(self._list_relocations(reloc_section))
@ -262,6 +461,15 @@ class elf_file:
return relocation_list(relocations) return relocation_list(relocations)
def _list_relocations(self, sh: elf_section): def _list_relocations(self, sh: elf_section):
"""List relocations for an elf_section.
Args:
sh: Specifies the relocation section from which the
relocations should be listed.
Returns:
Relocations from specified elf_section
"""
offs = sh['sh_offset'] offs = sh['sh_offset']
for i, el_off in enumerate(range(offs, sh['sh_size'] + offs, sh['sh_entsize'])): for i, el_off in enumerate(range(offs, sh['sh_size'] + offs, sh['sh_entsize'])):
ret: dict[str, int] = dict() ret: dict[str, int] = dict()
@ -279,18 +487,54 @@ class elf_file:
ret['r_addend'] = self.read_int(el_off + 16, 8, True) if sh.type == 'SHT_RELA' else 0 ret['r_addend'] = self.read_int(el_off + 16, 8, True) if sh.type == 'SHT_RELA' else 0
yield elf_relocation(self, ret, r_info >> 32, r_info & 0xFFFFFFFF, sh['sh_info'], i) yield elf_relocation(self, ret, r_info >> 32, r_info & 0xFFFFFFFF, sh['sh_info'], i)
def read_bytes(self, offset: int, num_bytes: int): def read_bytes(self, offset: int, num_bytes: int) -> bytes:
"""Read bytes from ELF file.
Args:
offset: Specify first byte relative to the start of
the ELF file.
num_bytes: Specify the number of bytes to read.
Returns:
Binary data as bytes
"""
return self._data[offset:offset + num_bytes] return self._data[offset:offset + num_bytes]
def read_int(self, offset: int, num_bytes: int, signed: bool = False) -> int: def read_int(self, offset: int, num_bytes: int, signed: bool = False) -> int:
"""Read an integer from the ELF file. Byte order is
selected according to the architecture (e_ident[EI_DATA]).
Args:
offset: Specify first byte of the integer relative to
the start of the ELF file.
num_bytes: Specify the size of the integer in bytes.
signed: Select if the integer is a signed integer.
Returns:
Integer value
"""
return int.from_bytes(self._data[offset:offset + num_bytes], self.byteorder, signed=signed) return int.from_bytes(self._data[offset:offset + num_bytes], self.byteorder, signed=signed)
# def int_to_bytes(self, value: int, num_bytes: int = 4, signed: bool = False) -> int: # def int_to_bytes(self, value: int, num_bytes: int = 4, signed: bool = False) -> int:
# return value.to_bytes(length=num_bytes, byteorder=self.byteorder, signed=signed) # return value.to_bytes(length=num_bytes, byteorder=self.byteorder, signed=signed)
def read_string(self, offset: int) -> str: def read_string(self, offset: int, encoding: str = 'utf-8') -> str:
"""Read a zero-terminated text string from the ELF file.
Args:
offset: Specify first byte of the string relative to
the start of the ELF file.
encoding: Encoding used for text decoding.
Returns:
Text string
"""
str_end = self._data.find(b'\x00', offset) str_end = self._data.find(b'\x00', offset)
return self._data[offset:str_end].decode() return self._data[offset:str_end].decode(encoding)
def _read_int_from_elf_field(self, field_name: str) -> int: def _read_int_from_elf_field(self, field_name: str) -> int:
field = fdat.elf_header_field[field_name] field = fdat.elf_header_field[field_name]