diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..10c587e --- /dev/null +++ b/.flake8 @@ -0,0 +1,20 @@ +[flake8] +# Specify the maximum allowed line length +max-line-length = 88 + +# Ignore specific rules +# For example, E501: Line too long, W503: Line break before binary operator +ignore = E501, W503, W504 + +# Exclude specific files or directories +exclude = + .git, + __pycache__, + build, + dist, + .conda + .venv + +# Enable specific plugins or options +# Example: Enabling flake8-docstrings +select = C,E,F,W,D \ No newline at end of file diff --git a/src/pelfy/__init__.py b/src/pelfy/__init__.py index d269240..08ce6c5 100644 --- a/src/pelfy/__init__.py +++ b/src/pelfy/__init__.py @@ -1,11 +1,14 @@ from . import fields_data as fdat -from . import format_output as fout +from . import output_formatter from typing import TypeVar, Literal, Iterable, Generic -T = TypeVar('T') +_T = TypeVar('_T') + def open_elf_file(file_path: str): - return elf_file(file_path) + with open(file_path, mode='rb') as f: + return elf_file(f.read()) + class elf_symbol(): def __init__(self, file: 'elf_file', fields: dict[str, int], index: int): @@ -21,7 +24,7 @@ class elf_symbol(): st_info = fdat.st_info_values[fields['st_info'] & 0x0F] stb = fdat.stb_values[fields['st_info'] >> 4] - + self.stb = stb['name'] self.description = st_info['description'] self.info = st_info['name'] @@ -43,7 +46,7 @@ class elf_symbol(): if 0 <= offset < self['st_size']: ret.append(reloc) return relocation_list(ret) - + def __getitem__(self, key: str): assert key in self.fields, f'Unknown field name: {key}' return self.fields[key] @@ -54,7 +57,7 @@ class elf_symbol(): f'name {self.name}\n' +\ f'stb {self.stb} ({stb["description"]})\n' +\ f'info {self.info} ({self.description})\n' +\ - '\n'.join(f'{k:18} {v:4}' for k, v in self.fields.items()) + '\n' + '\n'.join(f'{k:18} {v:4}' for k, v in self.fields.items()) + '\n' class elf_section(): @@ -63,17 +66,15 @@ class elf_section(): self.file = file self.index = index self.name = name + self.data = self.file.read_bytes(self['sh_offset'], self['sh_size']) assert fields['sh_type'] in fdat.section_header_types, f"Unknown section type: {hex(fields['sh_type'])}" self.description = fdat.section_header_types[fields['sh_type']]['description'] self.type = fdat.section_header_types[fields['sh_type']]['name'] - def read_data(self): - return self.file.read_bytes(self['sh_offset'], self['sh_size']) + def get_data_hex(self): + return ' '.join(f'{d:02X}' for d in self.data) - def read_data_hex(self): - return ' '.join(f'{d:02X}' for d in self.read_data()) - def __getitem__(self, key: str): assert key in self.fields, f'Unknown field name: {key}' return self.fields[key] @@ -82,7 +83,7 @@ class elf_section(): return f'index {self.index}\n' +\ f'name {self.name}\n' +\ f'type {self.type} ({self.description})\n' +\ - '\n'.join(f"{k:18} {v:4} {fdat.section_header[k]['description']}" for k, v in self.fields.items()) + '\n' + '\n'.join(f"{k:18} {v:4} {fdat.section_header[k]['description']}" for k, v in self.fields.items()) + '\n' class elf_relocation(): @@ -93,7 +94,7 @@ class elf_relocation(): self.description = fdat.relocation_table_types[relocation_type]['description'] self.type = fdat.relocation_table_types[relocation_type]['name'] self.target_section = file.sections[sh_info] - + def __getitem__(self, key: str): assert key in self.fields, f'Unknown field name: {key}' return self.fields[key] @@ -101,11 +102,11 @@ class elf_relocation(): def __repr__(self): return f'symbol {self.symbol.name}\n' +\ f'relocation type {self.type} ({self.description})\n' +\ - '\n'.join(f'{k:18} {v:4}' for k, v in self.fields.items()) + '\n' + '\n'.join(f'{k:18} {v:4}' for k, v in self.fields.items()) + '\n' -class elf_list(Generic[T]): - def __init__(self, data: Iterable[T]): +class elf_list(Generic[_T]): + def __init__(self, data: Iterable[_T]): self._data = list(data) def __getitem__(self, key: int | str): @@ -113,18 +114,15 @@ class elf_list(Generic[T]): elements = [el for el in self._data if getattr(el, 'name', '') == key] assert elements, f'Unknown name: {key}' return elements[0] - #elif isinstance(key, slice): - # return elf_list(self._data.__getitem__(key)) else: return self._data.__getitem__(key) def __iter__(self): - """Return an iterator for the list.""" return iter(self._data) def _repr_table(self, format: Literal['text', 'markdown', 'html']) -> str: return 'not implemented' - + def to_html(self): return self._repr_table('html') @@ -133,7 +131,7 @@ class elf_list(Generic[T]): def __repr__(self): return self._repr_table('text') - + def _repr_html_(self): return self._repr_table('html') @@ -141,40 +139,39 @@ class elf_list(Generic[T]): class section_list(elf_list[elf_section]): def _repr_table(self, format: Literal['text', 'markdown', 'html']): - columns = ['index', 'name', 'type', 'description'] - data: list[list[str|int]] = [[item.index, item.name, item.type, + columns = ['index', 'name', 'type', 'description'] + data: list[list[str | int]] = [[item.index, item.name, item.type, item.description] for item in self] - return fout.generate_table(data, columns, ['index'], format) + return output_formatter.generate_table(data, columns, ['index'], format) class symbol_list(elf_list[elf_symbol]): def _repr_table(self, format: Literal['text', 'markdown', 'html']): - columns = ['index', 'name', 'info', 'size', 'stb', 'description'] - data: list[list[str|int]] = [[item.index, item.name, item.info, item.fields['st_size'], + columns = ['index', 'name', 'info', 'size', 'stb', 'description'] + data: list[list[str | int]] = [[item.index, item.name, item.info, item.fields['st_size'], item.stb, item.description] for item in self] - return fout.generate_table(data, columns, ['index', 'size'], format) + return output_formatter.generate_table(data, columns, ['index', 'size'], format) class relocation_list(elf_list[elf_relocation]): def _repr_table(self, format: Literal['text', 'markdown', 'html']): - columns = ['symbol name', 'type', 'description'] - data: list[list[str|int]] = [[item.symbol.name, item.type, item.description] for item in self] - return fout.generate_table(data, columns, format=format) + columns = ['symbol name', 'type', 'description'] + data: list[list[str | int]] = [[item.symbol.name, item.type, item.description] for item in self] + return output_formatter.generate_table(data, columns, format=format) class elf_file: - def __init__(self, file_path: str): - with open(file_path, mode='rb') as f: - self._data = f.read() + def __init__(self, data: bytes): + self._data = data - #Defaults required for function _read_int_from_elf_field + # Defaults required for function _read_int_from_elf_field self.bit_width = 32 self.byteorder = 'little' assert self._read_bytes_from_elf_field('e_ident[EI_MAG]') == bytes([0x7F, 0x45, 0x4c, 0x46]), 'Not an ELF file' - + self.bit_width = {1: 32, 2: 64}[self._read_int_from_elf_field('e_ident[EI_CLASS]')] byte_order = self._read_int_from_elf_field('e_ident[EI_DATA]') @@ -214,23 +211,23 @@ class elf_file: def _list_symbols(self): if self.symbol_table: offs = self.symbol_table['sh_offset'] - - for j, i in enumerate(range(offs, self.symbol_table['sh_size']+offs, self.symbol_table['sh_entsize'])): + + for j, i in enumerate(range(offs, self.symbol_table['sh_size'] + offs, self.symbol_table['sh_entsize'])): ret = {'st_name': self.read_int(i, 4)} - + if self.bit_width == 32: - ret['st_value'] = self.read_int(i+4, 4) - ret['st_size'] = self.read_int(i+8, 4) - ret['st_info'] = self.read_int(i+12, 1) - ret['st_other'] = self.read_int(i+13, 1) - ret['st_shndx'] = self.read_int(i+14, 2) + ret['st_value'] = self.read_int(i + 4, 4) + ret['st_size'] = self.read_int(i + 8, 4) + ret['st_info'] = self.read_int(i + 12, 1) + ret['st_other'] = self.read_int(i + 13, 1) + ret['st_shndx'] = self.read_int(i + 14, 2) elif self.bit_width == 64: - ret['st_info'] = self.read_int(i+4, 1) - ret['st_other'] = self.read_int(i+5, 1) - ret['st_shndx'] = self.read_int(i+6, 2) - ret['st_value'] = self.read_int(i+8, 8) - ret['st_size'] = self.read_int(i+16, 8) - + ret['st_info'] = self.read_int(i + 4, 1) + ret['st_other'] = self.read_int(i + 5, 1) + ret['st_shndx'] = self.read_int(i + 6, 2) + ret['st_value'] = self.read_int(i + 8, 8) + ret['st_size'] = self.read_int(i + 16, 8) + yield elf_symbol(self, ret, j) def get_relocations(self, reloc_section: elf_section | str | None = None) -> relocation_list: @@ -243,43 +240,42 @@ class elf_file: if sh.type == 'SHT_RELA': if sh.name == reloc_section or not isinstance(reloc_section, str): relocations += relocation_list(self._list_relocations(sh)) - + return relocation_list(relocations) def _list_relocations(self, sh: elf_section): - assert sh.type == 'SHT_RELA', \ - 'Section must be a relocation section (currently only SHT_RELA is supported)' + assert sh.type == 'SHT_RELA', 'Section must be a relocation section (currently only SHT_RELA is supported)' offs = sh['sh_offset'] - for i in range(offs, sh['sh_size']+offs, sh['sh_entsize']): + for i in range(offs, sh['sh_size'] + offs, sh['sh_entsize']): ret: dict[str, int] = dict() - + if self.bit_width == 32: ret['r_offset'] = self.read_int(i, 4) - r_info = self.read_int(i+4, 4) + r_info = self.read_int(i + 4, 4) ret['r_info'] = r_info - ret['r_addend'] = self.read_int(i+8, 4, True) + ret['r_addend'] = self.read_int(i + 8, 4, True) yield elf_relocation(self, ret, r_info >> 8, r_info & 0xFF, sh['sh_info']) elif self.bit_width == 64: ret['r_offset'] = self.read_int(i, 8) - r_info = self.read_int(i+8, 8) + r_info = self.read_int(i + 8, 8) ret['r_info'] = r_info - ret['r_addend'] = self.read_int(i+16, 8, True) + ret['r_addend'] = self.read_int(i + 16, 8, True) yield elf_relocation(self, ret, r_info >> 32, r_info & 0xFFFFFFFF, sh['sh_info']) - + def read_bytes(self, offset: int, num_bytes: int): return self._data[offset:offset + num_bytes] def read_int(self, offset: int, num_bytes: int, signed: bool = False) -> int: - return int.from_bytes(self._data[offset:offset+num_bytes], self.byteorder, signed=signed) + return int.from_bytes(self._data[offset:offset + num_bytes], self.byteorder, signed=signed) # def int_to_bytes(self, value: int, num_bytes: int = 4, signed: bool = False) -> int: # return value.to_bytes(length=num_bytes, byteorder=self.byteorder, signed=signed) - + def read_string(self, offset: int) -> str: str_end = self._data.find(b'\x00', offset) return self._data[offset:str_end].decode() - + def _read_int_from_elf_field(self, field_name: str) -> int: field = fdat.elf_header_field[field_name] offs = int(field[str(self.bit_width)], base=16) @@ -291,7 +287,7 @@ class elf_file: offs = int(field[str(self.bit_width)], base=16) byte_len = int(field['size' + str(self.bit_width)]) return self.read_bytes(offs, byte_len) - + def _read_from_sh_field(self, offset: int, field_name: str) -> int: field = fdat.section_header[field_name] offs = int(field[str(self.bit_width)], base=16) + offset @@ -305,4 +301,3 @@ class elf_file: def __getitem__(self, key: str): assert key in self.fields, f'Unknown field name: {key}' return self.fields[key] - \ No newline at end of file diff --git a/src/pelfy/fields_data.py b/src/pelfy/fields_data.py index 0ce3ac2..ec78a55 100644 --- a/src/pelfy/fields_data.py +++ b/src/pelfy/fields_data.py @@ -195,6 +195,5 @@ e_machine_dict = { 0x0032: {"name": "EM_S390", "description": "IBM S/390"}, 0x003E: {"name": "EM_X86_64", "description": "x86-64 (64-bit)"}, 0x00F3: {"name": "EM_AARCH64", "description": "ARM64 (64-bit)"}, - 0x0103: {"name": "EM_RISCV", "description": "RISC-V (32/64-bit)"}, - 0xF3: {"name": "EM_BPF", "description": "BPF (Berkeley Packet Filter)"} -} \ No newline at end of file + 0x0103: {"name": "EM_RISCV", "description": "RISC-V (32/64-bit)"} +} diff --git a/src/pelfy/format_output.py b/src/pelfy/output_formatter.py similarity index 91% rename from src/pelfy/format_output.py rename to src/pelfy/output_formatter.py index b51f976..8f37f12 100644 --- a/src/pelfy/format_output.py +++ b/src/pelfy/output_formatter.py @@ -14,32 +14,32 @@ def generate_table(data: list[list[Any]], columns: list[str], def generate_md_table(data: list[list[Any]], columns: list[str], right_adj_col: list[str] = []) -> str: column_widths = [max(len(str(item)) for item in column) for column in zip(*([columns] + data))] - + table = '' formatted_row = ' | '.join(f"{str(item):<{column_widths[i]}}" for i, item in enumerate(columns)) table += '| ' + formatted_row + ' |\n' - + table += '|-' + '-|-'.join('-' * width for width in column_widths) + '-|\n' - + for row in data: formatted_row = '' for i, item in enumerate(row): if columns[i] in right_adj_col: - formatted_row += f"| {str(item):>{column_widths[i]}} " + formatted_row += f"| {str(item):>{column_widths[i]}} " else: formatted_row += f"| {str(item):<{column_widths[i]}} " table += formatted_row + "|\n" - + return table def generate_html_table(data: list[list[Any]], columns: list[str], right_adj_col: list[str] = []) -> str: table_html = "\n" - + table_html += "\n\n" for column in columns: - table_html += f" \n" + table_html += f" \n" table_html += "\n\n" table_html += "\n" @@ -49,10 +49,10 @@ def generate_html_table(data: list[list[Any]], columns: list[str], right_adj_col if columns[i] in right_adj_col: table_html += f" \n" else: - table_html += f" \n" + table_html += f" \n" table_html += "\n" table_html += "\n" table_html += "
{column}{column}
{item}{item}{item}
\n" - return table_html \ No newline at end of file + return table_html diff --git a/tests/test_example_elfs.py b/tests/test_example_elfs.py index adacd12..48fad9f 100644 --- a/tests/test_example_elfs.py +++ b/tests/test_example_elfs.py @@ -1,11 +1,13 @@ import pelfy + def test_simple_c(): - elf = pelfy.elf_file('tests/obj/test3_o3.o') + elf = pelfy.open_elf_file('tests/obj/test3_o3.o') print(elf.sections) print(elf.symbols) print(elf.code_relocations) -if __name__ == '__main__':# - test_simple_c() \ No newline at end of file + +if __name__ == '__main__': + test_simple_c()