This commit is contained in:
Nicolas Kruse 2026-02-27 16:59:01 +00:00 committed by GitHub
commit bf1559609a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 8550 additions and 25 deletions

View File

@ -1,6 +1,6 @@
[project] [project]
name = "pelfy" name = "pelfy"
version = "1.0.7" version = "1.0.8"
authors = [ authors = [
{ name="Nicolas Kruse", email="nicolas.kruse@nonan.net" }, { name="Nicolas Kruse", email="nicolas.kruse@nonan.net" },
] ]

View File

@ -390,38 +390,51 @@ relocation_table_types = {
}, },
"EM_ARM": { "EM_ARM": {
0: ("R_ARM_NONE", 0, ""), 0: ("R_ARM_NONE", 0, ""),
1: ("R_ARM_PC24", 24, "S - P + A"), 1: ("R_ARM_PC24", 24, "S - P + A"),
2: ("R_ARM_ABS32", 32, "S + A"), 2: ("R_ARM_ABS32", 32, "S + A"),
3: ("R_ARM_REL32", 32, "S - P + A"), 3: ("R_ARM_REL32", 32, "S - P + A"),
4: ("R_ARM_PC13", 13, "S - P + A"), 4: ("R_ARM_LDR_PC_G0", 12, "S - P + A"),
5: ("R_ARM_ABS16", 16, "S + A"), 5: ("R_ARM_ABS16", 16, "S + A"),
6: ("R_ARM_ABS12", 12, "S + A"), 6: ("R_ARM_ABS12", 12, "S + A"),
7: ("R_ARM_THM_ABS5", 5, "S + A"), 7: ("R_ARM_THM_ABS5", 5, "S + A"),
8: ("R_ARM_ABS8", 8, "S + A"), 8: ("R_ARM_ABS8", 8, "S + A"),
9: ("R_ARM_SBREL32", 32, "S - B + A"), 9: ("R_ARM_SBREL32", 32, "S - B + A"),
10: ("R_ARM_THM_PC22", 22, "S - P + A"),
10: ("R_ARM_THM_CALL", 22, "S - P + A"),
11: ("R_ARM_THM_PC8", 8, "S - P + A"), 11: ("R_ARM_THM_PC8", 8, "S - P + A"),
12: ("Reserved", 0, ""), 12: ("R_ARM_BREL_ADJ", 0, ""),
13: ("R_ARM_SWI24", 24, "S + A"), 13: ("R_ARM_TLS_DESC", 0, ""),
14: ("R_ARM_THM_SWI8", 8, "S + A"), 14: ("R_ARM_THM_SWI8", 8, "S + A"),
15: ("R_ARM_XPC25", 25, ""), 15: ("R_ARM_XPC25", 25, ""),
16: ("R_ARM_THM_XPC22", 22, ""), 16: ("R_ARM_THM_XPC22", 22, ""),
28: ("R_ARM_CALL", 24, "((S + A) - P) >> 2"), 28: ("R_ARM_CALL", 24, "((S + A) - P) >> 2"),
29: ("R_ARM_JUMP24", 24, "((S + A) - P) >> 2"), 29: ("R_ARM_JUMP24", 24, "((S + A) - P) >> 2"),
30: ("R_ARM_TLS_DESC", 0, ""), 30: ("R_ARM_THM_JUMP24", 24, "((S + A) - P) >> 1"),
32: ("R_ARM_ALU_PCREL_7_0", 7, "(S - P + A) & 0x000000FF"),
33: ("R_ARM_ALU_PCREL_15_8", 15, "(S - P + A) & 0x0000FF00"), 32: ("R_ARM_ALU_PCREL_7_0", 8, "(S - P + A) & 0x000000FF"),
34: ("R_ARM_ALU_PCREL_23_15", 23, "(S - P + A) & 0x00FF0000"), 33: ("R_ARM_ALU_PCREL_15_8", 8, "(S - P + A) & 0x0000FF00"),
35: ("R_ARM_LDR_SBREL_11_0", 11, "(S - B + A) & 0x00000FFF"), 34: ("R_ARM_ALU_PCREL_23_15", 9, "(S - P + A) & 0x00FF8000"),
36: ("R_ARM_ALU_SBREL_19_12", 19, "(S - B + A) & 0x000FF000"),
37: ("R_ARM_ALU_SBREL_27_20", 27, "(S - B + A) & 0x0FF00000"), 35: ("R_ARM_LDR_SBREL_11_0", 12, "(S - B + A) & 0x00000FFF"),
38: ("R_ARM_RELABS32", 32, "S + A or S - P + A"), 36: ("R_ARM_ALU_SBREL_19_12", 8, "(S - B + A) & 0x000FF000"),
39: ("R_ARM_ROSEGREL32", 32, "S - E + A"), 37: ("R_ARM_ALU_SBREL_27_20", 8, "(S - B + A) & 0x0FF00000"),
38: ("R_ARM_TARGET1", 32, "implementation defined"),
39: ("R_ARM_SBREL31", 31, "S - B + A"),
40: ("R_ARM_V4BX", 0, ""), 40: ("R_ARM_V4BX", 0, ""),
41: ("R_ARM_STKCHK", 0, ""), 41: ("R_ARM_TARGET2", 32, "implementation defined"),
42: ("R_ARM_THM_STKCHK", 0, ""), 42: ("R_ARM_PREL31", 31, "S - P + A"),
43: ("R_ARM_MOVW_ABS_NC", 16, "S + A"), 43: ("R_ARM_MOVW_ABS_NC", 16, "S + A"),
44: ("R_ARM_MOVT_ABS", 16, "S + A") 44: ("R_ARM_MOVT_ABS", 16, "S + A"),
47: ("R_ARM_THM_MOVW_ABS_NC", 16, "S + A"),
48: ("R_ARM_THM_MOVT_ABS", 16, "S + A"),
}, },
"EM_AARCH64": { "EM_AARCH64": {
0: ("R_AARCH64_NONE", 0, ""), 0: ("R_AARCH64_NONE", 0, ""),

View File

@ -9,6 +9,7 @@ Typical usage example:
from . import _fields_data as fdat from . import _fields_data as fdat
from . import _output_formatter from . import _output_formatter
from typing import TypeVar, Literal, Iterable, Generic, Iterator, Generator, Optional, Union from typing import TypeVar, Literal, Iterable, Generic, Iterator, Generator, Optional, Union
import warnings
_T = TypeVar('_T') _T = TypeVar('_T')
@ -26,6 +27,53 @@ def open_elf_file(file_path: str) -> 'elf_file':
return elf_file(f.read()) return elf_file(f.read())
def _decode_thumb_branch_imm(field: int, bits: int) -> int:
"""
Decode Thumb-2 wide branch immediate.
bits: 22 (R_ARM_THM_PC22) or 24 (R_ARM_THM_JUMP24)
"""
h1 = (field >> 16) & 0xFFFF
h2 = field & 0xFFFF
S = (h1 >> 10) & 1
imm10 = h1 & 0x03FF
J1 = (h2 >> 13) & 1
J2 = (h2 >> 11) & 1
imm11 = h2 & 0x07FF
# Decode J1/J2 → I1/I2
I1 = (~(J1 ^ S)) & 1
I2 = (~(J2 ^ S)) & 1
if bits == 24:
imm = (
(S << 23) |
(I1 << 22) |
(I2 << 21) |
(imm10 << 11) |
(imm11 << 0)
)
sign_bit = 23
else:
assert bits == 22
imm = (
(S << 21) |
(I1 << 20) |
(I2 << 19) |
(imm10 << 9) |
(imm11 << 0)
)
sign_bit = 21
# Sign extend
if imm & (1 << sign_bit):
imm |= ~((1 << (sign_bit + 1)) - 1)
# Thumb branch offsets are halfword aligned
return imm << 1
class elf_symbol(): class elf_symbol():
"""A class for representing data of an ELF symbol """A class for representing data of an ELF symbol
@ -42,6 +90,7 @@ class elf_symbol():
offset_in_section: Position of first symbol byte offset_in_section: Position of first symbol byte
relative to section start relative to section start
offset_in_file: Position of first symbol byte in object file offset_in_file: Position of first symbol byte in object file
size: size of symbol in bytes
fields: All symbol header fields as dict fields: All symbol header fields as dict
""" """
@ -71,19 +120,17 @@ class elf_symbol():
self.thumb_mode = bool((file.architecture == 'EM_ARM') & fields['st_value'] & 1) self.thumb_mode = bool((file.architecture == 'EM_ARM') & fields['st_value'] & 1)
self.offset_in_section = fields['st_value'] & ~int(self.thumb_mode) self.offset_in_section = fields['st_value'] & ~int(self.thumb_mode)
self.offset_in_file = self.section['sh_offset'] + self.offset_in_section if self.section else 0 self.offset_in_file = self.section['sh_offset'] + self.offset_in_section if self.section else 0
self.size = self.fields['st_size']
@property @property
def data(self) -> bytes: def data(self) -> bytes:
"""Returns the binary data the symbol is pointing to. """Returns the binary data the symbol is pointing to.
The offset in the ELF file is calculated by:
sections[symbol.st_shndx].sh_offset + symbol.st_value
""" """
assert self.section, 'This symbol is not associated to a data section' assert self.section, 'This symbol is not associated to a data section'
if self.section.type == 'SHT_NOBITS': if self.section.type == 'SHT_NOBITS':
return b'\x00' * self['st_size'] return b'\x00' * self['st_size']
else: else:
offset = self.section['sh_offset'] + self['st_value'] return self.file.read_bytes(self.offset_in_file, self['st_size'])
return self.file.read_bytes(offset, self['st_size'])
@property @property
def data_hex(self) -> str: def data_hex(self) -> str:
@ -102,7 +149,7 @@ class elf_symbol():
assert self.section and self.section.type == 'SHT_PROGBITS' assert self.section and self.section.type == 'SHT_PROGBITS'
for reloc in self.file.get_relocations(): for reloc in self.file.get_relocations():
if reloc.target_section == self.section: if reloc.target_section == self.section:
offset = reloc['r_offset'] - self['st_value'] offset = reloc['r_offset'] - self.offset_in_section
if 0 <= offset < self['st_size']: if 0 <= offset < self['st_size']:
ret.append(reloc) ret.append(reloc)
return relocation_list(ret) return relocation_list(ret)
@ -528,7 +575,7 @@ class elf_file:
if reloc_types and 'A' in reloc_types[relocation_type][2]: if reloc_types and 'A' in reloc_types[relocation_type][2]:
name = reloc_types[relocation_type][0] name = reloc_types[relocation_type][0]
sh = self.sections[reloc_section['sh_info']] sh = self.sections[reloc_section['sh_info']]
field = self.read_int(r_offset + sh['sh_offset'], 4, True) field = self.read_int(r_offset + sh['sh_offset'], 4, False)
if name in ('R_386_PC32', 'R_386_32', 'R_X86_64_PC32', 'R_X86_64_PLT32', 'R_ARM_REL32', 'R_ARM_ABS32'): if name in ('R_386_PC32', 'R_386_32', 'R_X86_64_PC32', 'R_X86_64_PLT32', 'R_ARM_REL32', 'R_ARM_ABS32'):
return field return field
if name == 'R_ARM_MOVW_ABS_NC': if name == 'R_ARM_MOVW_ABS_NC':
@ -544,11 +591,24 @@ class elf_file:
if imm24 & 0x800000: if imm24 & 0x800000:
imm24 |= ~0xFFFFFF imm24 |= ~0xFFFFFF
return imm24 << 2 return imm24 << 2
if name == 'R_ARM_THM_PC22':
return _decode_thumb_branch_imm(field, 22)
if name in ('R_ARM_THM_JUMP24', 'R_ARM_THM_CALL'):
return _decode_thumb_branch_imm(field, 24)
if name == 'R_ARM_THM_MOVW_ABS_NC' or name == 'R_ARM_THM_MOVT_ABS':
i = (field >> 10) & 1
imm4 = field & 0xF
imm3 = (field >> 28) & 0x7
imm8 = (field >> 16) & 0xFF
imm16 = imm8 | (imm3 << 8) | (i << 11) | (imm4 << 12)
if name == 'R_ARM_THM_MOVT_ABS':
return imm16 << 16
return imm16
if '_THM_' in name: if '_THM_' in name:
print('Warning: Thumb relocation addend extraction is not implemented') warnings.warn(f'Thumb relocation addend extraction is for {name} not implemented', stacklevel=2)
return 0 return 0
if '_MIPS_' in name: if '_MIPS_' in name:
print('Warning: MIPS relocations addend extraction is not implemented') warnings.warn('Warning: MIPS relocations addend extraction is not implemented', stacklevel=2)
return 0 return 0
raise NotImplementedError(f"Relocation addend extraction for {name} is not implemented") raise NotImplementedError(f"Relocation addend extraction for {name} is not implemented")

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

65
tests/test_arm_addend.py Normal file
View File

@ -0,0 +1,65 @@
import pelfy._main as _main
import os
def test_arm_addend_extraction():
    """Check the addends extracted for the ARM relocations of a test object.

    Every entry of the reference list must be matched by at least one
    relocation in the object file with the same type, offset, symbol name
    and addend.
    """
    # Path to the test object file
    obj_path = os.path.join('tests', 'obj', 'stencils_armv7_O3.o')
    elf = _main.open_elf_file(obj_path)

    # Collect all ARM relocations as (type, offset, addend, symbol name)
    reloc_addends: list[tuple[str, int, int, str]] = [
        (reloc.type, reloc['r_offset'], reloc['r_addend'], reloc.symbol.name)
        for reloc in elf.get_relocations()
        if reloc.type.startswith('R_ARM')
    ]

    # Reference values from stencils_armv7_O3.asm (addend = 0 for V4BX, -8 for JUMP24/CALL)
    reference = [
        ('R_ARM_V4BX', 0xB4, 0, ''),
        ('R_ARM_V4BX', 0xC0, 0, ''),
        ('R_ARM_V4BX', 0xD0, 0, ''),
        ('R_ARM_V4BX', 0x114, 0, ''),
        ('R_ARM_V4BX', 0x144, 0, ''),
        ('R_ARM_V4BX', 0x148, 0, ''),
        ('R_ARM_JUMP24', 0x124, -8, '__aeabi_idiv0'),
        ('R_ARM_JUMP24', 0x14, -8, 'auxsub_get_42'),
        ('R_ARM_CALL', 0x10, -8, 'result_int'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float_int'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float_float'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_int_int'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_int_float'),
        ('R_ARM_CALL', 0xC, -8, 'aux_get_42'),
        ('R_ARM_JUMP24', 0x14, -8, 'result_float'),
        ('R_ARM_CALL', 0x4, -8, 'aux_get_42'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float'),
        ('R_ARM_JUMP24', 0x4, -8, 'result_int'),
        ('R_ARM_JUMP24', 0x4, -8, 'result_float'),
        ('R_ARM_JUMP24', 0x2C, -8, 'result_float'),
        ('R_ARM_CALL', 0x30, -8, 'sqrtf'),
        ('R_ARM_JUMP24', 0x24, -8, 'result_float'),
        ('R_ARM_CALL', 0x28, -8, 'sqrtf'),
        ('R_ARM_CALL', 0xC, -8, 'expf'),
        ('R_ARM_JUMP24', 0x14, -8, 'result_float'),
        ('R_ARM_CALL', 0x4, -8, 'expf'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float'),
        ('R_ARM_CALL', 0xC, -8, 'logf'),
        ('R_ARM_JUMP24', 0x14, -8, 'result_float'),
        ('R_ARM_CALL', 0x4, -8, 'logf'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float'),
        ('R_ARM_CALL', 0xC, -8, 'sinf'),
        ('R_ARM_JUMP24', 0x14, -8, 'result_float'),
        ('R_ARM_CALL', 0x4, -8, 'sinf'),
        ('R_ARM_JUMP24', 0xC, -8, 'result_float'),
    ]

    # For each reference, check that at least one matching relocation has the expected addend
    for ref_type, ref_offset, ref_addend, ref_symbol in reference:
        # Addends of every relocation with the same type, offset and symbol;
        # reported on failure so the message shows what was actually extracted
        candidates = [
            addend
            for typ, offset, addend, symbol in reloc_addends
            if (typ, offset, symbol) == (ref_type, ref_offset, ref_symbol)
        ]
        assert ref_addend in candidates, (
            f"Missing or incorrect addend for {ref_type} offset=0x{ref_offset:X} "
            f"symbol={ref_symbol} (found={candidates}, expected {ref_addend})"
        )

View File

@ -0,0 +1,65 @@
import pelfy._main as _main
import os
def test_thumb_addend_extraction():
    """Check the addends extracted for the Thumb relocations of a test object.

    Each reference entry must be matched by at least one R_ARM_THM_*
    relocation with the same type, symbol name and addend. The offset
    column of the reference list is informational only and is not compared.
    """
    # Test object file, relative to the working directory
    obj_path = os.path.join('tests', 'obj', 'stencils_armv7thumb_O3_THM_MOVW.o')
    elf = _main.open_elf_file(obj_path)

    # (type, offset, addend, symbol name) of every Thumb relocation
    reloc_addends: list[tuple[str, int, int, str]] = [
        (reloc.type, reloc['r_offset'], reloc['r_addend'], reloc.symbol.name)
        for reloc in elf.get_relocations()
        if reloc.type.startswith('R_ARM_THM')
    ]

    # Expected values taken from the .asm file; every addend is expected to be 0
    reference = [
        ('R_ARM_THM_MOVW_ABS_NC', None, 0, 'dummy_int'),
        ('R_ARM_THM_MOVT_ABS', None, 0, 'dummy_int'),
        ('R_ARM_THM_MOVW_ABS_NC', None, 0, 'dummy_float'),
        ('R_ARM_THM_MOVT_ABS', None, 0, 'dummy_float'),
        ('R_ARM_THM_JUMP24', 0x14, 0, 'auxsub_get_42'),
        ('R_ARM_THM_CALL', 0xA, 0, 'result_int'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float_int'),
        ('R_ARM_THM_JUMP24', 0xC, 0, 'result_float_float'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_int_int'),
        ('R_ARM_THM_JUMP24', 0xC, 0, 'result_int_float'),
        ('R_ARM_THM_CALL', 0xA, 0, 'aux_get_42'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'aux_get_42'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float'),
        ('R_ARM_THM_JUMP24', 0x2, 0, 'result_int'),
        ('R_ARM_THM_JUMP24', 0x4, 0, 'result_float'),
        ('R_ARM_THM_JUMP24', 0x28, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2C, 0, 'sqrtf'),
        ('R_ARM_THM_JUMP24', 0x20, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x24, 0, 'sqrtf'),
        ('R_ARM_THM_CALL', 0xA, 0, 'expf'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'expf'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0xA, 0, 'logf'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'logf'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0xA, 0, 'sinf'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'sinf'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0xA, 0, 'cosf'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'cosf'),
        ('R_ARM_THM_JUMP24', 0xA, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0xA, 0, 'tanf'),
        ('R_ARM_THM_JUMP24', 0x12, 0, 'result_float'),
        ('R_ARM_THM_CALL', 0x2, 0, 'tanf'),
    ]

    # Every reference entry needs at least one relocation with the expected addend
    for ref_type, _, ref_addend, ref_symbol in reference:
        wanted = (ref_type, ref_symbol, ref_addend)
        found = False
        addend = 0
        for typ, offset, addend, symbol in reloc_addends:
            found = (typ, symbol, addend) == wanted
            if found:
                break
        assert found, f"Missing or incorrect addend for {ref_type} {ref_symbol} (value={addend:X}, expected {ref_addend})"