import logging
from cle.errors import CLEOperationError
from .elfreloc import ELFReloc
from .generic import (
GenericAbsoluteAddendReloc,
GenericCopyReloc,
GenericJumpslotReloc,
GenericPCRelativeAddendReloc,
GenericRelativeReloc,
GenericTLSDoffsetReloc,
GenericTLSModIdReloc,
GenericTLSOffsetReloc,
RelocGOTMixin,
RelocTruncate32Mixin,
)
log = logging.getLogger(name=__name__)
arch = "ARM"
# Reference: "ELF for the ARM Architecture ABI r2.10"
# http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044e/IHI0044E_aaelf.pdf
def _applyReloc(inst, result, mask=0xFFFFFFFF):
"""
Applies the specified mask to the relocation and verifies that the mask
is valid for the given result.
"""
try:
if result & ~mask:
raise ValueError("result & ~mask is not 0.")
except ValueError as ex:
log.warning("Relocation failed: %r", ex)
return 0 # worst case, you hook it yourself
return (inst & ~mask) | (result & mask) # pylint: disable=superfluous-parens
def _isThumbFunc(symbol, addr):
"""
Checks whether the provided symbol and address is a Thumb function by
verifying the LSB is 1 and the symbol is STT_FUNC.
"""
return (addr % 2 == 1) and symbol.is_function
[docs]
class R_ARM_CALL(ELFReloc):
"""
Relocate R_ARM_CALL symbols via instruction modification. It additionally
handles R_ARM_PC24 and R_ARM_JUMP24. The former is deprecated and is now
just the same as R_ARM_CALL.
R_ARM_JUMP24 doesn't need the Thumb check. Technically, if the Thumb check
succeeds on R_ARM_JUMP24, it's a bad call that shouldn't have been generated
by the linker, so we may as well as just treat it like R_ARM_CALL.
- Class: Static
- Type: ARM (R_ARM_CALL, R_ARM_JUMP24); Deprecated (R_ARM_PC24)
- Code: 1 (R_ARM_PC24), 28 (R_ARM_CALL), 29 (R_ARM_JUMP24)
- Operation: ((S + A) | T) - P
- S is the address of the symbol
- A is the addend
- P is the target location (place being relocated)
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
"""
@property
def value(self):
P = self.rebased_addr # Location of this instruction
A = inst = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
if inst & 0x00800000:
A |= 0xFF000000 # Sign extend to 32-bits
result = ((S + (A << 2)) | T) - P # Do the initial work
imm24 = (result & 0x03FFFFFE) >> 2 # Sign_extend(inst[25:2])
if T: # Do Thumb relocation
mask = 0xFF000000
bit_h = (result & 0x02) >> 1
result = _applyReloc(inst, (0xFA | bit_h), mask)
else: # Do ARM relocation
mask = 0xFFFFFF
result = _applyReloc(inst, imm24, mask)
log.debug("%s relocated as R_ARM_CALL with new instruction: %#x", self.symbol.name, result)
return result
[docs]
class R_ARM_PREL31(ELFReloc):
"""
Relocate R_ARM_PREL31 symbols via instruction modification. The difference
between this and R_ARM_CALL/R_ARM_PC24/R_ARM_JUMP24 is that it's a data
relocation
- Class: Static
- Type: Data
- Code: 42
- Operation: ((S + A) | T) - P
- S is the address of the symbol
- A is the addend
- P is the target location (place being relocated)
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
"""
@property
def value(self):
P = self.rebased_addr # Location of this instruction
A = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
if A & 0x01000000:
A |= 0xF1000000 # Sign extend 31-bits
result = ((S + A) | T) - P # Do the initial work
mask = 0x7FFFFFFF
rel31 = result & mask
result = _applyReloc(A, rel31, mask)
log.debug("%s relocated as R_ARM_PREL31 to: 0x%x", self.symbol.name, result)
return result
[docs]
class R_ARM_REL32(ELFReloc):
"""
Relocate R_ARM_REL32 symbols. This is essentially the same as
GenericPCRelativeAddendReloc with the addition of a check
for whether or not the target is Thumb.
- Class: Static
- Type: Data
- Code: 3
- Operation: ((S + A) | T) - P
- S is the address of the symbol
- A is the addend
- P is the target location (place being relocated)
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
"""
@property
def value(self):
P = self.rebased_addr # Location of this instruction
A = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
result = ((S + A) | T) - P
log.debug("%s relocated as R_ARM_REL32 to: 0x%x", self.symbol.name, result)
return result
[docs]
class R_ARM_ABS32(ELFReloc):
"""
Relocate R_ARM_ABS32 symbols. This is essentially the same as
GenericAbsoluteAddendReloc with the addition of a check
for whether or not the target is Thumb.
- Class: Static
- Type: Data
- Code: 3
- Operation: (S + A) | T
- S is the address of the symbol
- A is the addend
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
"""
@property
def value(self):
A = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
result = (S + A) | T
log.debug("%s relocated as R_ARM_ABS32 to: 0x%x", self.symbol.name, result)
return result
[docs]
class R_ARM_MOVW_ABS_NC(ELFReloc):
"""
Relocate R_ARM_MOVW_ABS_NC symbols.
- Class: Static
- Type: Instruction
- Code: 43
- Operation: (S + A) | T
- S is the address of the symbol
- A is the addend
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction
"""
@property
def value(self):
inst = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
# initial addend is formed by interpreting the 16-bit literal field
# of the instruction as a signed value
A = ((inst & 0xF0000) >> 4) | (inst & 0xFFF)
if A & 0x8000:
# two's complement
A = -((A ^ 0xFFFF) + 1)
X = (S + A) | T
MaskX = X & 0xFFFF
# inst modification:
part1 = MaskX >> 12
part2 = MaskX & 0xFFF
inst &= 0xFFF0F000 # clears inst[11, 0] and inst[19, 16]
inst |= (part1 << 16) & 0xF0000 # inst[19, 16] = part1
inst |= part2 & 0xFFF # inst[11, 0] = part2
log.debug("%s relocated as R_ARM_MOVW_ABS_NC to: 0x%x", self.symbol.name, inst)
return inst
[docs]
class R_ARM_MOVT_ABS(ELFReloc):
"""
Relocate R_ARM_MOVT_ABS symbols.
- Class: Static
- Type: Instruction
- Code: 44
- Operation: S + A
- S is the address of the symbol
- A is the addend
"""
@property
def value(self):
inst = self.addend # The instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
# initial addend is formed by interpreting the 16-bit literal field
# of the instruction as a signed value
A = ((inst & 0xF0000) >> 4) | (inst & 0xFFF)
if A & 0x8000:
# two's complement
A = -((A ^ 0xFFFF) + 1)
X = S + A
MaskX = X & 0xFFFF0000
# inst modification:
part1 = (MaskX >> 16) >> 12
part2 = (MaskX >> 16) & 0xFFF
inst &= 0xFFF0F000 # clears inst[11, 0] and inst[19, 16]
inst |= (part1 << 16) & 0xF0000 # inst[19, 16] = part1
inst |= part2 & 0xFFF # inst[11, 0] = part2
log.debug("%s relocated as R_ARM_MOVT_ABS to: 0x%x", self.symbol.name, inst)
return inst
[docs]
class R_ARM_THM_CALL(ELFReloc):
"""
Relocate R_ARM_THM_CALL symbols via instruction modification.
- Class: Static
- Type: ARM (R_ARM_THM_CALL)
- Code: 10
- Operation: ((S + A) | T) - P
- S is the address of the symbol
- A is the addend
- P is the target location (place being relocated)
- T is 1 if the symbol is of type STT_FUNC and addresses a Thumb instruction (This bit is entirely irrelevant
because the 1-bit of the address gets shifted off in the encoding)
- Encoding: See http://hermes.wings.cs.wisc.edu/files/Thumb-2SupplementReferenceManual.pdf
- Page 71 (3-31) has the chart
- It appears that it mistakenly references the I1 and I2 bits as J1 and J2 in the chart (see the notes at the
bottom of the page -- the ranges don't make sense)
- However, the J1/J2 bits are XORed with !S bit in this case (see vex implementation:
https://github.com/angr/vex/blob/6d1252c7ce8fe8376318b8f8bb8034058454c841/priv/guest_arm_toIR.c#L19219 )
- Implementation appears correct with the bits placed into offset[23:22]
"""
[docs]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._insn_bytes = None
[docs]
def resolve_symbol(self, solist, **kwargs):
kwargs["thumb"] = True
super().resolve_symbol(solist, **kwargs)
@property
def value(self):
P = self.rebased_addr # Location of this instruction
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
T = _isThumbFunc(self.symbol, S)
A = 0
# Deconstruct the instruction:
# Because this 4-byte instruction is treated as two 2-byte instructions,
# the bytes are in the order `b3 b4 b1 b2`, where b4 is the most significant.
if self._insn_bytes is None:
self._insn_bytes = self.owner.memory.load(self.relative_addr, 4)
hi = (self._insn_bytes[1] << 8) | self._insn_bytes[0]
lo = (self._insn_bytes[3] << 8) | self._insn_bytes[2]
inst = (hi << 16) | lo
def gen_mask(n_bits, first_bit):
"""
Builds a mask that captures n_bits, where the first bit captured is first_bit
"""
return ((1 << n_bits) - 1) << first_bit
if self.is_rela:
A = self.addend
else:
# Build A (the initial addend)
A |= (inst & gen_mask(11, 0)) << 1 # A[11:1] = inst[10:0] (inclusive)
A |= ((inst & gen_mask(10, 16)) >> 16) << 12 # A[21:12] = inst[25:16]
sign_bit = bool(inst & gen_mask(1, 26)) & 1 # sign_bit = inst[26]
J1 = (bool(inst & gen_mask(1, 13)) & 1) ^ (not sign_bit) # J1 = inst[13] ^ !sign
J2 = (bool(inst & gen_mask(1, 11)) & 1) ^ (not sign_bit) # J2 = inst[11] ^ !sign
A |= J1 << 23 # A[23] = J1
A |= J2 << 22 # A[22] = J2
A &= 0x7FFFFF
if sign_bit:
A |= 0xFF800000
# Compute X, the new offset, from the symbol addr, S, the addend, A,
# the thumb flag, T, and PC, P.
x = (((S + A) | T) - P) & 0xFFFFFFFF # Also mask to 32 bits
# Ensure jump is in range
if x & 0xFF800000 != 0 and x & 0xFF800000 != 0xFF800000:
raise CLEOperationError(
"Jump target out of range for reloc R_ARM_THM_CALL (+- 2^23). "
"This may be due to SimProcedures being allocated outside the jump range. "
"If you believe this is the case, set 'rebase_granularity'=0x1000 in the "
"load options."
)
# Rebuild the instruction, first clearing out any previously set offset bits
# offset 1 2 offset
# 11110S [21:12] 11J?J [11:1] (if ? is 1, BL; if ? is 0, BLX)
inst &= ~0b00000111111111110010111111111111
# | | | | |
# 32 24 16 8 0
sign_bit = bool(x & gen_mask(1, 24)) & 1
J1 = (bool(x & gen_mask(1, 23)) & 1) ^ (not sign_bit)
J2 = (bool(x & gen_mask(1, 22)) & 1) ^ (not sign_bit)
inst |= sign_bit << 26
inst |= J1 << 13
inst |= J2 << 11
inst |= (x & gen_mask(11, 1)) >> 1
inst |= ((x & gen_mask(10, 12)) >> 12) << 16
# Put it back into <little endian short> <little endian short> format
raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)
# The relocation handler expects a little-endian result, so flip it around.
result = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]
log.debug("%s relocated as R_ARM_THM_CALL with new instruction: %#x", self.symbol.name, result)
return result
[docs]
class R_ARM_COPY(GenericCopyReloc):
pass
[docs]
class R_ARM_GLOB_DAT(GenericJumpslotReloc):
pass
[docs]
class R_ARM_JUMP_SLOT(GenericJumpslotReloc):
pass
[docs]
class R_ARM_RELATIVE(GenericRelativeReloc):
pass
[docs]
class R_ARM_ABS32_NOI(GenericAbsoluteAddendReloc):
pass
[docs]
class R_ARM_REL32_NOI(GenericPCRelativeAddendReloc):
pass
[docs]
class R_ARM_TLS_DTPMOD32(GenericTLSModIdReloc):
pass
[docs]
class R_ARM_TLS_DTPOFF32(GenericTLSDoffsetReloc):
pass
[docs]
class R_ARM_TLS_TPOFF32(GenericTLSOffsetReloc):
pass
[docs]
class R_ARM_JUMP24(R_ARM_CALL):
pass
[docs]
class R_ARM_PC24(R_ARM_CALL):
pass
# EDG says: Implementing these the easy way.
# Inaccuracies may exist. This is ARM, after all.
[docs]
class R_ARM_THM_JUMP24(R_ARM_THM_CALL):
pass
[docs]
class R_ARM_THM_JUMP19(R_ARM_THM_CALL):
pass
[docs]
class R_ARM_THM_JUMP6(R_ARM_THM_CALL):
pass
[docs]
class R_ARM_THM_MOVW_ABS_NC(ELFReloc):
"""
((S + A) | T) & 0xffff
Ref: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
"""
@property
def value(self):
insn_bytes = self.owner.memory.load(self.relative_addr, 4)
hi = (insn_bytes[1] << 8) | insn_bytes[0]
lo = (insn_bytes[3] << 8) | insn_bytes[2]
inst = (hi << 16) | lo
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
# initial addend is formed by interpreting the 16-bit literal field
# of the instruction as a signed value
A = (inst & 0b0000_0100_0000_0000_0000_0000_0000_0000) >> 26 << 15
A |= (inst & 0b0000_0000_0000_1111_0000_0000_0000_0000) >> 16 << 11
A |= (inst & 0b0000_0000_0000_0000_0111_0000_0000_0000) >> 12 << 8
A |= inst & 0b0000_0000_0000_0000_0000_0000_1111_1111
if A & 0x8000:
# two's complement
A = -((A ^ 0xFFFF) + 1)
T = _isThumbFunc(self.symbol, S)
X = (S + A) | T
MaskX = X & 0xFFFF
# inst modification:
part1 = MaskX >> 12 # [19:16]
part2 = (MaskX >> 11) & 0x1 # [26]
part3 = (MaskX >> 8) & 0x7 # [14:12]
part4 = MaskX & 0xFF # [7:0]
inst &= 0b1111_1011_1111_0000_1000_1111_0000_0000
inst |= (part1 << 16) & 0b0000_0000_0000_1111_0000_0000_0000_0000
inst |= (part2 << 26) & 0b0000_0100_0000_0000_0000_0000_0000_0000
inst |= (part3 << 12) & 0b0000_0000_0000_0000_0111_0000_0000_0000
inst |= (part4 << 0) & 0b0000_0000_0000_0000_0000_0000_1111_1111
raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)
inst = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]
log.debug("%s relocated as R_ARM_THM_MOVW_ABS_NC to: 0x%x", self.symbol.name, inst)
return inst
[docs]
class R_ARM_THM_MOVT_ABS(ELFReloc):
"""
(S + A) & 0xffff0000
Ref: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
"""
@property
def value(self):
insn_bytes = self.owner.memory.load(self.relative_addr, 4)
hi = (insn_bytes[1] << 8) | insn_bytes[0]
lo = (insn_bytes[3] << 8) | insn_bytes[2]
inst = (hi << 16) | lo
S = self.resolvedby.rebased_addr # The symbol's "value", where it points to
# initial addend is formed by interpreting the 16-bit literal field
# of the instruction as a signed value
A = (inst & 0b0000_0100_0000_0000_0000_0000_0000_0000) >> 26 << 15
A |= (inst & 0b0000_0000_0000_1111_0000_0000_0000_0000) >> 16 << 11
A |= (inst & 0b0000_0000_0000_0000_0111_0000_0000_0000) >> 12 << 8
A |= inst & 0b0000_0000_0000_0000_0000_0000_1111_1111
if A & 0x8000:
# two's complement
A = -((A ^ 0xFFFF) + 1)
X = S + A
MaskX = X & 0xFFFF0000
# inst modification:
part1 = MaskX >> 28 # [19:16]
part2 = (MaskX >> 27) & 0x1 # [26]
part3 = (MaskX >> 24) & 0x7 # [14:12]
part4 = (MaskX >> 16) & 0xFF # [7:0]
inst &= 0b1111_1011_1111_0000_1000_1111_0000_0000
inst |= (part1 << 16) & 0b0000_0000_0000_1111_0000_0000_0000_0000
inst |= (part2 << 26) & 0b0000_0100_0000_0000_0000_0000_0000_0000
inst |= (part3 << 12) & 0b0000_0000_0000_0000_0111_0000_0000_0000
inst |= (part4 << 0) & 0b0000_0000_0000_0000_0000_0000_1111_1111
raw = ((inst & 0x00FF0000) >> 16, (inst & 0xFF000000) >> 24, (inst & 0x00FF), (inst & 0xFF00) >> 8)
inst = (raw[3] << 24) | (raw[2] << 16) | (raw[1] << 8) | raw[0]
log.debug("%s relocated as R_ARM_THM_MOVT_ABS to: 0x%x", self.symbol.name, inst)
return inst
[docs]
class R_ARM_GOT_PREL(GenericPCRelativeAddendReloc, RelocTruncate32Mixin, RelocGOTMixin):
"""
GOT(S) + A - P
Ref: https://github.com/ARM-software/abi-aa/blob/main/aaelf32/aaelf32.rst
"""
__all__ = [
"arch",
"R_ARM_CALL",
"R_ARM_PREL31",
"R_ARM_REL32",
"R_ARM_ABS32",
"R_ARM_MOVW_ABS_NC",
"R_ARM_MOVT_ABS",
"R_ARM_THM_CALL",
"R_ARM_COPY",
"R_ARM_GLOB_DAT",
"R_ARM_JUMP_SLOT",
"R_ARM_RELATIVE",
"R_ARM_ABS32_NOI",
"R_ARM_REL32_NOI",
"R_ARM_TLS_DTPMOD32",
"R_ARM_TLS_DTPOFF32",
"R_ARM_TLS_TPOFF32",
"R_ARM_JUMP24",
"R_ARM_PC24",
"R_ARM_THM_JUMP24",
"R_ARM_THM_JUMP19",
"R_ARM_THM_JUMP6",
"R_ARM_THM_MOVW_ABS_NC",
"R_ARM_THM_MOVT_ABS",
"R_ARM_GOT_PREL",
]