Source code for cle.backends.elf.elf

# pylint:disable=bad-builtin
import copy
import logging
import os
import pathlib
import xml.etree.ElementTree
from collections import OrderedDict, defaultdict
from typing import Set, Tuple

import archinfo
import elftools
from elftools.common.exceptions import DWARFError, ELFError, ELFParseError
from elftools.dwarf import callframe
from elftools.dwarf.descriptions import describe_attr_value, describe_form_class
from elftools.dwarf.die import DIE
from elftools.dwarf.dwarf_expr import DWARFExprParser
from elftools.dwarf.dwarfinfo import DWARFInfo
from elftools.elf import dynamic, elffile, enums, sections
from sortedcontainers import SortedDict

from cle.address_translator import AT
from cle.backends.backend import ExceptionHandling, FunctionHint, FunctionHintSource, register_backend
from cle.errors import CLECompatibilityError, CLEError, CLEInvalidBinaryError
from cle.patched_stream import PatchedStream
from cle.utils import ALIGN_DOWN, ALIGN_UP, get_mmaped_data, stream_or_path

from .compilation_unit import CompilationUnit
from .hashtable import ELFHashTable, GNUHashTable
from .lsda import LSDAExceptionTable
from .metaelf import MetaELF, maybedecode
from .regions import ELFSection, ELFSegment
from .relocation import get_relocation
from .relocation.generic import MipsGlobalReloc, MipsLocalReloc
from .subprogram import LexicalBlock, Subprogram
from .symbol import ELFSymbol, Symbol, SymbolType
from .variable import Variable
from .variable_type import VariableType

try:
    import pypcode
except ImportError:
    pypcode = None


log = logging.getLogger(name=__name__)

__all__ = ("ELFSymbol", "ELF")


# map 'e_machine' ELF header values (represented as `short int`s) to human-readable format (string)
# There are mappings missing currently in `elftools`, so we provide them ourselves
additional_e_machine_mappings: dict[int, str] = {
    247: "Linux BPF - in-kernel virtual machine",
    252: "C-SKY",
    0x5441: "Fujitsu FR-V",
    0x9026: "DEC Alpha",
}



[docs]
class ELF(MetaELF):
    """
    The main loader class for statically loading ELF executables. Uses the pyreadelf library where useful.

    Useful backend options:

    - ``debug_symbols``: Provides the path to a separate file which contains the binary's debug symbols
    - ``discard_section_headers``: Do not parse section headers. Use this if they are corrupted or malicious.
    - ``discard_program_headers``: Do not parse program headers. Use this if the binary is for a platform whose ELF
            loader only looks at section headers, but whose toolchain generates program headers anyway.
    """

    is_default = True  # Tell CLE to automatically consider using the ELF backend


[docs]
    def __init__(
        self,
        *args,
        addend=None,
        debug_symbols=None,
        discard_section_headers=False,
        discard_program_headers=False,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        patch_undo = []
        try:
            self._reader = elffile.ELFFile(self._binary_stream)
            list(self._reader.iter_sections())
        except Exception as e:  # pylint: disable=broad-except
            self._binary_stream.seek(4)
            ty = self._binary_stream.read(1)
            if ty not in (b"\1", b"\2"):
                raise CLECompatibilityError from e

            if ty == b"\1":
                patch_data = [(0x20, b"\0" * 4), (0x2E, b"\0" * 6)]
            else:
                patch_data = [(0x28, b"\0" * 8), (0x3A, b"\0" * 6)]

            for offset, patch in patch_data:
                self._binary_stream.seek(offset)
                patch_undo.append((offset, self._binary_stream.read(len(patch))))

            self._binary_stream = PatchedStream(self._binary_stream, patch_data)
            log.error("PyReadELF couldn't load this file. Trying again without section headers...")

            try:
                self._reader = elffile.ELFFile(self._binary_stream)
            except Exception as e1:  # pylint: disable=broad-except
                raise CLECompatibilityError from e1

        # Get an appropriate archinfo.Arch for this binary, unless the user specified one
        if self._arch is None:
            self.set_arch(self.extract_arch(self._reader))
        else:
            try:
                other_arch = self.extract_arch(self._reader)
                if other_arch != self.arch:
                    log.warning("User specified %s but autodetected %s. Proceed with caution.", self.arch, other_arch)
            except archinfo.ArchNotFound:
                pass

        self._addend = addend

        # initializers and finalizers
        self._inits_extracted = False
        self._preinit_arr = []
        self._init_func = None
        self._init_arr = []
        self._fini_func = None
        self._fini_arr = []

        # symbols
        self._nullsymbol = Symbol(self, "", 0, 0, SymbolType.TYPE_NONE)
        self._nullsymbol.is_static = True
        self._symbol_cache = {}
        self._symbols_by_name = {}
        self._desperate_for_symbols = False
        self.imports = {}
        self.resolved_imports = []
        self.hashtable = None
        self._versions = {0: "*local*", 1: "*global*"}
        self._vertable = None

        # relocations
        self.relocs = []
        self.jmprel = OrderedDict()
        self.rela_type = None
        self.__parsed_reloc_tables = set()

        # DWARF data
        self.has_dwarf_info = bool(self._reader.has_dwarf_info())
        self.build_id = None
        self.addr_to_line: "SortedDict[int, Set[Tuple[int, int]]]" = SortedDict()
        self.variables: list[Variable] | None = None
        self.compilation_units: list[CompilationUnit] | None = None

        # misc
        self._entry = self._reader.header.e_entry
        self.is_relocatable = self._reader.header.e_type == "ET_REL"
        self.pic = self.pic or self._reader.header.e_type in ("ET_REL", "ET_DYN")
        self.tls_block_offset = None  # this is an ELF-only attribute
        self._dynamic = {}
        self.deps = []

        # The linked image base should be evaluated before registering any segment or section due to
        # the fact that elffile, used by those methods, is working only with un-based virtual addresses, but Clemories
        # themselves are organized as a tree where each node backer internally uses relative addressing
        seg_addrs = (
            ALIGN_DOWN(x["p_vaddr"], self.loader.page_size)
            for x in self._reader.iter_segments()
            if x.header.p_type == "PT_LOAD" and x.header.p_memsz > 0
        )
        self.mapped_base = self.linked_base = 0
        try:
            self.mapped_base = self.linked_base = min(seg_addrs)
        except ValueError:
            log.info("no PT_LOAD segments identified")

        if not discard_program_headers:
            self.__register_segments()
        if not discard_section_headers:
            self.__register_sections()

        if not self.symbols:
            self._desperate_for_symbols = True
            self.symbols.update(self._symbol_cache.values())

        if self.has_dwarf_info and self.loader._load_debug_info:
            # load DWARF information
            try:
                dwarf = self._reader.get_dwarf_info()
            except ELFError:
                log.warning(
                    "An exception occurred in pyelftools when loading the DWARF information for %s. "
                    "Marking DWARF as not available for this binary.",
                    self.binary_basename,
                    exc_info=True,
                )
                dwarf = None
                self.has_dwarf_info = False

            if dwarf:
                # Load DIEs
                self._load_dies(dwarf)
                # Load function hints and exception handling artifacts
                if dwarf.has_EH_CFI():
                    self._load_function_hints_from_fde(dwarf, FunctionHintSource.EH_FRAME)
                    self._load_exception_handling(dwarf)
                    self._load_line_info(dwarf)

        if debug_symbols:
            self.__process_debug_file(debug_symbols)
        elif self.loader._load_debug_info:
            if self.build_id:
                debug_filename = f"/usr/lib/debug/.build-id/{self.build_id[:2]}/{self.build_id[2:]}.debug"
                if os.path.isfile(debug_filename):
                    self.__process_debug_file(debug_filename)
            if self.binary:
                debug_filename = os.path.join("/usr/lib/debug", os.path.realpath(self.binary))
                if os.path.isfile(debug_filename):
                    self.__process_debug_file(debug_filename)

        # call the methods defined by MetaELF
        self._ppc64_abiv1_entry_fix()
        self._load_plt()

        # hack: set guess_simprocs = True for object files
        if self.is_relocatable and self.imports and not self._dynamic:
            self.guess_simprocs = True

        for offset, patch in patch_undo:
            self.memory.store(AT.from_lva(self.min_addr + offset, self).to_rva(), patch)


    #
    # Properties and Public Methods
    #


[docs]
    def close(self):
        super().close()
        del self._reader



[docs]
    @classmethod
    def check_compatibility(cls, spec, obj):
        with stream_or_path(spec) as stream:
            try:
                return cls.extract_arch(elffile.ELFFile(stream)) == obj.arch
            except archinfo.ArchNotFound:
                return False



[docs]
    @classmethod
    def check_magic_compatibility(cls, stream):
        stream.seek(0)
        identstring = stream.read(0x10)
        stream.seek(0)
        return identstring.startswith(b"\x7fELF")



[docs]
    @staticmethod
    def is_compatible(stream):
        stream.seek(0)
        identstring = stream.read(0x1000)
        stream.seek(0)
        if identstring.startswith(b"\x7fELF"):
            if elftools.elf.elffile.ELFFile(stream).header["e_type"] == "ET_CORE":
                return False
            return True
        return False


    @staticmethod
    def _extract_arm_attrs(reader):
        # EDG says: Did you know ARM puts a whole special section in their elves to hold all their ABI junk?
        # As ARM architectures start to diverge a bunch, we actually need this data to pick out
        # what to do.  Particularly for us, it's whether we're doing Cortex-M or not, as our other ARM support
        # is generic

        # TODO: There's a whole pile of useful stuff in here. We should use it some day.
        attrs_sec = reader.get_section_by_name(".ARM.attributes")
        if not attrs_sec:
            return None  # No attrs here!
        attrs_sub_sec = None
        for subsec in attrs_sec.subsections:
            if isinstance(subsec, elftools.elf.sections.ARMAttributesSubsection):
                attrs_sub_sec = subsec
                break
        if not attrs_sub_sec:
            return None  # None here either
        attrs_sub_sub_sec = None
        for subsubsec in attrs_sub_sec.subsubsections:
            if isinstance(subsubsec, elftools.elf.sections.ARMAttributesSubsubsection):
                attrs_sub_sub_sec = subsubsec
                break
        if not attrs_sub_sub_sec:
            return None  # None here either
        # Ok, now we can finally look at the goods
        atts = {}
        for attobj in attrs_sub_sub_sec.attributes:
            atts[attobj.tag] = attobj.value
        return atts


[docs]
    @staticmethod
    def extract_arch(reader):
        e_machine_header_val = reader["e_machine"]
        arch_str = None
        if isinstance(e_machine_header_val, str):
            arch_str = e_machine_header_val
        elif isinstance(e_machine_header_val, int):
            if e_machine_header_val in additional_e_machine_mappings:
                arch_str = additional_e_machine_mappings[e_machine_header_val]
            else:
                raise CLECompatibilityError(
                    "The `e_machine` header value " "of the ELF file is not known: %d" % e_machine_header_val
                )
        else:
            assert False  # assumption that `reader['e_machine'] returns `str` or `int` is violated

        if "ARM" in arch_str:
            # Check the ARM attributes, if they exist
            arm_attrs = ELF._extract_arm_attrs(reader)
            if arm_attrs and "TAG_CPU_NAME" in arm_attrs:
                if arm_attrs["TAG_CPU_NAME"].endswith("-M") or "Cortex-M" in arm_attrs["TAG_CPU_NAME"]:
                    return archinfo.ArchARMCortexM("Iend_LE")
            if reader.header.e_flags & 0x200:
                return archinfo.ArchARMEL("Iend_LE" if reader.little_endian else "Iend_BE")
            elif reader.header.e_flags & 0x400:
                return archinfo.ArchARMHF("Iend_LE" if reader.little_endian else "Iend_BE")

        try:
            return archinfo.arch_from_id(arch_str, "le" if reader.little_endian else "be", reader.elfclass)
        except archinfo.ArchNotFound:
            arch = ELF._extract_pcode_arch(reader)
            if arch:
                return arch
            raise


    @staticmethod
    def _extract_pcode_arch(reader):
        if pypcode:
            languages = ELF._get_compatible_pcode_languages(reader)
            if languages:
                return archinfo.ArchPcode(languages[0])
        return None

    @property
    def initializers(self):
        if not self._inits_extracted:
            self._extract_init_fini()
        out = []

        if self.is_main_bin:
            # Preinitializers are ignored in shared objects.
            out.extend(self._preinit_arr)
        else:
            # The init func and the init array in the dynamic section are only run by the dynamic loader in shared
            # objects. In the main binary they are run by libc_csu_init (or libc_start_main in newer glibc)
            if self._init_func is not None:
                out.append(self._init_func)
            out.extend(self._init_arr)

        for i, x in enumerate(out):
            out[i] = AT.from_lva(x, self).to_mva()

        return out

    @property
    def finalizers(self):
        if not self._inits_extracted:
            self._extract_init_fini()
        out = []
        if self._fini_func is not None:
            out.append(self._fini_func)
        out.extend(self._fini_arr)
        for i, x in enumerate(out):
            out[i] = AT.from_lva(x, self).to_mva()
        return out

    @property
    def symbols_by_name(self):
        return self._symbols_by_name.copy()


[docs]
    def get_symbol(self, symid, symbol_table=None):  # pylint: disable=arguments-differ,arguments-renamed
        """
        Gets a Symbol object for the specified symbol.

        :param symid: Either an index into .dynsym or the name of a symbol.
        """
        version = None
        if isinstance(symid, int):
            if symid == 0:
                # special case the null symbol, this is important for static binaries
                return self._nullsymbol
            if symbol_table is None:
                raise TypeError("Must specify the symbol table to look up symbols by index")
            try:
                re_sym = symbol_table.get_symbol(symid)
            except Exception:  # pylint: disable=broad-except
                log.exception("Error parsing symbol %#08x", symid)
                return None
            cache_key = self._symbol_to_tuple(re_sym)
            cached = self._symbol_cache.get(cache_key, None)
            if cached is not None:
                return cached
            if self.hashtable is not None and symbol_table is self.hashtable.symtab and self._vertable is not None:
                version = self._vertable.get_symbol(symid).entry.ndx
        elif isinstance(symid, str):
            if not symid:
                log.warning("Trying to resolve a symbol by its empty name")
                return None
            cached = self._symbols_by_name.get(symid, None)
            if cached:
                return cached
            if self.hashtable is None:
                return None
            idx, re_sym = self.hashtable.get(symid)
            if re_sym is None:
                return None
            cache_key = self._symbol_to_tuple(re_sym)
            if self._vertable is not None:
                version = self._vertable.get_symbol(idx).entry.ndx
        elif isinstance(symid, sections.Symbol):
            cache_key = self._symbol_to_tuple(symid)
            cached = self._symbol_cache.get(cache_key, None)
            if cached is not None:
                return cached
            re_sym = symid
        else:
            raise CLEError(f"Bad symbol identifier: {symid!r}")

        symbol = ELFSymbol(self, re_sym)
        if version is not None and self._versions is not None:
            version = enums.ENUM_VERSYM.get(version, version) & 0x7FFF
            symbol.version = self._versions[version]
        self._symbol_cache[cache_key] = symbol
        self._cache_symbol_name(symbol)
        return symbol



[docs]
    def rebase(self, new_base):
        delta = new_base - self.linked_base
        super().rebase(new_base)

        self.addr_to_line = SortedDict((addr + delta, value) for addr, value in self.addr_to_line.items())


    #
    # Private Methods
    #

    @staticmethod
    def _symbol_to_tuple(re_sym):
        """
        Returns a tuple of properties of the given PyELF symbol.
        This is unique enough as a key for both symbol lookup and retrieval.
        """
        entry = re_sym.entry
        return (entry.st_name, entry.st_value, entry.st_size, entry.st_info.bind, entry.st_info.type, entry.st_shndx)

    def _cache_symbol_name(self, symbol):
        name = symbol.name
        if name:
            if self._desperate_for_symbols:
                idx = self.symbols.bisect_key_left(symbol.relative_addr)
                if idx >= len(self.symbols) or self.symbols[idx].name != name:
                    self.symbols.add(symbol)

            if name in self._symbols_by_name:
                old_symbol = self._symbols_by_name[name]
                if not old_symbol.is_weak and symbol.is_weak:
                    return
            self._symbols_by_name[name] = symbol

    def _extract_init_fini(self):
        # Extract the initializers and finalizers
        # the arrays are actually mvas because they are in memory and thus relocated. turn them into lvas.
        if "DT_PREINIT_ARRAY" in self._dynamic and "DT_PREINIT_ARRAYSZ" in self._dynamic:
            arr_start = AT.from_lva(self._dynamic["DT_PREINIT_ARRAY"], self).to_rva()
            arr_end = arr_start + self._dynamic["DT_PREINIT_ARRAYSZ"]
            arr_entsize = self.arch.bytes
            self._preinit_arr = list(map(self.memory.unpack_word, range(arr_start, arr_end, arr_entsize)))
            for i, x in enumerate(self._preinit_arr):
                self._preinit_arr[i] = AT.from_mva(x, self).to_lva()
        if "DT_INIT" in self._dynamic:
            self._init_func = self._dynamic["DT_INIT"]
        if "DT_INIT_ARRAY" in self._dynamic and "DT_INIT_ARRAYSZ" in self._dynamic:
            arr_start = AT.from_lva(self._dynamic["DT_INIT_ARRAY"], self).to_rva()
            arr_end = arr_start + self._dynamic["DT_INIT_ARRAYSZ"]
            arr_entsize = self.arch.bytes
            self._init_arr = list(map(self.memory.unpack_word, range(arr_start, arr_end, arr_entsize)))
        for i, x in enumerate(self._init_arr):
            self._init_arr[i] = AT.from_mva(x, self).to_lva()
        if "DT_FINI" in self._dynamic:
            self._fini_func = self._dynamic["DT_FINI"]
        if "DT_FINI_ARRAY" in self._dynamic and "DT_FINI_ARRAYSZ" in self._dynamic:
            arr_start = AT.from_lva(self._dynamic["DT_FINI_ARRAY"], self).to_rva()
            arr_end = arr_start + self._dynamic["DT_FINI_ARRAYSZ"]
            arr_entsize = self.arch.bytes
            self._fini_arr = list(map(self.memory.unpack_word, range(arr_start, arr_end, arr_entsize)))
            for i, x in enumerate(self._fini_arr):
                self._fini_arr[i] = AT.from_mva(x, self).to_lva()
        self._inits_extracted = True

    def _load_segment(self, seg):
        loaded_segment = ELFSegment(seg)
        self.segments.append(loaded_segment)

        # see https://code.woboq.org/userspace/glibc/elf/dl-load.c.html#1066
        ph = seg.header

        if ph.p_align & (self.loader.page_size - 1) != 0:
            log.error(
                "ELF file %s is loading a segment which is not page-aligned... do you need to change the page size?",
                self.binary,
            )

        if (ph.p_vaddr - ph.p_offset) & (ph.p_align - 1) != 0:
            log.warning(
                "ELF file %s is loading a segment with an inappropriate alignment. It might not work in all contexts.",
                self.binary,
            )
        if ph.p_filesz > ph.p_memsz:
            raise CLEInvalidBinaryError(
                "ELF file %s is loading a segment with an inappropriate allocation" % self.binary
            )

        mapstart = ALIGN_DOWN(ph.p_vaddr, self.loader.page_size)
        mapend = ALIGN_UP(ph.p_vaddr + ph.p_filesz, self.loader.page_size)

        dataend = ph.p_vaddr + ph.p_filesz
        allocend = ph.p_vaddr + ph.p_memsz

        mapoff = ALIGN_DOWN(ph.p_offset, self.loader.page_size)

        # patch modified addresses into ELFSegment
        loaded_segment.vaddr = mapstart
        loaded_segment.memsize = mapend - mapstart
        loaded_segment.filesize = mapend - mapstart
        loaded_segment.offset = mapoff

        # see https://code.woboq.org/userspace/glibc/elf/dl-map-segments.h.html#88
        data = get_mmaped_data(seg.stream, mapoff, mapend - mapstart, self.loader.page_size)
        if allocend > dataend:
            zero = dataend
            zeropage = (zero + self.loader.page_size - 1) & ~(self.loader.page_size - 1)

            if zeropage > zero:
                data = data[: zero - mapstart].ljust(zeropage - mapstart, b"\0")

            zeroend = ALIGN_UP(allocend, self.loader.page_size)  # mmap maps to the next page boundary
            if zeroend > zeropage:
                data = data.ljust(zeroend - mapstart, b"\0")
            loaded_segment.memsize = zeroend - mapstart
        elif not data:
            log.warning("Segment %s is empty at %#08x!", seg.header.p_type, mapstart)
            return

        self.memory.add_backer(AT.from_lva(mapstart, self).to_rva(), data, overwrite=True)

    def _make_reloc(self, readelf_reloc, symbol, dest_section: ELFSection | None = None):
        addend = readelf_reloc.entry.r_addend if readelf_reloc.is_RELA() else None
        RelocClass = get_relocation(self.arch.name, readelf_reloc.entry.r_info_type)
        if RelocClass is None:
            return None

        address = AT.from_lva(readelf_reloc.entry.r_offset, self).to_rva()
        if dest_section is not None:
            # note to the intrepid explorer: this code was changed as per cle#467
            # address += dest_section.remap_offset
            if self.is_relocatable:
                address += AT.from_mva(dest_section.vaddr, self).to_rva()

        try:
            return RelocClass(self, symbol, address, addend)
        except KeyError:
            log.error("Malformed relocation: access to unmapped %#x", readelf_reloc.entry.r_offset)
            return None

    def _load_function_hints_from_fde(self, dwarf, source):
        """
        Load frame description entries out of the .eh_frame section. These entries include function addresses and can be
        used to improve CFG recovery.

        :param dwarf:   The DWARF info object from pyelftools.
        :return:        None
        """

        try:
            for entry in dwarf.EH_CFI_entries():
                if isinstance(entry, callframe.FDE):
                    self.function_hints.append(
                        FunctionHint(
                            entry.header["initial_location"],
                            entry.header["address_range"],
                            source,
                        )
                    )
        except (DWARFError, ValueError):
            log.warning("An exception occurred in pyelftools when loading FDE information.", exc_info=True)

    def _load_exception_handling(self, dwarf):
        """
        Load exception handling information out of the .eh_frame and .gcc_except_table sections. We may support more
        types of exception handling information in the future.

        :param dwarf:   The DWARF info object from pyelftools.
        :return:        None
        """

        try:
            lsda = LSDAExceptionTable(self._binary_stream, self.arch.bits, self.arch.memory_endness == "Iend_LE")
            for entry in dwarf.EH_CFI_entries():
                if (
                    isinstance(entry, callframe.FDE)
                    and hasattr(entry, "lsda_pointer")
                    and entry.lsda_pointer is not None
                ):
                    # function address
                    func_addr = entry.header.get("initial_location", None)
                    if func_addr is None:
                        log.warning(
                            "Unexpected FDE structure: "
                            "initial_location is not specified while LSDA pointer is available."
                        )
                        continue

                    # Load and parse LSDA exception table
                    file_offset = self.addr_to_offset(entry.lsda_pointer)
                    lsda_exception_table = lsda.parse_lsda(entry.lsda_pointer, file_offset)
                    for exc in lsda_exception_table:
                        handling = ExceptionHandling(
                            func_addr + exc.cs_start,
                            exc.cs_len,
                            handler_addr=func_addr + exc.cs_lp if exc.cs_lp != 0 else None,
                            func_addr=func_addr,
                        )
                        self.exception_handlings.append(handling)

        except (DWARFError, ValueError):
            log.warning("An exception occurred in pyelftools when loading FDE information.", exc_info=True)

    def _load_line_info(self, dwarf):
        """
        Generates addr_to_line as a mapping: addr -> (filename, lineno).
        Lineno is one-indexed.
        """
        for cu in dwarf.iter_CUs():
            comp_dir = "."
            try:
                die = cu.get_top_DIE()
            except KeyError:
                # pyelftools is not very resilient
                continue
            if "DW_AT_comp_dir" in die.attributes:
                comp_dir = die.attributes["DW_AT_comp_dir"].value.decode()
            try:
                lineprog = dwarf.line_program_for_CU(cu)
            except ELFParseError:
                continue
            if lineprog is None:
                continue
            file_cache = {}
            for line in lineprog.get_entries():
                if line.state is None:
                    continue
                if line.state.file in file_cache:
                    filename = file_cache[line.state.file]
                else:
                    file_entry = lineprog.header["file_entry"][line.state.file - 1]
                    if file_entry["dir_index"] == 0:
                        filename = pathlib.PurePosixPath(comp_dir) / file_entry.name.decode()
                    else:
                        filename = (
                            pathlib.PurePosixPath(comp_dir)
                            / lineprog.header["include_directory"][file_entry["dir_index"] - 1].decode()
                            / file_entry.name.decode()
                        )
                    file_cache[line.state.file] = filename

                relocated_addr = AT.from_lva(line.state.address, self).to_mva()
                if relocated_addr not in self.addr_to_line:
                    self.addr_to_line[relocated_addr] = set()
                self.addr_to_line[relocated_addr].add((str(filename), line.state.line))

    @staticmethod
    def _load_low_high_pc_form_die(die: DIE):
        """
        Load low and high pc from a DIE.

        :param die:     The DIE object from pyelftools.
        :return:        low_pc, high_pc
        """
        if "DW_AT_low_pc" not in die.attributes:
            return None, None
        lowpc = die.attributes["DW_AT_low_pc"].value

        if "DW_AT_high_pc" not in die.attributes:
            return lowpc, None

        # DWARF v4 in section 2.17 describes how to interpret the
        # DW_AT_high_pc attribute based on the class of its form.
        # For class 'address' it's taken as an absolute address
        # (similarly to DW_AT_low_pc); for class 'constant', it's
        # an offset from DW_AT_low_pc.
        highpc_attr = die.attributes["DW_AT_high_pc"]
        highpc_attr_class = describe_form_class(highpc_attr.form)
        if highpc_attr_class == "address":
            highpc = highpc_attr.value
        elif highpc_attr_class == "constant":
            highpc = lowpc + highpc_attr.value
        else:
            log.warning("Error: invalid DW_AT_high_pc class:%s", highpc_attr_class)
            return lowpc, None
        return lowpc, highpc

    def _load_dies(self, dwarf: DWARFInfo):
        """
        Load DIEs and CUs from DWARF.

        :param dwarf:   The DWARF info object from pyelftools.
        :return:        None
        """
        compilation_units: list[CompilationUnit] = []
        type_list: dict[int, VariableType] = {}

        for cu in dwarf.iter_CUs():
            expr_parser = DWARFExprParser(cu.structs)

            # scan the whole die tree for DW_TAG_base_type
            try:
                for die in cu.iter_DIEs():
                    if VariableType.supported_die(die):
                        var_type = VariableType.read_from_die(die, self)
                        if var_type is not None:
                            type_list[die.offset] = var_type
            except KeyError:
                # pyelftools is not very resilient - we need to catch KeyErrors here
                continue

            top_die = cu.get_top_DIE()

            if top_die.tag != "DW_TAG_compile_unit":
                log.warning("ignore a top die with unexpected tag")
                continue

            die_name = top_die.attributes.get("DW_AT_name", None)
            die_comp_dir = top_die.attributes.get("DW_AT_comp_dir", None)
            die_low_pc, die_high_pc = self._load_low_high_pc_form_die(top_die)
            die_lang = top_die.attributes.get("DW_AT_language", None)

            if (
                die_name is None
                or die_comp_dir is None
                or die_low_pc is None
                or die_high_pc is None
                or die_lang is None
            ):
                continue

            die_name = die_name.value.decode("utf-8")
            die_comp_dir = die_comp_dir.value.decode("utf-8")
            die_lang = describe_attr_value(die_lang, top_die, top_die.offset)

            cu_ = CompilationUnit(die_name, die_comp_dir, die_low_pc, die_high_pc, die_lang, self)
            compilation_units.append(cu_)

            for die_child in cu.iter_DIE_children(top_die):
                if die_child.tag == "DW_TAG_variable":
                    # load global variable
                    var = Variable.from_die(die_child, expr_parser, self)
                    var.decl_file = cu_.file_path
                    cu_.global_variables.append(var)
                elif die_child.tag == "DW_TAG_subprogram":
                    # load subprogram
                    sub_prog = self._load_die_lex_block(die_child, expr_parser, type_list, cu, cu_.file_path, None)
                    if sub_prog is not None:
                        cu_.functions[sub_prog.low_pc] = sub_prog

        self.type_list = type_list
        self.compilation_units = compilation_units

    def _load_die_lex_block(self, die: DIE, expr_parser, type_list, cu, file_path, subprogram) -> LexicalBlock:
        if "DW_AT_name" in die.attributes:
            name = die.attributes["DW_AT_name"].value.decode("utf-8")
        else:
            name = None

        low_pc, high_pc = self._load_low_high_pc_form_die(die)
        if low_pc is None or high_pc is None:
            return None

        if subprogram is None:
            subprogram = block = Subprogram(name, low_pc, high_pc)
        else:
            block = LexicalBlock(low_pc, high_pc)

        for sub_die in cu.iter_DIE_children(die):
            if sub_die.tag in ["DW_TAG_variable", "DW_TAG_formal_parameter"]:
                # load local variable
                var = Variable.from_die(sub_die, expr_parser, self, block)
                var.decl_file = file_path
                subprogram.local_variables.append(var)
            elif sub_die.tag == "DW_TAG_lexical_block":
                sub_block = self._load_die_lex_block(sub_die, expr_parser, type_list, cu, file_path, subprogram)
                if sub_block is not None:
                    block.child_blocks.append(sub_block)

        return block

    #
    # Private Methods... really. Calling these out of context
    # will probably break things. Caveat emptor.
    #

    def __register_segments(self):
        self.linking = "static"
        if self.is_relocatable and self._reader.num_segments() > 0:
            # WTF?
            raise CLEError("Relocatable objects with segments are not supported.")

        type_to_seg_mapping = defaultdict(list)
        for seg_readelf in self._reader.iter_segments():
            type_to_seg_mapping[seg_readelf.header.p_type].append(seg_readelf)

        # PT_LOAD segments must be processed first so that the memory_backers for the other segments exist
        for seg in type_to_seg_mapping["PT_LOAD"]:
            self._load_segment(seg)

        # the order of processing for the other three handled segment_types should not matter, but let's have
        # it consistent
        for seg in type_to_seg_mapping["PT_DYNAMIC"]:
            self.__register_dyn(seg)
            self.linking = "dynamic"

        for seg in type_to_seg_mapping["PT_TLS"]:
            self.__register_tls(seg)

        for seg in type_to_seg_mapping["PT_GNU_STACK"]:
            self.execstack = bool(seg.header.p_flags & 1)

        for seg in type_to_seg_mapping["PT_GNU_RELRO"]:
            self.__register_relro(seg)

    def __register_dyn(self, seg_readelf):
        """
        Parse the dynamic section for dynamically linked objects.
        """
        # PATHOLOGICAL CASE
        # some elf files have a dyn with filesz = 0 but actually do contain content. this is valid. apparently.
        # this is a hack. there is certainly a better way to do this
        # ref (for fish and audrey's eyes only) https://shellphish.slack.com/archives/D08QXM76Y/p1649197435856279
        if seg_readelf.header.p_filesz == 0 and seg_readelf.header.p_memsz != 0:
            seg_readelf.header.p_filesz = seg_readelf.header.p_memsz
            seg_readelf.header.p_offset = AT.from_lva(seg_readelf.header.p_vaddr, self).to_rva()
            dynamic.Dynamic.__init__(
                seg_readelf,
                self.memory,  # YIKES
                seg_readelf.elffile,
                seg_readelf._stringtable,
                seg_readelf.header.p_offset,
                False,
            )

        runpath, rpath = "", ""
        for tag in seg_readelf.iter_tags():
            # Create a dictionary, self._dynamic, mapping DT_* strings to their values
            tagstr = self.arch.translate_dynamic_tag(tag.entry.d_tag)
            self._dynamic[tagstr] = tag.entry.d_val
            # For tags that may appear more than once, handle them here
            if tagstr == "DT_NEEDED":
                self.deps.append(maybedecode(tag.needed))
            elif tagstr == "DT_SONAME":
                self.provides = maybedecode(tag.soname)
            elif tagstr == "DT_RUNPATH":
                runpath = maybedecode(tag.runpath)
            elif tagstr == "DT_RPATH":
                rpath = maybedecode(tag.rpath)

        self.extra_load_path = self.__parse_rpath(runpath, rpath)

        strtab = seg_readelf._get_stringtable()
        if strtab is None:
            log.warning("Unexpected return value from pyelftools: stringtable object is None.")
            return
        self.__neuter_streams(strtab)

        # To extract symbols from binaries without section headers, we need to hack into pyelftools.
        # TODO: pyelftools is less bad than it used to be. how much of this can go away?
        # None of the following things make sense without a string table or a symbol table
        if "DT_STRTAB" in self._dynamic and "DT_SYMTAB" in self._dynamic and "DT_SYMENT" in self._dynamic:
            num_symbols = seg_readelf.num_symbols()  # this is not actually reliable

            # Construct our own symbol table to hack around pyreadelf assuming section headers are around
            entsize = self._dynamic["DT_SYMENT"]
            fakesymtabheader = {
                "sh_offset": AT.from_lva(self._dynamic["DT_SYMTAB"], self).to_rva(),
                "sh_entsize": entsize,
                "sh_size": entsize * num_symbols,
                "sh_flags": 0,
                "sh_addralign": 0,
            }
            dynsym = elffile.SymbolTableSection(fakesymtabheader, "symtab_cle", self._reader, strtab)
            dynsym.stream = self.memory
            dynsym.elffile = None

            # set up the hash table, preferring the gnu hash section to the old hash section
            # the hash table lets you get any symbol given its name
            if "DT_GNU_HASH" in self._dynamic:
                self.hashtable = GNUHashTable(
                    dynsym, self.memory, AT.from_lva(self._dynamic["DT_GNU_HASH"], self).to_rva(), self.arch
                )
            elif "DT_HASH" in self._dynamic:
                self.hashtable = ELFHashTable(
                    dynsym, self.memory, AT.from_lva(self._dynamic["DT_HASH"], self).to_rva(), self.arch
                )
            else:
                log.warning("No hash table available in %s", self.binary)

            # SYMBOL VERSIONING
            # REJOICE
            if "DT_VERNEED" in self._dynamic and "DT_VERNEEDNUM" in self._dynamic:
                verneed_count = self._dynamic["DT_VERNEEDNUM"]
                verneed_entsize = self._reader.structs.Elf_Verneed.sizeof()
                fake_verneed_header = {
                    "sh_offset": AT.from_lva(self._dynamic["DT_VERNEED"], self).to_rva(),
                    "sh_entsize": verneed_entsize,
                    "sh_size": verneed_entsize * verneed_count,
                    "sh_flags": 0,
                    "sh_addralign": 0,
                    "sh_info": verneed_count,
                }
                readelf_verneed = elffile.GNUVerNeedSection(fake_verneed_header, "verneed_cle", self._reader, strtab)
                for _, aux in readelf_verneed.iter_versions():
                    for vaux in aux:
                        self._versions[vaux.entry.vna_other] = vaux.name

            if "DT_VERDEF" in self._dynamic and "DT_VERDEFNUM" in self._dynamic:
                verdef_count = self._dynamic["DT_VERDEFNUM"]
                verdef_entsize = self._reader.structs.Elf_Verdef.sizeof()
                fake_verdef_header = {
                    "sh_offset": AT.from_lva(self._dynamic["DT_VERDEF"], self).to_rva(),
                    "sh_entsize": verdef_entsize,
                    "sh_size": verdef_entsize * verdef_count,
                    "sh_flags": 0,
                    "sh_addralign": 0,
                    "sh_info": verdef_count,
                }
                readelf_verdef = elffile.GNUVerDefSection(fake_verdef_header, "verdef_cle", self._reader, strtab)
                for ver, aux in readelf_verdef.iter_versions():
                    for vaux in aux:
                        self._versions[ver.entry.vd_ndx] = vaux.name
                        break  # not a typo - only the first aux entry has anything important (?)

            if "DT_VERSYM" in self._dynamic:
                versym_entsize = self._reader.structs.Elf_Versym.sizeof()
                fake_versym_header = {
                    "sh_offset": AT.from_lva(self._dynamic["DT_VERSYM"], self).to_rva(),
                    "sh_entsize": versym_entsize,
                    "sh_size": versym_entsize * num_symbols,
                    "sh_flags": 0,
                    "sh_addralign": 0,
                }
                readelf_versym = elffile.GNUVerSymSection(fake_versym_header, "versym_cle", self._reader, dynsym)
                readelf_versym.stream = self.memory
                readelf_versym.elffile = None
                self._vertable = readelf_versym

            # mips' relocations are absolutely screwed up, handle some of them here.
            self.__relocate_mips(dynsym)

            # perform a lot of checks to figure out what kind of relocation tables are around
            self.rela_type = None
            if "DT_PLTREL" in self._dynamic:
                if self._dynamic["DT_PLTREL"] == 7:
                    self.rela_type = "RELA"
                    relentsz = self._reader.structs.Elf_Rela.sizeof()
                elif self._dynamic["DT_PLTREL"] == 17:
                    self.rela_type = "REL"
                    relentsz = self._reader.structs.Elf_Rel.sizeof()
                else:
                    raise CLEInvalidBinaryError("DT_PLTREL is not REL or RELA?")
            else:
                if "DT_RELA" in self._dynamic:
                    self.rela_type = "RELA"
                    relentsz = self._reader.structs.Elf_Rela.sizeof()
                elif "DT_REL" in self._dynamic:
                    self.rela_type = "REL"
                    relentsz = self._reader.structs.Elf_Rel.sizeof()
                else:
                    return

            # try to parse relocations out of a table of type DT_REL{,A}
            rela_tag = "DT_" + self.rela_type
            relsz_tag = rela_tag + "SZ"
            if rela_tag in self._dynamic:
                reloffset = AT.from_lva(self._dynamic[rela_tag], self).to_rva()
                if relsz_tag not in self._dynamic:
                    raise CLEInvalidBinaryError(f"Dynamic section contains {rela_tag} but not {relsz_tag}")
                relsz = self._dynamic[relsz_tag]
                fakerelheader = {
                    "sh_offset": reloffset,
                    "sh_type": "SHT_" + self.rela_type,
                    "sh_entsize": relentsz,
                    "sh_size": relsz,
                    "sh_flags": 0,
                    "sh_addralign": 0,
                }
                readelf_relocsec = elffile.RelocationSection(fakerelheader, "reloc_cle", self._reader)
                # support multiple versions of pyelftools
                readelf_relocsec.stream = self.memory
                readelf_relocsec._stream = self.memory
                readelf_relocsec.elffile = None
                self.__register_relocs(readelf_relocsec, dynsym)

            # try to parse relocations out of a table of type DT_JMPREL
            if "DT_JMPREL" in self._dynamic:
                jmpreloffset = AT.from_lva(self._dynamic["DT_JMPREL"], self).to_rva()
                if "DT_PLTRELSZ" not in self._dynamic:
                    raise CLEInvalidBinaryError("Dynamic section contains DT_JMPREL but not DT_PLTRELSZ")
                jmprelsz = self._dynamic["DT_PLTRELSZ"]
                fakejmprelheader = {
                    "sh_offset": jmpreloffset,
                    "sh_type": "SHT_" + self.rela_type,
                    "sh_entsize": relentsz,
                    "sh_size": jmprelsz,
                    "sh_flags": 0,
                    "sh_addralign": 0,
                }
                readelf_jmprelsec = elffile.RelocationSection(fakejmprelheader, "jmprel_cle", self._reader)
                # support multiple versions of pyelftools
                readelf_jmprelsec.stream = self.memory
                readelf_jmprelsec._stream = self.memory
                readelf_jmprelsec.elffile = None
                self.__register_relocs(readelf_jmprelsec, dynsym, force_jmprel=True)

            self.__register_section_symbols(dynsym)

    def __parse_rpath(self, runpath, rpath):
        """
        Split RPATH/RUNPATH tags into a list of paths while expanding rpath tokens.
        """
        # DT_RUNPATH takes predence over DT_RPATH
        if len(runpath) > 0:
            pass
        else:
            runpath = rpath

        parts = []
        for part in runpath.split(":"):
            # does not handle $LIB/$PLATFORM substitutions yet
            if self.binary is not None:
                part = part.replace("$ORIGIN", os.path.dirname(self.binary))
            elif "$ORIGIN" in part:
                log.warning(
                    "DT_RUNPATH/DT_RPATH of the binary contains $ORIGIN tokens but no self.binary, "
                    "some libraries might be not found"
                )
            parts.append(part)
        return parts

    def __register_relocs(self, section, dynsym=None, force_jmprel=False):
        got_min = got_max = 0

        if not force_jmprel:
            got_sec = self._reader.get_section_by_name(".got")
            if got_sec is not None:
                got_min = got_sec.header.sh_addr
                got_max = got_min + got_sec.header.sh_size

        if section.header["sh_offset"] in self.__parsed_reloc_tables:
            return
        self.__parsed_reloc_tables.add(section.header["sh_offset"])

        # Get the target section's remapping offset for relocations
        dest_sec = None
        dest_sec_idx = section.header.get("sh_info", None)
        if dest_sec_idx is not None:
            try:
                dest_sec = self.sections[dest_sec_idx]
            except IndexError:
                log.warning("the relocation section %s refers to unknown section index: %d", section.name, dest_sec_idx)
            else:
                if dest_sec.is_active and not dest_sec.occupies_memory:
                    # The target section is not loaded into memory, so just continue
                    return

        symtab = self._reader.get_section(section.header["sh_link"]) if "sh_link" in section.header else dynsym
        if isinstance(symtab, elftools.elf.sections.NullSection):
            # Oh my god Atmel please stop
            symtab = self._reader.get_section_by_name(".symtab")
        relocs = []
        for readelf_reloc in section.iter_relocations():
            # MIPS64 is just plain old fucked up
            # https://www.sourceware.org/ml/libc-alpha/2003-03/msg00153.html
            if self.arch.name == "MIPS64":
                if not hasattr(readelf_reloc.entry, "r_info_type2") and hasattr(readelf_reloc.entry, "r_info_type3"):
                    raise CLECompatibilityError(
                        "This code relies on `pyelftools` features that are not available on versions 0.26 and below."
                    )

                type_1 = readelf_reloc.entry.r_info_type
                type_2 = readelf_reloc.entry.r_info_type2
                type_3 = readelf_reloc.entry.r_info_type3
                symbol = self.get_symbol(readelf_reloc.entry.r_info_sym, symtab)

                if type_1 != 0:
                    readelf_reloc.entry.r_info_type = type_1
                    reloc = self._make_reloc(readelf_reloc, symbol, dest_sec)
                    if reloc is not None:
                        relocs.append(reloc)
                        self.relocs.append(reloc)
                if type_2 != 0:
                    readelf_reloc.entry.r_info_type = type_2
                    reloc = self._make_reloc(readelf_reloc, symbol, dest_sec)
                    if reloc is not None:
                        relocs.append(reloc)
                        self.relocs.append(reloc)
                if type_3 != 0:
                    readelf_reloc.entry.r_info_type = type_3
                    reloc = self._make_reloc(readelf_reloc, symbol, dest_sec)
                    if reloc is not None:
                        relocs.append(reloc)
                        self.relocs.append(reloc)
            else:
                symbol = self.get_symbol(readelf_reloc.entry.r_info_sym, symtab)
                if symbol is None:
                    continue
                reloc = self._make_reloc(readelf_reloc, symbol, dest_sec)
                if reloc is not None:
                    relocs.append(reloc)
                    self.relocs.append(reloc)

        for reloc in relocs:
            if reloc.symbol.name != "" and (force_jmprel or got_min <= reloc.linked_addr < got_max):
                self.jmprel[reloc.symbol.name] = reloc

    def __register_tls(self, seg_readelf):
        self.tls_block_size = seg_readelf.header.p_memsz
        self.tls_data_size = seg_readelf.header.p_filesz
        self.tls_data_start = AT.from_lva(seg_readelf.header.p_vaddr, self).to_rva()
        if self.tls_block_size != 0 or self.tls_data_size != 0:
            self.tls_used = True

    def __register_relro(self, segment_relro):
        segment_relro = ELFSegment(segment_relro, relro=True)
        vaddr = ALIGN_DOWN(segment_relro.vaddr, self.loader.page_size)
        vaddr_end = ALIGN_UP(vaddr + segment_relro.memsize, self.loader.page_size)
        vaddr_endfile = ALIGN_UP(vaddr + segment_relro.filesize, self.loader.page_size)

        segment_relro.offset = ALIGN_DOWN(segment_relro.offset, self.loader.page_size)
        segment_relro.vaddr = vaddr
        segment_relro.memsize = vaddr_end - vaddr
        segment_relro.filesize = vaddr_endfile - vaddr

        def ___segments_overlap(seg1, seg2):
            # Re-arrange so seg1 starts first
            seg1, seg2 = (seg1, seg2) if seg1.min_addr < seg2.min_addr else (seg2, seg1)
            # seg1 and seg2 overlap if seg2 starts before seg1 ends
            return seg2.min_addr <= seg1.max_addr

        overlapping_segments = [seg for seg in self.segments if ___segments_overlap(segment_relro, seg)]

        if len(overlapping_segments) == 0:
            log.error("RELRO segment does not overlap with any loaded segment.")
            return

        if len(overlapping_segments) > 1:
            # I don't think this ever happens.  If it does, weshould
            # probably also split the RELRO segment so that each one
            # has the right permissions in case the two overlapping
            # segments have different permissions.
            log.warning("RELRO segment overlaps multiple segments.")

        for overlapping_segment in overlapping_segments:
            # We will split the overlapping segment into two pieces:
            # one for the segment below the RELRO segment, and one for
            # above.
            segment_below = copy.copy(overlapping_segment)
            segment_below.memsize = segment_relro.min_addr - overlapping_segment.min_addr
            segment_below.filesize = max(segment_below.filesize, segment_below.memsize)

            segment_above = copy.copy(overlapping_segment)
            segment_above.vaddr = segment_relro.max_addr + 1
            segment_above.memsize = overlapping_segment.max_addr - segment_above.vaddr + 1
            segment_above.filesize = max(0, segment_above.filesize - segment_relro.memsize - segment_below.memsize)
            segment_above.offset = segment_above.offset + segment_below.memsize + segment_relro.memsize

            split_segments = [segment_below, segment_relro, segment_above]
            split_segments = [seg for seg in split_segments if seg.memsize > 0]

            # Remove the original segment
            self.segments.remove(overlapping_segment)

            # Add the new ones
            for seg in split_segments:
                self.segments.append(seg)

        # Add the actual relro segment, and mark it as always
        # read-only.  We use the flags of the overlapping original
        # segment.
        segment_relro.flags = overlapping_segments[0].flags & ~2

    def __register_sections(self):
        new_addr = 0
        sec_list = []

        for sec_readelf in self._reader.iter_sections():
            remap_offset = 0
            if self.is_relocatable and sec_readelf.header["sh_flags"] & 2:  # alloc flag
                # Relocatable objects' section addresses are meaningless (they are meant to be relocated anyway)
                # We thus have to map them manually to valid virtual addresses to emulate a linker's behaviour.
                sh_addr = sec_readelf.header["sh_addr"]
                align = sec_readelf.header["sh_addralign"]
                if align > 0:
                    new_addr = (new_addr + (align - 1)) // align * align

                remap_offset = new_addr - sh_addr
                new_addr += sec_readelf.header["sh_size"]  # address for next section

            section = ELFSection(sec_readelf, remap_offset=remap_offset)
            sec_list.append((sec_readelf, section))

            # Register sections first, process later - this is required by relocatable objects
            self.sections.append(section)
            self.sections_map[section.name] = section

        for sec_readelf, section in sec_list:
            if isinstance(sec_readelf, elffile.SymbolTableSection):
                self.__register_section_symbols(sec_readelf)
            if isinstance(sec_readelf, elffile.RelocationSection) and not (
                "DT_REL" in self._dynamic or "DT_RELA" in self._dynamic or "DT_JMPREL" in self._dynamic
            ):
                self.__register_relocs(sec_readelf, dynsym=None)

            if section.occupies_memory:  # alloc flag - stick in memory maybe!
                if (
                    AT.from_lva(section.vaddr, self).to_rva() not in self.memory
                ):  # only allocate if not already allocated (i.e. by program header)
                    if section.type == "SHT_NOBITS":
                        self.memory.add_backer(
                            AT.from_lva(section.vaddr, self).to_rva(),
                            b"\0" * sec_readelf.header["sh_size"],
                            overwrite=True,
                        )
                    elif section.type == "SHT_NOTE":
                        pass  # observed this case in angr/angr#3829
                    else:  # elif section.type == 'SHT_PROGBITS':
                        self.memory.add_backer(
                            AT.from_lva(section.vaddr, self).to_rva(), sec_readelf.data(), overwrite=True
                        )

            if sec_readelf.header.sh_type == "SHT_NOTE":
                self.__register_notes(sec_readelf)

            if section.name == ".comment":
                self.__analyze_comments(sec_readelf.data())

    def __register_notes(self, sec_readelf):
        for note in sec_readelf.iter_notes():
            if note.n_type == "NT_GNU_BUILD_ID" and note.n_name == "GNU":
                if self.build_id is not None and self.build_id != note.n_desc:
                    log.error("Mismatched build IDs present")
                self.build_id = note.n_desc

    def __analyze_comments(self, data):
        try:
            data = data.decode().split("\0")
        except UnicodeDecodeError:
            return

        for line in data:
            versions = [word for word in line.replace("(", " ").replace(")", " ").split() if "." in word]
            if not versions:
                continue
            versions.sort(key=len)
            version = versions[-1]
            lline = line.lower()
            if "clang" in lline:
                compiler = "clang"
            elif "gcc" in lline:
                compiler = "gcc"
            else:
                continue

            self.compiler = compiler, version

    def __register_section_symbols(self, sec_re):
        for sym_re in sec_re.iter_symbols():
            self.symbols.add(self.get_symbol(sym_re))

    def __relocate_mips(self, symtab):
        if "DT_MIPS_BASE_ADDRESS" not in self._dynamic:
            return False
        # The MIPS GOT is an array of addresses, simple as that.
        got_local_num = self._dynamic["DT_MIPS_LOCAL_GOTNO"]  # number of local GOT entries
        # a.k.a the index of the first global GOT entry
        symtab_got_idx = self._dynamic["DT_MIPS_GOTSYM"]  # index of first symbol w/ GOT entry
        symbol_count = self._dynamic["DT_MIPS_SYMTABNO"]
        gotaddr = AT.from_lva(self._dynamic["DT_PLTGOT"], self).to_rva()
        wordsize = self.arch.bytes
        for i in range(2, got_local_num):
            reloc = MipsLocalReloc(self, None, gotaddr + i * wordsize)
            self.relocs.append(reloc)

        for i in range(symbol_count - symtab_got_idx):
            symbol = self.get_symbol(i + symtab_got_idx, symtab)
            reloc = MipsGlobalReloc(self, symbol, gotaddr + (i + got_local_num) * wordsize)
            self.relocs.append(reloc)
            self.jmprel[symbol.name] = reloc
        return True

    def __neuter_streams(self, obj):
        if isinstance(obj, elftools.elf.dynamic._DynamicStringTable):
            obj._stream = self.memory
            obj._table_offset = self._offset_to_rva(obj._table_offset)
        elif isinstance(obj, elftools.elf.sections.Section):
            if obj.header.sh_type == "SHT_NOBITS":
                obj.stream = None
                obj.elffile = None
                obj.header.sh_offset = None
            else:
                obj.stream = self.memory
                obj.elffile = None
                obj.header.sh_offset = self._offset_to_rva(obj.header.sh_offset)
        else:
            raise TypeError("Can't convert %r" % type(obj))

    def _offset_to_rva(self, offset):
        return AT.from_mva(self.offset_to_addr(offset), self).to_rva()

    def __process_debug_file(self, filename):
        with open(filename, "rb") as fp:
            try:
                elf = elffile.ELFFile(fp)
            except ELFError:
                log.warning("pyelftools failed to load debug file %s", filename, exc_info=True)
                return

            for sec_readelf in elf.iter_sections():
                if isinstance(sec_readelf, elffile.SymbolTableSection):
                    self.__register_section_symbols(sec_readelf)
                elif sec_readelf.header.sh_type == "SHT_NOTE":
                    self.__register_notes(sec_readelf)

            has_dwarf_info = bool(elf.has_dwarf_info())
            if has_dwarf_info:
                try:
                    dwarf = elf.get_dwarf_info()
                except ELFError:
                    log.warning(
                        "An exception occurred in pyelftools when loading the DWARF information on %s.",
                        filename,
                        exc_info=True,
                    )
                    dwarf = None

                # debug symbols don't have eh_frame ever from what I can tell
                if dwarf:
                    self._load_line_info(dwarf)

    @staticmethod
    def _get_pcode_elf_opinions():
        """
        Load each .opinion file and gather all ELF constraints
        """
        if pypcode is None:
            raise CLEError("pypcode is not installed")

        def flatten_constraints(node):
            constraints = []
            for child in node.findall("constraint"):
                constraints.extend(flatten_constraints(child))
            attribs = node.attrib.copy()
            if not constraints:  # Leaf
                return [attribs]
            for c in constraints:  # Apply attribs to all children
                c.update(attribs)
            return constraints

        log.info("Loading opinions...")
        SPECFILES_DIR = pypcode.SPECFILES_DIR
        elf_opinions = []
        for archname in os.listdir(SPECFILES_DIR):
            langdir = os.path.join(SPECFILES_DIR, archname, "data", "languages")
            if not (os.path.exists(langdir) and os.path.isdir(langdir)):
                continue
            for filename in os.listdir(langdir):
                if not filename.endswith(".opinion"):
                    continue
                path = os.path.join(langdir, filename)
                opinion = xml.etree.ElementTree.parse(path).getroot()
                for c in opinion.findall("constraint"):
                    if c.attrib["loader"] == "Executable and Linking Format (ELF)":
                        elf_opinions.extend(flatten_constraints(c))
        return elf_opinions

    @staticmethod
    def _get_compatible_pcode_languages(reader):
        """
        Find compatible pypcode languages for this ELF file.
        """
        if pypcode is None:
            raise CLEError("pypcode is not installed")

        e_machine = reader.header.e_machine
        if isinstance(e_machine, str):
            e_machine = enums.ENUM_E_MACHINE[e_machine]

        endian = {"ELFDATANONE": None, "ELFDATA2LSB": "little", "ELFDATA2MSB": "big"}[reader.header.e_ident.EI_DATA]

        opinions = []
        for o in ELF._get_pcode_elf_opinions():
            if "endian" in o and o["endian"] != endian:
                continue
            if e_machine not in {int(p) for p in o["primary"].split(",")}:
                continue
            if "secondary" in o:
                try:
                    value = int(o["secondary"])
                    if value != reader.header.e_type:
                        continue
                except ValueError:
                    # FIXME: Mask parsing (spaces and DC 0b .... ..1. ..0.)
                    pass
            opinions.append(o)

        log.info("Available opinions: %s", opinions)

        languages = []
        for arch in pypcode.Arch.enumerate():
            for lang in arch.languages:
                for o in opinions:
                    if (reader.elfclass == 32 and int(lang.size) > 32) or (
                        reader.elfclass == 64 and int(lang.size) <= 32
                    ):
                        continue
                    if all(k not in lang.ldef.attrib or lang.ldef.attrib[k] == v for k, v in o.items()):
                        languages.append(lang)

        log.info("Found candidate languages: %s", [lang.id for lang in languages])
        return languages



register_backend("elf", ELF)