libabigail
Loading...
Searching...
No Matches
abg-symtab-reader.cc
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2// -*- Mode: C++ -*-
3//
4// Copyright (C) 2013-2025 Red Hat, Inc.
5// Copyright (C) 2020-2025 Google, Inc.
6//
7// Author: Matthias Maennich
8
9/// @file
10///
11/// This contains the definition of the symtab reader
12
13#include <algorithm>
14#include <iostream>
15#include <unordered_map>
16#include <unordered_set>
17
18#include "abg-elf-helpers.h"
19#include "abg-fwd.h"
20#include "abg-internal.h"
21#include "abg-tools-utils.h"
22
23// Though this is an internal header, we need to export the symbols to be able
24// to test this code. TODO: find a way to export symbols just for unit tests.
25ABG_BEGIN_EXPORT_DECLARATIONS
26#include "abg-symtab-reader.h"
27ABG_END_EXPORT_DECLARATIONS
28
29namespace abigail
30{
31
32namespace symtab_reader
33{
34
35/// symtab_filter implementations
36
37/// Determine whether a symbol is matching the filter criteria of this filter
38/// object. In terms of a filter functionality, you would _not_ filter out
39/// this symbol if it passes this (i.e. returns true).
40///
41/// @param symbol The Elf symbol under test.
42///
43/// @return whether the symbol matches all relevant / required criteria
44bool
46{
47 if (functions_ && *functions_ != symbol.is_function())
48 return false;
49 if (variables_ && *variables_ != symbol.is_variable())
50 return false;
51 if (public_symbols_ && *public_symbols_ != symbol.is_public())
52 return false;
53 if (undefined_symbols_ && *undefined_symbols_ == symbol.is_defined())
54 return false;
55 if (kernel_symbols_ && *kernel_symbols_ != symbol.is_in_ksymtab())
56 return false;
57
58 return true;
59}
60
61/// symtab implementations
62
63/// Obtain a suitable default filter for iterating this symtab object.
64///
65/// The symtab_filter obtained is populated with some sensible default
66/// settings, such as public_symbols(true) and kernel_symbols(true) if the
67/// binary has been identified as Linux Kernel binary.
68///
69/// @return a symtab_filter with sensible populated defaults
72{
73 symtab_filter filter;
74 filter.set_public_symbols();
75 if (is_kernel_binary_)
76 filter.set_kernel_symbols();
77 return filter;
78}
79
80/// Get a vector of symbols that are associated with a certain name
81///
82/// @param name the name the symbols need to match
83///
84/// @return a vector of symbols, empty if no matching symbols have been found
85const elf_symbols&
86symtab::lookup_symbol(const std::string& name) const
87{
88 static const elf_symbols empty_result;
89 const auto it = name_symbol_map_.find(name);
90 if (it != name_symbol_map_.end())
91 return it->second;
92 return empty_result;
93}
94
95/// Lookup a symbol by its address
96///
97/// @param symbol_addr the starting address of the symbol
98///
99/// @return a symbol if found, else an empty sptr
100const elf_symbol_sptr&
101symtab::lookup_symbol(GElf_Addr symbol_addr) const
102{
103 static const elf_symbol_sptr empty_result;
104 const auto addr_it = addr_symbol_map_.find(symbol_addr);
105 if (addr_it != addr_symbol_map_.end())
106 return addr_it->second;
107 else
108 {
109 // check for a potential entry address mapping instead,
110 // relevant for ppc ELFv1 binaries
111 const auto entry_it = entry_addr_symbol_map_.find(symbol_addr);
112 if (entry_it != entry_addr_symbol_map_.end())
113 return entry_it->second;
114 }
115 return empty_result;
116}
117
118/// Lookup an undefined function symbol with a given name.
119///
120/// @param sym_name the name of the function symbol to lookup.
121///
122/// @return the undefined function symbol found or nil if none was
123/// found.
124const elf_symbol_sptr
126{
127 auto it = undefined_fn_symbols_.find(sym_name);
128 if (it == undefined_fn_symbols_.end())
129 return elf_symbol_sptr();
130 return it->second;
131}
132
133/// Lookup an undefined variable symbol with a given name.
134///
135/// @param sym_name the name of the variable symbol to lookup.
136///
137/// @return the undefined variable symbol found or nil if none was
138/// found.
139const elf_symbol_sptr
141{
143 f.set_functions(false);
144 f.set_public_symbols(false);
145 f.set_undefined_symbols(true);
146 f.set_variables(true);
147
148 elf_symbol_sptr result;
149 for (auto sym : filtered_symtab(*this, f))
150 if (sym_name == sym->get_name())
151 {
152 result = sym;
153 break;
154 }
155 return result;
156}
157
158/// Test if a given function symbol has been exported.
159///
160/// Note that this doesn't test if the symbol is defined or not, but
161/// assumes the symbol is defined.
162///
163/// @param name the name of the symbol we are looking for.
164///
165/// @return the elf symbol if found, or nil otherwise.
168{
169 const elf_symbols& syms = lookup_symbol(name);
170 for (auto s : syms)
171 if (s->is_function() && s->is_public())
172 return s;
173
174 return elf_symbol_sptr();
175}
176
177/// Test if a given function symbol has been exported.
178///
179/// Note that this doesn't test if the symbol is defined or not, but
180/// assumes the symbol is defined.
181///
182/// @param symbol_address the address of the symbol we are looking
183/// for. Note that this address must be a relative offset from the
184/// beginning of the .text section, just like the kind of addresses
185/// that are present in the .symtab section.
186///
187/// @return the elf symbol if found, or nil otherwise.
189symtab::function_symbol_is_exported(const GElf_Addr symbol_address)
190{
191 elf_symbol_sptr symbol = lookup_symbol(symbol_address);
192 if (!symbol)
193 return symbol;
194
195 if (!symbol->is_function() || !symbol->is_public())
196 return elf_symbol_sptr();
197
198 return symbol;
199}
200
201/// Test if a given variable symbol has been exported.
202///
203/// Note that this assumes the symbol is exported but doesn't test for
204/// it.
205///
206/// @param name the name of the symbol we are looking
207/// for.
208///
209/// @return the elf symbol if found, or nil otherwise.
212{
213 const elf_symbols& syms = lookup_symbol(name);
214 for (auto s : syms)
215 if (s->is_variable() && s->is_public())
216 return s;
217
218 return elf_symbol_sptr();
219}
220
221/// Test if a given variable symbol has been exported.
222///
223/// Note that this assumes the symbol is exported but doesn't test for
224/// it.
225///
226/// @param symbol_address the address of the symbol we are looking
227/// for. Note that this address must be a relative offset from the
228/// beginning of the .text section, just like the kind of addresses
229/// that are present in the .symtab section.
230///
231/// @return the elf symbol if found, or nil otherwise.
233symtab::variable_symbol_is_exported(const GElf_Addr symbol_address)
234{
235 elf_symbol_sptr symbol = lookup_symbol(symbol_address);
236 if (!symbol)
237 return symbol;
238
239 if (!symbol->is_variable() || !symbol->is_public())
240 return elf_symbol_sptr();
241
242 return symbol;
243}
244
245/// Test if a name is the name of an undefined function symbol.
246///
247/// @param sym_name the symbol name to consider.
248///
249/// @return the undefined symbol if found, nil otherwise.
252{
253 collect_undefined_fns_and_vars_linkage_names();
254 if (undefined_function_linkage_names_.count(sym_name))
255 {
257 ABG_ASSERT(sym);
258 ABG_ASSERT(sym->is_function());
259 ABG_ASSERT(!sym->is_defined());
260 return sym;
261 }
262 return elf_symbol_sptr();
263}
264
265/// Test if a name is the name of an undefined variable symbol.
266///
267/// @param sym_name the symbol name to consider.
268///
269/// @return the undefined symbol if found, nil otherwise.
272{
273 collect_undefined_fns_and_vars_linkage_names();
274 if (undefined_variable_linkage_names_.count(sym_name))
275 {
277 ABG_ASSERT(sym);
278 ABG_ASSERT(sym->is_variable());
279 ABG_ASSERT(!sym->is_defined());
280 return sym;
281 }
282 return elf_symbol_sptr();
283}
284
285/// A symbol sorting functor.
286static struct
287{
288 bool
289 operator()(const elf_symbol_sptr& left, const elf_symbol_sptr& right)
290 {return left->get_id_string() < right->get_id_string();}
291} symbol_sort;
292
293/// Construct a symtab object and instantiate it from an ELF
294/// handle. Also pass in the ir::environment we are living in. If
295/// specified, the symbol_predicate will be respected when creating
296/// the full vector of symbols.
297///
298/// @param elf_handle the elf handle to load the symbol table from
299///
300/// @param env the environment we are operating in
301///
302/// @param is_suppressed a predicate function to determine if a symbol should
303/// be suppressed
304///
305/// @return a smart pointer handle to symtab, set to nullptr if the load was
306/// not completed
307symtab_ptr
308symtab::load(Elf* elf_handle,
309 const ir::environment& env,
310 symbol_predicate is_suppressed)
311{
312 ABG_ASSERT(elf_handle);
313
314 symtab_ptr result(new symtab);
315 if (!result->load_(elf_handle, env, is_suppressed))
316 return {};
317
318 return result;
319}
320
321/// Construct a symtab object from existing name->symbol lookup maps.
322/// They were possibly read from a different representation (XML maybe).
323///
324/// @param function_symbol_map a map from ELF function name to elf_symbol
325///
326/// @param variables_symbol_map a map from ELF variable name to elf_symbol
327///
328/// @return a smart pointer handle to symtab, set to nullptr if the load was
329/// not completed
330symtab_ptr
332 string_elf_symbols_map_sptr variables_symbol_map)
333{
334 symtab_ptr result(new symtab);
335 if (!result->load_(function_symbol_map, variables_symbol_map))
336 return {};
337
338 return result;
339}
340
341/// Default constructor of the @ref symtab type.
342symtab::symtab()
343 : is_kernel_binary_(false), has_ksymtab_entries_(false),
344 cached_undefined_symbol_names_(false)
345{}
346
347/// Load the symtab representation from an Elf binary presented to us by an
348/// Elf* handle.
349///
350/// This method iterates over the entries of .symtab and collects all
351/// interesting symbols (functions and variables).
352///
353/// In case of a Linux Kernel binary, it also collects information about the
354/// symbols exported via EXPORT_SYMBOL in the Kernel that would then end up
355/// having a corresponding __ksymtab entry.
356///
357/// Symbols that are suppressed will be omitted from the symbols_ vector, but
358/// still be discoverable through the name->symbol and addr->symbol lookup
359/// maps.
360///
361/// @param elf_handle the elf handle to load the symbol table from
362///
363/// @param env the environment we are operating in
364///
365/// @param is_suppressed a predicate function to determine if a symbol should
366/// be suppressed
367///
368/// @return true if the load succeeded
369bool
370symtab::load_(Elf* elf_handle,
371 const ir::environment& env,
372 symbol_predicate is_suppressed)
373{
374 GElf_Ehdr ehdr_mem;
375 GElf_Ehdr* header = gelf_getehdr(elf_handle, &ehdr_mem);
376 if (!header)
377 {
378 std::cerr << "Could not get ELF header: Skipping symtab load.\n";
379 return false;
380 }
381
382 Elf_Scn* symtab_section = elf_helpers::find_symbol_table_section(elf_handle);
383 if (!symtab_section)
384 {
385 std::cerr << "No symbol table found: Skipping symtab load.\n";
386 return false;
387 }
388
389 GElf_Shdr symtab_sheader;
390 gelf_getshdr(symtab_section, &symtab_sheader);
391
392 // check for bogus section header
393 if (symtab_sheader.sh_entsize == 0)
394 {
395 std::cerr << "Invalid symtab header found: Skipping symtab load.\n";
396 return false;
397 }
398
399 const size_t number_syms =
400 symtab_sheader.sh_size / symtab_sheader.sh_entsize;
401
402 Elf_Data* symtab = elf_getdata(symtab_section, 0);
403 if (!symtab)
404 {
405 std::cerr << "Could not load elf symtab: Skipping symtab load.\n";
406 return false;
407 }
408
409 // The __kstrtab_strings sections is basically an ELF strtab but does not
410 // support elf_strptr lookups. A single call to elf_getdata gives a handle to
411 // washed section data.
412 //
413 // The value of a __kstrtabns_FOO (or other similar) symbol is an address
414 // within the __kstrtab_strings section. To look up the string value, we need
415 // to translate from vmlinux load address to section offset by subtracting the
416 // base address of the section. This adjustment is not needed for loadable
417 // modules which are relocatable and so identifiable by ELF type ET_REL.
418 Elf_Scn* strings_section = elf_helpers::find_ksymtab_strings_section(elf_handle);
419 size_t strings_offset = 0;
420 const char* strings_data = nullptr;
421 size_t strings_size = 0;
422 if (strings_section)
423 {
424 GElf_Shdr strings_sheader;
425 gelf_getshdr(strings_section, &strings_sheader);
426 strings_offset = header->e_type == ET_REL ? 0 : strings_sheader.sh_addr;
427 Elf_Data* data = elf_getdata(strings_section, nullptr);
428 ABG_ASSERT(data->d_off == 0);
429 strings_data = reinterpret_cast<const char *>(data->d_buf);
430 strings_size = data->d_size;
431 }
432
433 const bool is_kernel = elf_helpers::is_linux_kernel(elf_handle);
434 std::unordered_set<std::string> exported_kernel_symbols;
435 std::unordered_map<std::string, uint32_t> crc_values;
436 std::unordered_map<std::string, std::string> namespaces;
437
438 for (size_t i = 0; i < number_syms; ++i)
439 {
440 GElf_Sym *sym, sym_mem;
441 sym = gelf_getsym(symtab, i, &sym_mem);
442 if (!sym)
443 {
444 std::cerr << "Could not load symbol with index " << i
445 << ": Skipping symtab load.\n";
446 return false;
447 }
448
449 const char* const name_str =
450 elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
451
452 // no name, no game
453 if (!name_str)
454 continue;
455
456 const std::string name = name_str;
457 if (name.empty())
458 continue;
459
460 // Handle ksymtab entries. Every symbol entry that starts with __ksymtab_
461 // indicates that the symbol in question is exported through ksymtab. We
462 // do not know whether this is ksymtab_gpl or ksymtab, but that is good
463 // enough for now.
464 //
465 // We could follow up with this entry:
466 //
467 // symbol_value -> ksymtab_entry in either ksymtab_gpl or ksymtab
468 // -> addr/name/namespace (in case of PREL32: offset)
469 //
470 // That way we could also detect ksymtab<>ksymtab_gpl changes or changes
471 // of the symbol namespace.
472 //
473 // As of now this lookup is fragile, as occasionally ksymtabs are empty
474 // (seen so far for kernel modules and LTO builds). Hence we stick to the
475 // fairly safe assumption that ksymtab exported entries are having an
476 // appearence as __ksymtab_<symbol> in the symtab.
477 if (is_kernel && name.rfind("__ksymtab_", 0) == 0)
478 {
479 ABG_ASSERT(exported_kernel_symbols.insert(name.substr(10)).second);
480 continue;
481 }
482 if (is_kernel && name.rfind("__crc_", 0) == 0)
483 {
484 uint32_t crc_value;
485 ABG_ASSERT(elf_helpers::get_crc_for_symbol(elf_handle,
486 sym, crc_value));
487 ABG_ASSERT(crc_values.emplace(name.substr(6), crc_value).second);
488 continue;
489 }
490 if (strings_section && is_kernel && name.rfind("__kstrtabns_", 0) == 0)
491 {
492 // This symbol lives in the __ksymtab_strings section but st_value may
493 // be a vmlinux load address so we need to subtract the offset before
494 // looking it up in that section.
495 const size_t value = sym->st_value;
496 const size_t offset = value - strings_offset;
497 // check offset
498 ABG_ASSERT(offset < strings_size);
499 // find the terminating NULL
500 const char* first = strings_data + offset;
501 const char* last = strings_data + strings_size;
502 const char* limit = std::find(first, last, 0);
503 // check NULL found
504 ABG_ASSERT(limit < last);
505 // interpret the empty namespace name as no namespace name
506 if (first < limit)
507 ABG_ASSERT(namespaces.emplace(
508 name.substr(12), std::string(first, limit - first)).second);
509 continue;
510 }
511
512 // filter out uninteresting entries and only keep functions/variables for
513 // now. The rest might be interesting in the future though.
514 const int sym_type = GELF_ST_TYPE(sym->st_info);
515 if (!(sym_type == STT_FUNC
516 || sym_type == STT_GNU_IFUNC
517 // If the symbol is for an OBJECT, the index of the
518 // section it refers to cannot be absolute.
519 // Otherwise that OBJECT is not a variable.
520 || (sym_type == STT_OBJECT && sym->st_shndx != SHN_ABS)
521 // Undefined global variable symbols have symbol type
522 // STT_NOTYPE. No idea why.
523 || (sym_type == STT_NOTYPE && sym->st_shndx == SHN_UNDEF)
524 || sym_type == STT_TLS))
525 continue;
526
527 const bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
528 // this occurs in relocatable files.
529 const bool sym_is_common = sym->st_shndx == SHN_COMMON;
530
531 elf_symbol::version ver;
532 elf_helpers::get_version_for_symbol(elf_handle, i, sym_is_defined, ver);
533
534 const elf_symbol_sptr& symbol_sptr =
536 (env, i, sym->st_size, name,
537 elf_helpers::stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info)),
538 elf_helpers::stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info)),
539 sym_is_defined, sym_is_common, ver,
540 elf_helpers::stv_to_elf_symbol_visibility
541 (GELF_ST_VISIBILITY(sym->st_other)));
542
543 // add to the name->symbol lookup
544 name_symbol_map_[name].push_back(symbol_sptr);
545
546 // add to the addr->symbol lookup
547 if (symbol_sptr->is_common_symbol())
548 {
549 const auto it = name_symbol_map_.find(name);
550 ABG_ASSERT(it != name_symbol_map_.end());
551 const elf_symbols& common_sym_instances = it->second;
552 ABG_ASSERT(!common_sym_instances.empty());
553 if (common_sym_instances.size() > 1)
554 {
555 elf_symbol_sptr main_common_sym = common_sym_instances[0];
556 ABG_ASSERT(main_common_sym->get_name() == name);
557 ABG_ASSERT(main_common_sym->is_common_symbol());
558 ABG_ASSERT(symbol_sptr.get() != main_common_sym.get());
559 main_common_sym->add_common_instance(symbol_sptr);
560 }
561 }
562 else if (symbol_sptr->is_defined())
563 setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
564 }
565
566 // Now that symbols aliases have been constructed, let's determine
567 // what symbol has been suppressed or not. Suppression takes into
568 // account
569 for (auto& elem : name_symbol_map_)
570 {
571 auto& symbols = elem.second;
572 for (auto& symbol : symbols)
573 {
574 // We do not take suppressed symbols into our symbol vector
575 // to avoid accidental leakage. But we ensure supressed
576 // symbols are otherwise set up for lookup.
577 if (!(is_suppressed && is_suppressed(symbol)))
578 {
579 // add to the symbol vector
580 symbols_.push_back(symbol);
581 if (!symbol->is_defined())
582 {
583 if (symbol->is_function())
584 undefined_fn_symbols_[symbol->get_name()] = symbol;
585 else if (symbol->is_variable())
586 undefined_var_symbols_[symbol->get_name()] = symbol;
587 }
588 }
589 else
590 symbol->set_is_suppressed(true);
591 }
592 }
593
594 add_alternative_address_lookups(elf_handle);
595
596 is_kernel_binary_ = elf_helpers::is_linux_kernel(elf_handle);
597
598 // Now apply the ksymtab_exported attribute to the symbols we collected.
599 for (const auto& symbol : exported_kernel_symbols)
600 {
601 const auto r = name_symbol_map_.find(symbol);
602 if (r == name_symbol_map_.end())
603 continue;
604
605 for (const auto& elf_symbol : r->second)
606 if (elf_symbol->is_public())
607 elf_symbol->set_is_in_ksymtab(true);
608 has_ksymtab_entries_ = true;
609 }
610
611 // Now add the CRC values
612 for (const auto& crc_entry : crc_values)
613 {
614 const auto r = name_symbol_map_.find(crc_entry.first);
615 if (r == name_symbol_map_.end())
616 continue;
617
618 for (const auto& symbol : r->second)
619 symbol->set_crc(crc_entry.second);
620 }
621
622 // Now add the namespaces
623 for (const auto& namespace_entry : namespaces)
624 {
625 const auto r = name_symbol_map_.find(namespace_entry.first);
626 if (r == name_symbol_map_.end())
627 continue;
628
629 for (const auto& symbol : r->second)
630 symbol->set_namespace(namespace_entry.second);
631 }
632
633 // sort the symbols for deterministic output
634 std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
635
636 return true;
637}
638
639/// Load the symtab representation from a function/variable lookup map pair.
640///
641/// This method assumes the lookup maps are correct and sets up the data
642/// vector as well as the name->symbol lookup map. The addr->symbol lookup
643/// map cannot be set up in this case.
644///
645/// @param function_symbol_map a map from ELF function name to elf_symbol
646///
647/// @param variable_symbol_map a map from ELF variable name to elf_symbol
648///
649/// @return true if the load succeeded
650bool
651symtab::load_(string_elf_symbols_map_sptr function_symbol_map,
652 string_elf_symbols_map_sptr variables_symbol_map)
653
654{
655 if (function_symbol_map)
656 for (const auto& symbol_map_entry : *function_symbol_map)
657 {
658 for (const auto& symbol : symbol_map_entry.second)
659 {
660 if (!symbol->is_suppressed())
661 {
662 symbols_.push_back(symbol);
663 if (!symbol->is_defined())
664 undefined_fn_symbols_[symbol->get_name()] = symbol;
665 }
666 }
667 ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
668 }
669
670 if (variables_symbol_map)
671 for (const auto& symbol_map_entry : *variables_symbol_map)
672 {
673 for (const auto& symbol : symbol_map_entry.second)
674 {
675 if (!symbol->is_suppressed())
676 {
677 symbols_.push_back(symbol);
678 if (!symbol->is_defined())
679 undefined_var_symbols_[symbol->get_name()] = symbol;
680 }
681 }
682 ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
683 }
684
685 // sort the symbols for deterministic output
686 std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
687
688 return true;
689}
690
691/// Notify the symtab about the name of the main symbol at a given address.
692///
693/// From just alone the symtab we can't guess the main symbol of a bunch of
694/// aliased symbols that all point to the same address. During processing of
695/// additional information (such as DWARF), this information becomes apparent
696/// and we can adjust the addr->symbol lookup map as well as the alias
697/// reference of the symbol objects.
698///
699/// @param addr the addr that we are updating the main symbol for
700/// @param name the name of the main symbol
701void
702symtab::update_main_symbol(GElf_Addr addr, const std::string& name)
703{
704 // get one symbol (i.e. the current main symbol)
705 elf_symbol_sptr symbol = lookup_symbol(addr);
706
707 // The caller might not know whether the addr is associated to an ELF symbol
708 // that we care about. E.g. the addr could be associated to an ELF symbol,
709 // but not one in .dynsym when looking at a DSO. Hence, early exit if the
710 // lookup failed.
711 if (!symbol)
712 return;
713
714 // determine the new main symbol by attempting an update
715 elf_symbol_sptr new_main = symbol->update_main_symbol(name);
716
717 // also update the default symbol we return when looked up by address
718 if (new_main)
719 addr_symbol_map_[addr] = new_main;
720}
721
722/// Various adjustments and bookkeeping may be needed to provide a correct
723/// interpretation (one that matches DWARF addresses) of raw symbol values.
724///
725/// This is a sub-routine for symtab::load_ and
726/// symtab::add_alternative_address_lookups and must be called only
727/// once (per symbol) during the execution of the former.
728///
729/// @param elf_handle the ELF handle
730///
731/// @param elf_symbol the ELF symbol
732///
733/// @param symbol_sptr the libabigail symbol
734///
735/// @return a possibly-adjusted symbol value
736GElf_Addr
737symtab::setup_symbol_lookup_tables(Elf* elf_handle,
738 GElf_Sym* elf_symbol,
739 const elf_symbol_sptr& symbol_sptr)
740{
741 const bool is_arm32 = elf_helpers::architecture_is_arm32(elf_handle);
742 const bool is_arm64 = elf_helpers::architecture_is_arm64(elf_handle);
743 const bool is_ppc64 = elf_helpers::architecture_is_ppc64(elf_handle);
744 const bool is_ppc32 = elf_helpers::architecture_is_ppc32(elf_handle);
745
746 GElf_Addr symbol_value =
747 elf_helpers::maybe_adjust_et_rel_sym_addr_to_abs_addr(elf_handle,
748 elf_symbol);
749
750 if (is_arm32 && symbol_sptr->is_function())
751 // Clear bit zero of ARM32 addresses as per "ELF for the Arm
752 // Architecture" section 5.5.3.
753 // https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
754 symbol_value &= ~1;
755
756 if (is_arm64)
757 // Copy bit 55 over bits 56 to 63 which may be tag information.
758 symbol_value = symbol_value & (1ULL<<55)
759 ? symbol_value | (0xffULL<<56)
760 : symbol_value &~ (0xffULL<<56);
761
762 if (symbol_sptr->is_defined())
763 {
764 const auto result =
765 addr_symbol_map_.emplace(symbol_value, symbol_sptr);
766 if (!result.second)
767 // A symbol with the same address already exists. This
768 // means this symbol is an alias of the main symbol with
769 // that address. So let's register this new alias as such.
770 result.first->second->get_main_symbol()->add_alias(symbol_sptr);
771 }
772
773 // Please note that update_function_entry_address_symbol_map depends
774 // on the symbol aliases been setup. This is why, the
775 // elf_symbol::add_alias call is done above BEFORE this point.
776 if ((is_ppc64 || is_ppc32) && symbol_sptr->is_function())
777 update_function_entry_address_symbol_map(elf_handle, elf_symbol,
778 symbol_sptr);
779
780 return symbol_value;
781}
782
783/// Update the function entry symbol map to later allow lookups of this symbol
784/// by entry address as well. This is relevant for ppc64 ELFv1 binaries.
785///
786/// For ppc64 ELFv1 binaries, we need to build a function entry point address
787/// -> function symbol map. This is in addition to the function pointer ->
788/// symbol map. This is because on ppc64 ELFv1, a function pointer is
789/// different from a function entry point address.
790///
791/// On ppc64 ELFv1, the DWARF DIE of a function references the address of the
792/// entry point of the function symbol; whereas the value of the function
793/// symbol is the function pointer. As these addresses are different, if I we
794/// want to get to the symbol of a function from its entry point address (as
795/// referenced by DWARF function DIEs) we must have the two maps I mentionned
796/// right above.
797///
798/// In other words, we need a map that associates a function entry point
799/// address with the symbol of that function, to be able to get the function
800/// symbol that corresponds to a given function DIE, on ppc64.
801///
802/// The value of the function pointer (the value of the symbol) usually refers
803/// to the offset of a table in the .opd section. But sometimes, for a symbol
804/// named "foo", the corresponding symbol named ".foo" (note the dot before
805/// foo) which value is the entry point address of the function; that entry
806/// point address refers to a region in the .text section.
807///
808/// So we are only interested in values of the symbol that are in the .opd
809/// section.
810///
811/// @param elf_handle the ELF handle to operate on
812///
813/// @param native_symbol the native Elf symbol to update the entry for
814///
815/// @param symbol_sptr the internal symbol to associte the entry address with
816void
817symtab::update_function_entry_address_symbol_map(
818 Elf* elf_handle, GElf_Sym* native_symbol, const elf_symbol_sptr& symbol_sptr)
819{
820 const GElf_Addr fn_desc_addr = native_symbol->st_value;
821 const GElf_Addr fn_entry_point_addr =
822 elf_helpers::lookup_ppc64_elf_fn_entry_point_address(elf_handle,
823 fn_desc_addr);
824
825 const std::pair<addr_symbol_map_type::const_iterator, bool>& result =
826 entry_addr_symbol_map_.emplace(fn_entry_point_addr, symbol_sptr);
827
828 const addr_symbol_map_type::const_iterator it = result.first;
829 const bool was_inserted = result.second;
830 if (!was_inserted
831 && elf_helpers::address_is_in_opd_section(elf_handle, fn_desc_addr))
832 {
833 // Either
834 //
835 // 'symbol' must have been registered as an alias for
836 // it->second->get_main_symbol()
837 //
838 // Or
839 //
840 // if the name of 'symbol' is foo, then the name of it2->second is
841 // ".foo". That is, foo is the name of the symbol when it refers to the
842 // function descriptor in the .opd section and ".foo" is an internal name
843 // for the address of the entry point of foo.
844 //
845 // In the latter case, we just want to keep a reference to "foo" as .foo
846 // is an internal name.
847
848 const bool two_symbols_alias =
849 it->second->get_main_symbol()->does_alias(*symbol_sptr);
850 const bool symbol_is_foo_and_prev_symbol_is_dot_foo =
851 (it->second->get_name() == std::string(".") + symbol_sptr->get_name());
852
853 ABG_ASSERT(two_symbols_alias
854 || symbol_is_foo_and_prev_symbol_is_dot_foo);
855
856 if (symbol_is_foo_and_prev_symbol_is_dot_foo)
857 // Let's just keep a reference of the symbol that the user sees in the
858 // source code (the one named foo). The symbol which name is prefixed
859 // with a "dot" is an artificial one.
860 entry_addr_symbol_map_[fn_entry_point_addr] = symbol_sptr;
861 }
862}
863
864/// Fill up the lookup maps with alternative keys
865///
866/// Due to special features like Control-Flow-Integrity (CFI), the symbol
867/// lookup could be done indirectly. E.g. enabling CFI causes clang to
868/// associate the DWARF information with the actual CFI protected function
869/// (suffix .cfi) instead of with the entry symbol in the symtab.
870///
871/// This function adds additional lookup keys to compensate for that.
872///
873/// So far, this only implements CFI support, by adding addr->symbol pairs
874/// where
875/// addr : symbol value of the <foo>.cfi value
876/// symbol : symbol_sptr looked up via "<foo>"
877///
878/// @param elf_handle the ELF handle to operate on
879void
880symtab::add_alternative_address_lookups(Elf* elf_handle)
881{
882 Elf_Scn* symtab_section = elf_helpers::find_symtab_section(elf_handle);
883 if (!symtab_section)
884 return;
885 GElf_Shdr symtab_sheader;
886 gelf_getshdr(symtab_section, &symtab_sheader);
887
888 const size_t number_syms =
889 symtab_sheader.sh_size / symtab_sheader.sh_entsize;
890
891 Elf_Data* symtab = elf_getdata(symtab_section, 0);
892
893 for (size_t i = 0; i < number_syms; ++i)
894 {
895 GElf_Sym *sym, sym_mem;
896 sym = gelf_getsym(symtab, i, &sym_mem);
897 if (!sym)
898 {
899 std::cerr << "Could not load symbol with index " << i
900 << ": Skipping alternative symbol load.\n";
901 continue;
902 }
903
904 const char* const name_str =
905 elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
906
907 // no name, no game
908 if (!name_str)
909 continue;
910
911 const std::string name = name_str;
912 if (name.empty())
913 continue;
914
915 // Add alternative lookup addresses for CFI symbols
916 static const std::string cfi = ".cfi";
917 if (name.size() > cfi.size()
918 && name.compare(name.size() - cfi.size(), cfi.size(), cfi) == 0)
919 // ... name.ends_with(".cfi")
920 {
921 const auto candidate_name = name.substr(0, name.size() - cfi.size());
922
923 auto symbols = lookup_symbol(candidate_name);
924 // lookup_symbol returns a vector of symbols. For this case we handle
925 // only the case that there has been exactly one match. Otherwise we
926 // can't reasonably handle it and need to bail out.
927 ABG_ASSERT(symbols.size() <= 1);
928 if (symbols.size() == 1)
929 {
930 const auto& symbol_sptr = symbols[0];
931 setup_symbol_lookup_tables(elf_handle, sym, symbol_sptr);
932 }
933 }
934 }
935}
936
937/// Collect the names of the variable and function symbols that are
938/// undefined. Cache those names into sets to speed up their lookup.
939///
940/// Once the names are cached into sets, subsequent invocations of
941/// this function are essentially a no-op.
942void
943symtab::collect_undefined_fns_and_vars_linkage_names()
944{
945 if (!cached_undefined_symbol_names_)
946 {
947 {
948 symtab_filter f = make_filter();
949 f.set_variables(false);
950 f.set_functions(true);
951 f.set_public_symbols(false);
952 f.set_undefined_symbols(true);
953 for (auto sym : filtered_symtab(*this, f))
954 undefined_function_linkage_names_.insert(sym->get_name());
955 }
956
957 {
958 symtab_filter f = make_filter();
959 f.set_variables(true);
960 f.set_functions(false);
961 f.set_public_symbols(false);
962 f.set_undefined_symbols(true);
963 for (auto sym : filtered_symtab(*this, f))
964 undefined_variable_linkage_names_.insert(sym->get_name());
965 }
966 }
967 cached_undefined_symbol_names_ = true;
968}
969} // end namespace symtab_reader
970} // end namespace abigail
This contains a set of ELF utilities used by the dwarf reader.
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects,...
Definition abg-fwd.h:1743
This contains the declarations for the symtab reader.
Abstraction of an elf symbol.
Definition abg-ir.h:961
bool is_variable() const
Test if the current instance of elf_symbol is a variable symbol or not.
Definition abg-ir.cc:2299
bool is_function() const
Test if the current instance of elf_symbol is a function symbol or not.
Definition abg-ir.cc:2290
static elf_symbol_sptr create(const environment &e, size_t i, size_t s, const string &n, type t, binding b, bool d, bool c, const version &ve, visibility vi, bool is_in_ksymtab=false, const abg_compat::optional< uint32_t > &crc={}, const abg_compat::optional< std::string > &ns={}, bool is_suppressed=false)
Factory of instances of elf_symbol.
Definition abg-ir.cc:2063
bool is_public() const
Test if the current instance of elf_symbol is public or not.
Definition abg-ir.cc:2274
bool is_in_ksymtab() const
Getter of the 'is-in-ksymtab' property.
Definition abg-ir.cc:2313
bool is_defined() const
Test if the current instance of elf_symbol is defined or not.
Definition abg-ir.cc:2252
This is an abstraction of the set of resources necessary to manage several aspects of the internal re...
Definition abg-ir.h:148
Helper class to allow range-for loops on symtabs for C++11 and later code. It serves as a proxy for t...
The symtab filter is the object passed to the symtab object in order to iterate over the symbols in t...
void set_public_symbols(bool new_value=true)
Enable or disable public symbol filtering.
bool matches(const elf_symbol &symbol) const
symtab_filter implementations
void set_functions(bool new_value=true)
Enable or disable function filtering.
void set_kernel_symbols(bool new_value=true)
Enable or disable kernel symbol filtering.
void set_variables(bool new_value=true)
Enable or disable variable filtering.
void set_undefined_symbols(bool new_value=true)
Enable or disable undefined symbol filtering.
symtab is the actual data container of the symtab_reader implementation.
const elf_symbol_sptr lookup_undefined_variable_symbol(const std::string &name)
Lookup an undefined variable symbol with a given name.
const elf_symbols & lookup_symbol(const std::string &name) const
Get a vector of symbols that are associated with a certain name.
symtab_filter make_filter() const
symtab implementations
static symtab_ptr load(Elf *elf_handle, const ir::environment &env, symbol_predicate is_suppressed=NULL)
Construct a symtab object and instantiate it from an ELF handle. Also pass in the ir::environment we ...
elf_symbol_sptr function_symbol_is_undefined(const string &)
Test if a name is the name of an undefined function symbol.
elf_symbol_sptr variable_symbol_is_undefined(const string &)
Test if a name is the name of an undefined variable symbol.
elf_symbol_sptr function_symbol_is_exported(const string &)
Test if a given function symbol has been exported.
elf_symbol_sptr variable_symbol_is_exported(const string &)
Test if a given variable symbol has been exported.
const elf_symbol_sptr lookup_undefined_function_symbol(const std::string &name)
Lookup an undefined function symbol with a given name.
void update_main_symbol(GElf_Addr addr, const std::string &name)
Notify the symtab about the name of the main symbol at a given address.
shared_ptr< elf_symbol > elf_symbol_sptr
A convenience typedef for a shared pointer to elf_symbol.
Definition abg-ir.h:926
std::vector< elf_symbol_sptr > elf_symbols
Convenience typedef for a vector of elf_symbol.
Definition abg-ir.h:942
string get_name(const type_or_decl_base *tod, bool qualified)
Build and return a copy of the name of an ABI artifact that is either a type or a decl.
Definition abg-ir.cc:8686
shared_ptr< string_elf_symbols_map_type > string_elf_symbols_map_sptr
Convenience typedef for a shared pointer to string_elf_symbols_map_type.
Definition abg-ir.h:951
Toplevel namespace for libabigail.