object/read/macho/
symbol.rs

1use alloc::vec::Vec;
2use core::fmt::Debug;
3use core::{fmt, slice, str};
4
5use crate::endian::{self, Endianness};
6use crate::macho;
7use crate::pod::Pod;
8use crate::read::util::StringTable;
9use crate::read::{
10    self, ObjectMap, ObjectMapEntry, ObjectMapFile, ObjectSymbol, ObjectSymbolTable, ReadError,
11    ReadRef, Result, SectionIndex, SectionKind, SymbolFlags, SymbolIndex, SymbolKind, SymbolMap,
12    SymbolMapEntry, SymbolScope, SymbolSection,
13};
14
15use super::{MachHeader, MachOFile};
16
17/// A table of symbol entries in a Mach-O file.
18///
19/// Also includes the string table used for the symbol names.
20///
21/// Returned by [`macho::SymtabCommand::symbols`].
22#[derive(Debug, Clone, Copy)]
23pub struct SymbolTable<'data, Mach: MachHeader, R = &'data [u8]>
24where
25    R: ReadRef<'data>,
26{
27    symbols: &'data [Mach::Nlist],
28    strings: StringTable<'data, R>,
29}
30
31impl<'data, Mach: MachHeader, R: ReadRef<'data>> Default for SymbolTable<'data, Mach, R> {
32    fn default() -> Self {
33        SymbolTable {
34            symbols: &[],
35            strings: Default::default(),
36        }
37    }
38}
39
40impl<'data, Mach: MachHeader, R: ReadRef<'data>> SymbolTable<'data, Mach, R> {
41    #[inline]
42    pub(super) fn new(symbols: &'data [Mach::Nlist], strings: StringTable<'data, R>) -> Self {
43        SymbolTable { symbols, strings }
44    }
45
46    /// Return the string table used for the symbol names.
47    #[inline]
48    pub fn strings(&self) -> StringTable<'data, R> {
49        self.strings
50    }
51
52    /// Iterate over the symbols.
53    #[inline]
54    pub fn iter(&self) -> slice::Iter<'data, Mach::Nlist> {
55        self.symbols.iter()
56    }
57
58    /// Return true if the symbol table is empty.
59    #[inline]
60    pub fn is_empty(&self) -> bool {
61        self.symbols.is_empty()
62    }
63
64    /// The number of symbols.
65    #[inline]
66    pub fn len(&self) -> usize {
67        self.symbols.len()
68    }
69
70    /// Return the symbol at the given index.
71    pub fn symbol(&self, index: SymbolIndex) -> Result<&'data Mach::Nlist> {
72        self.symbols
73            .get(index.0)
74            .read_error("Invalid Mach-O symbol index")
75    }
76
77    /// Construct a map from addresses to a user-defined map entry.
78    pub fn map<Entry: SymbolMapEntry, F: Fn(&'data Mach::Nlist) -> Option<Entry>>(
79        &self,
80        f: F,
81    ) -> SymbolMap<Entry> {
82        let mut symbols = Vec::new();
83        for nlist in self.symbols {
84            if !nlist.is_definition() {
85                continue;
86            }
87            if let Some(entry) = f(nlist) {
88                symbols.push(entry);
89            }
90        }
91        SymbolMap::new(symbols)
92    }
93
94    /// Construct a map from addresses to symbol names and object file names.
95    pub fn object_map(&self, endian: Mach::Endian) -> ObjectMap<'data> {
96        let mut symbols = Vec::new();
97        let mut objects = Vec::new();
98        let mut object = None;
99        let mut current_function = None;
100        // Each module starts with one or two N_SO symbols (path, or directory + filename)
101        // and one N_OSO symbol. The module is terminated by an empty N_SO symbol.
102        for nlist in self.symbols {
103            let n_type = nlist.n_type();
104            if n_type & macho::N_STAB == 0 {
105                continue;
106            }
107            // TODO: includes global symbols too (N_GSYM). These may need to get their
108            // address from regular symbols though.
109            match n_type {
110                macho::N_SO => {
111                    object = None;
112                }
113                macho::N_OSO => {
114                    object = None;
115                    if let Ok(name) = nlist.name(endian, self.strings) {
116                        if !name.is_empty() {
117                            object = Some(objects.len());
118                            // `N_OSO` symbol names can be either `/path/to/object.o`
119                            // or `/path/to/archive.a(object.o)`.
120                            let (path, member) = name
121                                .split_last()
122                                .and_then(|(last, head)| {
123                                    if *last != b')' {
124                                        return None;
125                                    }
126                                    let index = head.iter().position(|&x| x == b'(')?;
127                                    let (archive, rest) = head.split_at(index);
128                                    Some((archive, Some(&rest[1..])))
129                                })
130                                .unwrap_or((name, None));
131                            objects.push(ObjectMapFile::new(path, member));
132                        }
133                    }
134                }
135                macho::N_FUN => {
136                    if let Ok(name) = nlist.name(endian, self.strings) {
137                        if !name.is_empty() {
138                            current_function = Some((name, nlist.n_value(endian).into()))
139                        } else if let Some((name, address)) = current_function.take() {
140                            if let Some(object) = object {
141                                symbols.push(ObjectMapEntry {
142                                    address,
143                                    size: nlist.n_value(endian).into(),
144                                    name,
145                                    object,
146                                });
147                            }
148                        }
149                    }
150                }
151                macho::N_STSYM => {
152                    // Static symbols have a single entry with the address of the symbol
153                    // but no size
154                    if let Ok(name) = nlist.name(endian, self.strings) {
155                        if let Some(object) = object {
156                            symbols.push(ObjectMapEntry {
157                                address: nlist.n_value(endian).into(),
158                                size: 0,
159                                name,
160                                object,
161                            })
162                        }
163                    }
164                }
165                _ => {}
166            }
167        }
168        ObjectMap {
169            symbols: SymbolMap::new(symbols),
170            objects,
171        }
172    }
173}
174
/// A symbol table in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbolTable`] with the header type fixed to
/// [`macho::MachHeader32`].
pub type MachOSymbolTable32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolTable<'data, 'file, macho::MachHeader32<Endian>, R>;
/// A symbol table in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbolTable`] with the header type fixed to
/// [`macho::MachHeader64`].
pub type MachOSymbolTable64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolTable<'data, 'file, macho::MachHeader64<Endian>, R>;
181
182/// A symbol table in a [`MachOFile`].
183#[derive(Debug, Clone, Copy)]
184pub struct MachOSymbolTable<'data, 'file, Mach, R = &'data [u8]>
185where
186    Mach: MachHeader,
187    R: ReadRef<'data>,
188{
189    pub(super) file: &'file MachOFile<'data, Mach, R>,
190}
191
192impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbolTable<'data, 'file, Mach, R>
193where
194    Mach: MachHeader,
195    R: ReadRef<'data>,
196{
197}
198
199impl<'data, 'file, Mach, R> ObjectSymbolTable<'data> for MachOSymbolTable<'data, 'file, Mach, R>
200where
201    Mach: MachHeader,
202    R: ReadRef<'data>,
203{
204    type Symbol = MachOSymbol<'data, 'file, Mach, R>;
205    type SymbolIterator = MachOSymbolIterator<'data, 'file, Mach, R>;
206
207    fn symbols(&self) -> Self::SymbolIterator {
208        MachOSymbolIterator::new(self.file)
209    }
210
211    fn symbol_by_index(&self, index: SymbolIndex) -> Result<Self::Symbol> {
212        let nlist = self.file.symbols.symbol(index)?;
213        MachOSymbol::new(self.file, index, nlist).read_error("Unsupported Mach-O symbol index")
214    }
215}
216
/// An iterator for the symbols in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbolIterator`] with the header type fixed to
/// [`macho::MachHeader32`].
pub type MachOSymbolIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolIterator<'data, 'file, macho::MachHeader32<Endian>, R>;
/// An iterator for the symbols in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbolIterator`] with the header type fixed to
/// [`macho::MachHeader64`].
pub type MachOSymbolIterator64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbolIterator<'data, 'file, macho::MachHeader64<Endian>, R>;
223
224/// An iterator for the symbols in a [`MachOFile`].
225pub struct MachOSymbolIterator<'data, 'file, Mach, R = &'data [u8]>
226where
227    Mach: MachHeader,
228    R: ReadRef<'data>,
229{
230    file: &'file MachOFile<'data, Mach, R>,
231    index: SymbolIndex,
232}
233
234impl<'data, 'file, Mach, R> MachOSymbolIterator<'data, 'file, Mach, R>
235where
236    Mach: MachHeader,
237    R: ReadRef<'data>,
238{
239    pub(super) fn new(file: &'file MachOFile<'data, Mach, R>) -> Self {
240        MachOSymbolIterator {
241            file,
242            index: SymbolIndex(0),
243        }
244    }
245
246    pub(super) fn empty(file: &'file MachOFile<'data, Mach, R>) -> Self {
247        MachOSymbolIterator {
248            file,
249            index: SymbolIndex(file.symbols.len()),
250        }
251    }
252}
253
254impl<'data, 'file, Mach, R> fmt::Debug for MachOSymbolIterator<'data, 'file, Mach, R>
255where
256    Mach: MachHeader,
257    R: ReadRef<'data>,
258{
259    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
260        f.debug_struct("MachOSymbolIterator").finish()
261    }
262}
263
264impl<'data, 'file, Mach, R> Iterator for MachOSymbolIterator<'data, 'file, Mach, R>
265where
266    Mach: MachHeader,
267    R: ReadRef<'data>,
268{
269    type Item = MachOSymbol<'data, 'file, Mach, R>;
270
271    fn next(&mut self) -> Option<Self::Item> {
272        loop {
273            let index = self.index;
274            let nlist = self.file.symbols.symbols.get(index.0)?;
275            self.index.0 += 1;
276            if let Some(symbol) = MachOSymbol::new(self.file, index, nlist) {
277                return Some(symbol);
278            }
279        }
280    }
281}
282
/// A symbol in a [`MachOFile32`](super::MachOFile32).
///
/// This is [`MachOSymbol`] with the header type fixed to
/// [`macho::MachHeader32`].
pub type MachOSymbol32<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbol<'data, 'file, macho::MachHeader32<Endian>, R>;
/// A symbol in a [`MachOFile64`](super::MachOFile64).
///
/// This is [`MachOSymbol`] with the header type fixed to
/// [`macho::MachHeader64`].
pub type MachOSymbol64<'data, 'file, Endian = Endianness, R = &'data [u8]> =
    MachOSymbol<'data, 'file, macho::MachHeader64<Endian>, R>;
289
290/// A symbol in a [`MachOFile`].
291///
292/// Most functionality is provided by the [`ObjectSymbol`] trait implementation.
293#[derive(Debug, Clone, Copy)]
294pub struct MachOSymbol<'data, 'file, Mach, R = &'data [u8]>
295where
296    Mach: MachHeader,
297    R: ReadRef<'data>,
298{
299    file: &'file MachOFile<'data, Mach, R>,
300    index: SymbolIndex,
301    nlist: &'data Mach::Nlist,
302}
303
304impl<'data, 'file, Mach, R> MachOSymbol<'data, 'file, Mach, R>
305where
306    Mach: MachHeader,
307    R: ReadRef<'data>,
308{
309    pub(super) fn new(
310        file: &'file MachOFile<'data, Mach, R>,
311        index: SymbolIndex,
312        nlist: &'data Mach::Nlist,
313    ) -> Option<Self> {
314        if nlist.n_type() & macho::N_STAB != 0 {
315            return None;
316        }
317        Some(MachOSymbol { file, index, nlist })
318    }
319
320    /// Get the Mach-O file containing this symbol.
321    pub fn macho_file(&self) -> &'file MachOFile<'data, Mach, R> {
322        self.file
323    }
324
325    /// Get the raw Mach-O symbol structure.
326    pub fn macho_symbol(&self) -> &'data Mach::Nlist {
327        self.nlist
328    }
329}
330
331impl<'data, 'file, Mach, R> read::private::Sealed for MachOSymbol<'data, 'file, Mach, R>
332where
333    Mach: MachHeader,
334    R: ReadRef<'data>,
335{
336}
337
338impl<'data, 'file, Mach, R> ObjectSymbol<'data> for MachOSymbol<'data, 'file, Mach, R>
339where
340    Mach: MachHeader,
341    R: ReadRef<'data>,
342{
343    #[inline]
344    fn index(&self) -> SymbolIndex {
345        self.index
346    }
347
348    fn name_bytes(&self) -> Result<&'data [u8]> {
349        self.nlist.name(self.file.endian, self.file.symbols.strings)
350    }
351
352    fn name(&self) -> Result<&'data str> {
353        let name = self.name_bytes()?;
354        str::from_utf8(name)
355            .ok()
356            .read_error("Non UTF-8 Mach-O symbol name")
357    }
358
359    #[inline]
360    fn address(&self) -> u64 {
361        self.nlist.n_value(self.file.endian).into()
362    }
363
364    #[inline]
365    fn size(&self) -> u64 {
366        0
367    }
368
369    fn kind(&self) -> SymbolKind {
370        self.section()
371            .index()
372            .and_then(|index| self.file.section_internal(index).ok())
373            .map(|section| match section.kind {
374                SectionKind::Text => SymbolKind::Text,
375                SectionKind::Data
376                | SectionKind::ReadOnlyData
377                | SectionKind::ReadOnlyString
378                | SectionKind::UninitializedData
379                | SectionKind::Common => SymbolKind::Data,
380                SectionKind::Tls | SectionKind::UninitializedTls | SectionKind::TlsVariables => {
381                    SymbolKind::Tls
382                }
383                _ => SymbolKind::Unknown,
384            })
385            .unwrap_or(SymbolKind::Unknown)
386    }
387
388    fn section(&self) -> SymbolSection {
389        match self.nlist.n_type() & macho::N_TYPE {
390            macho::N_UNDF => SymbolSection::Undefined,
391            macho::N_ABS => SymbolSection::Absolute,
392            macho::N_SECT => {
393                let n_sect = self.nlist.n_sect();
394                if n_sect != 0 {
395                    SymbolSection::Section(SectionIndex(n_sect as usize))
396                } else {
397                    SymbolSection::Unknown
398                }
399            }
400            _ => SymbolSection::Unknown,
401        }
402    }
403
404    #[inline]
405    fn is_undefined(&self) -> bool {
406        self.nlist.n_type() & macho::N_TYPE == macho::N_UNDF
407    }
408
409    #[inline]
410    fn is_definition(&self) -> bool {
411        self.nlist.is_definition()
412    }
413
414    #[inline]
415    fn is_common(&self) -> bool {
416        // Mach-O common symbols are based on section, not symbol
417        false
418    }
419
420    #[inline]
421    fn is_weak(&self) -> bool {
422        self.nlist.n_desc(self.file.endian) & (macho::N_WEAK_REF | macho::N_WEAK_DEF) != 0
423    }
424
425    fn scope(&self) -> SymbolScope {
426        let n_type = self.nlist.n_type();
427        if n_type & macho::N_TYPE == macho::N_UNDF {
428            SymbolScope::Unknown
429        } else if n_type & macho::N_EXT == 0 {
430            SymbolScope::Compilation
431        } else if n_type & macho::N_PEXT != 0 {
432            SymbolScope::Linkage
433        } else {
434            SymbolScope::Dynamic
435        }
436    }
437
438    #[inline]
439    fn is_global(&self) -> bool {
440        self.scope() != SymbolScope::Compilation
441    }
442
443    #[inline]
444    fn is_local(&self) -> bool {
445        self.scope() == SymbolScope::Compilation
446    }
447
448    #[inline]
449    fn flags(&self) -> SymbolFlags<SectionIndex, SymbolIndex> {
450        let n_desc = self.nlist.n_desc(self.file.endian);
451        SymbolFlags::MachO { n_desc }
452    }
453}
454
455/// A trait for generic access to [`macho::Nlist32`] and [`macho::Nlist64`].
456#[allow(missing_docs)]
457pub trait Nlist: Debug + Pod {
458    type Word: Into<u64>;
459    type Endian: endian::Endian;
460
461    fn n_strx(&self, endian: Self::Endian) -> u32;
462    fn n_type(&self) -> u8;
463    fn n_sect(&self) -> u8;
464    fn n_desc(&self, endian: Self::Endian) -> u16;
465    fn n_value(&self, endian: Self::Endian) -> Self::Word;
466
467    fn name<'data, R: ReadRef<'data>>(
468        &self,
469        endian: Self::Endian,
470        strings: StringTable<'data, R>,
471    ) -> Result<&'data [u8]> {
472        strings
473            .get(self.n_strx(endian))
474            .read_error("Invalid Mach-O symbol name offset")
475    }
476
477    /// Return true if this is a STAB symbol.
478    ///
479    /// This determines the meaning of the `n_type` field.
480    fn is_stab(&self) -> bool {
481        self.n_type() & macho::N_STAB != 0
482    }
483
484    /// Return true if this is an undefined symbol.
485    fn is_undefined(&self) -> bool {
486        let n_type = self.n_type();
487        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_UNDF
488    }
489
490    /// Return true if the symbol is a definition of a function or data object.
491    fn is_definition(&self) -> bool {
492        let n_type = self.n_type();
493        n_type & macho::N_STAB == 0 && n_type & macho::N_TYPE == macho::N_SECT
494    }
495
496    /// Return the library ordinal.
497    ///
498    /// This is either a 1-based index into the dylib load commands,
499    /// or a special ordinal.
500    #[inline]
501    fn library_ordinal(&self, endian: Self::Endian) -> u8 {
502        (self.n_desc(endian) >> 8) as u8
503    }
504}
505
506impl<Endian: endian::Endian> Nlist for macho::Nlist32<Endian> {
507    type Word = u32;
508    type Endian = Endian;
509
510    fn n_strx(&self, endian: Self::Endian) -> u32 {
511        self.n_strx.get(endian)
512    }
513    fn n_type(&self) -> u8 {
514        self.n_type
515    }
516    fn n_sect(&self) -> u8 {
517        self.n_sect
518    }
519    fn n_desc(&self, endian: Self::Endian) -> u16 {
520        self.n_desc.get(endian)
521    }
522    fn n_value(&self, endian: Self::Endian) -> Self::Word {
523        self.n_value.get(endian)
524    }
525}
526
527impl<Endian: endian::Endian> Nlist for macho::Nlist64<Endian> {
528    type Word = u64;
529    type Endian = Endian;
530
531    fn n_strx(&self, endian: Self::Endian) -> u32 {
532        self.n_strx.get(endian)
533    }
534    fn n_type(&self) -> u8 {
535        self.n_type
536    }
537    fn n_sect(&self) -> u8 {
538        self.n_sect
539    }
540    fn n_desc(&self, endian: Self::Endian) -> u16 {
541        self.n_desc.get(endian)
542    }
543    fn n_value(&self, endian: Self::Endian) -> Self::Word {
544        self.n_value.get(endian)
545    }
546}