thorin/
lib.rs

1pub extern crate object;
2
3use std::{
4    borrow::Cow,
5    collections::HashSet,
6    fmt,
7    path::{Path, PathBuf},
8};
9
10use gimli::{EndianSlice, Reader};
11use object::{write::Object as WritableObject, FileKind, Object, ObjectSection};
12use tracing::{debug, trace};
13
14use crate::{
15    error::Result,
16    ext::EndianityExt,
17    index::Bucketable,
18    package::{dwo_identifier_of_unit, DwarfObject, InProgressDwarfPackage},
19    relocate::{add_relocations, Relocate, RelocationMap},
20};
21
22mod error;
23mod ext;
24mod index;
25mod package;
26mod relocate;
27mod strings;
28
29pub use crate::error::Error;
30
/// Storage backend supplied by users of `thorin`.
///
/// `thorin` keeps no arenas of its own; instead, the caller implements `Session` and decides how
/// buffers, relocation tables and input files are kept alive for the `'session` lifetime.
pub trait Session<Relocations> {
    /// Takes ownership of `data` and returns a reference to its contents that lives for
    /// `'session`.
    fn alloc_data<'session>(&'session self, data: Vec<u8>) -> &'session [u8];

    /// Turns `data` into a plain reference with lifetime `'input`.
    ///
    /// An owned `Cow` is handed to `alloc_data`, which yields a `'session` reference; because
    /// `'session` outlives `'input`, that reference can be returned as `'input`. A borrowed `Cow`
    /// already holds an `'input` reference, which is returned unchanged.
    fn alloc_owned_cow<'input, 'session: 'input>(
        &'session self,
        data: Cow<'input, [u8]>,
    ) -> &'input [u8] {
        match data {
            Cow::Owned(bytes) => self.alloc_data(bytes),
            Cow::Borrowed(bytes) => bytes,
        }
    }

    /// Takes ownership of the relocations in `data` and returns a reference to them that lives
    /// for `'session`.
    fn alloc_relocation<'session>(&'session self, data: Relocations) -> &'session Relocations;

    /// Returns a reference, with lifetime `'session`, to the contents of the file at `path`.
    fn read_input<'session>(&'session self, path: &Path) -> std::io::Result<&'session [u8]>;
}
58
/// Policy for DWARF objects that an executable references but that cannot be read.
///
/// Regardless of the policy chosen here, a referenced object that is still missing once the
/// DWARF package is finished results in an error.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum MissingReferencedObjectBehaviour {
    /// Carry on when a referenced DWARF object is missing. Useful when this is expected, e.g.
    /// the path recorded in the executable is wrong but the object is provided as an input.
    Skip,
    /// Fail as soon as a referenced DWARF object is missing.
    Error,
}

impl MissingReferencedObjectBehaviour {
    /// Returns `true` when missing referenced objects should be skipped, `false` when they
    /// should be reported as an error.
    pub fn skip_missing(&self) -> bool {
        match self {
            Self::Skip => true,
            Self::Error => false,
        }
    }
}
81
82/// Builder for DWARF packages, add input objects/packages with `add_input_object` or input objects
83/// referenced by an executable with `add_executable` before accessing the completed object with
84/// `finish`.
85pub struct DwarfPackage<'output, 'session: 'output, Sess: Session<RelocationMap>> {
86    sess: &'session Sess,
87    maybe_in_progress: Option<InProgressDwarfPackage<'output>>,
88    targets: HashSet<DwarfObject>,
89}
90
91impl<'output, 'session: 'output, Sess> fmt::Debug for DwarfPackage<'output, 'session, Sess>
92where
93    Sess: Session<RelocationMap>,
94{
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        f.debug_struct("DwarfPackage")
97            .field("in_progress", &self.maybe_in_progress)
98            .field("target_count", &self.targets.len())
99            .finish()
100    }
101}
102
103impl<'output, 'session: 'output, Sess> DwarfPackage<'output, 'session, Sess>
104where
105    Sess: Session<RelocationMap>,
106{
107    /// Create a new `DwarfPackage` with the provided `Session` implementation.
108    pub fn new(sess: &'session Sess) -> Self {
109        Self { sess, maybe_in_progress: None, targets: HashSet::new() }
110    }
111
112    /// Add an input object to the in-progress package.
113    #[tracing::instrument(level = "trace", skip(obj))]
114    fn process_input_object<'input>(&mut self, obj: &'input object::File<'input>) -> Result<()> {
115        if self.maybe_in_progress.is_none() {
116            self.maybe_in_progress =
117                Some(InProgressDwarfPackage::new(obj.architecture(), obj.endianness()));
118        }
119
120        let encoding = if let Some(section) = obj.section_by_name(".debug_info.dwo") {
121            let data = section.compressed_data()?.decompress()?;
122            let data_ref = self.sess.alloc_owned_cow(data);
123            let debug_info = gimli::DebugInfo::new(data_ref, obj.endianness().as_runtime_endian());
124            debug_info
125                .units()
126                .next()
127                .map_err(Error::ParseUnitHeader)?
128                .map(|root_header| root_header.encoding())
129                .ok_or(Error::NoCompilationUnits)?
130        } else {
131            debug!("no `.debug_info.dwo` in input dwarf object");
132            return Ok(());
133        };
134
135        let sess = self.sess;
136        self.maybe_in_progress
137            .as_mut()
138            .expect("`process_input_object` is broken")
139            .add_input_object(sess, obj, encoding)
140    }
141
142    /// Add input objects referenced by executable to the DWARF package.
143    #[tracing::instrument(level = "trace")]
144    pub fn add_executable(
145        &mut self,
146        path: &Path,
147        missing_behaviour: MissingReferencedObjectBehaviour,
148    ) -> Result<()> {
149        let data = self.sess.read_input(path).map_err(Error::ReadInput)?;
150        let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;
151
152        let mut load_section = |id: gimli::SectionId| -> Result<_> {
153            let mut relocations = RelocationMap::default();
154            let data = match obj.section_by_name(&id.name()) {
155                Some(ref section) => {
156                    add_relocations(&mut relocations, &obj, section)?;
157                    section.compressed_data()?.decompress()?
158                }
159                // Use a non-zero capacity so that `ReaderOffsetId`s are unique.
160                None => Cow::Owned(Vec::with_capacity(1)),
161            };
162
163            let data_ref = self.sess.alloc_owned_cow(data);
164            let reader = EndianSlice::new(data_ref, obj.endianness().as_runtime_endian());
165            let section = reader;
166            let relocations = self.sess.alloc_relocation(relocations);
167            Ok(Relocate { relocations, section, reader })
168        };
169
170        let dwarf = gimli::Dwarf::load(&mut load_section)?;
171
172        let mut iter = dwarf.units();
173        while let Some(header) = iter.next().map_err(Error::ParseUnitHeader)? {
174            let unit = dwarf.unit(header).map_err(Error::ParseUnit)?;
175
176            let target = match dwo_identifier_of_unit(&dwarf.debug_abbrev, &unit.header)? {
177                Some(target) => target,
178                None => {
179                    debug!("no target");
180                    continue;
181                }
182            };
183
184            let dwo_name = {
185                let mut cursor = unit.header.entries(&unit.abbreviations);
186                cursor.next_dfs()?;
187                let root = cursor.current().expect("unit w/out root debugging information entry");
188
189                let dwo_name = if let Some(val) = root.attr_value(gimli::DW_AT_dwo_name)? {
190                    // DWARF 5
191                    val
192                } else if let Some(val) = root.attr_value(gimli::DW_AT_GNU_dwo_name)? {
193                    // GNU Extension
194                    val
195                } else {
196                    return Err(Error::MissingDwoName(target.index()));
197                };
198
199                dwarf.attr_string(&unit, dwo_name)?.to_string()?.into_owned()
200            };
201
202            // Prepend the compilation directory if it exists.
203            let mut path = if let Some(comp_dir) = &unit.comp_dir {
204                PathBuf::from(comp_dir.to_string()?.into_owned())
205            } else {
206                PathBuf::new()
207            };
208            path.push(dwo_name);
209
210            // Only add `DwoId`s to the targets, not `DebugTypeSignature`s. There doesn't
211            // appear to be a "skeleton type unit" to find the corresponding unit of (there are
212            // normal type units in an executable, but should we expect to find a corresponding
213            // split type unit for those?).
214            if matches!(target, DwarfObject::Compilation(_)) {
215                // Input objects are processed first, if a DWARF object referenced by this
216                // executable was already found then don't add it to the target and try to add it
217                // again.
218                if let Some(package) = &self.maybe_in_progress {
219                    if package.contained_units().contains(&target) {
220                        continue;
221                    }
222                }
223
224                debug!(?target, "adding target");
225                self.targets.insert(target);
226            }
227
228            match self.add_input_object(&path) {
229                Ok(()) => (),
230                Err(Error::ReadInput(..)) if missing_behaviour.skip_missing() => (),
231                Err(e) => return Err(e),
232            }
233        }
234
235        Ok(())
236    }
237
238    /// Add an input object to the DWARF package.
239    ///
240    /// Input object must be an archive or an elf object.
241    #[tracing::instrument(level = "trace")]
242    pub fn add_input_object(&mut self, path: &Path) -> Result<()> {
243        let data = self.sess.read_input(&path).map_err(Error::ReadInput)?;
244
245        let kind = FileKind::parse(data).map_err(Error::ParseFileKind)?;
246        trace!(?kind);
247        match kind {
248            FileKind::Archive => {
249                let archive = object::read::archive::ArchiveFile::parse(data)
250                    .map_err(Error::ParseArchiveFile)?;
251
252                for member in archive.members() {
253                    let member = member.map_err(Error::ParseArchiveMember)?;
254                    let data = member.data(data)?;
255
256                    let kind = if let Ok(kind) = FileKind::parse(data) {
257                        kind
258                    } else {
259                        trace!("skipping non-elf archive member");
260                        continue;
261                    };
262
263                    trace!(?kind, "archive member");
264                    match kind {
265                        FileKind::Elf32 | FileKind::Elf64 => {
266                            let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;
267                            self.process_input_object(&obj)?;
268                        }
269                        _ => {
270                            trace!("skipping non-elf archive member");
271                        }
272                    }
273                }
274
275                Ok(())
276            }
277            FileKind::Elf32 | FileKind::Elf64 => {
278                let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;
279                self.process_input_object(&obj)
280            }
281            _ => Err(Error::InvalidInputKind),
282        }
283    }
284
285    /// Returns the `object::write::Object` containing the created DWARF package.
286    ///
287    /// Returns an `Error::MissingReferencedUnit` if DWARF objects referenced by executables were
288    /// not subsequently found.
289    /// Returns an `Error::NoOutputObjectCreated` if no input objects or executables were provided.
290    #[tracing::instrument(level = "trace")]
291    pub fn finish(self) -> Result<WritableObject<'output>> {
292        match self.maybe_in_progress {
293            Some(package) => {
294                if let Some(missing) = self.targets.difference(package.contained_units()).next() {
295                    return Err(Error::MissingReferencedUnit(missing.index()));
296                }
297
298                package.finish()
299            }
300            None if !self.targets.is_empty() => {
301                let first_missing_unit = self
302                    .targets
303                    .iter()
304                    .next()
305                    .copied()
306                    .expect("non-empty map doesn't have first element");
307                Err(Error::MissingReferencedUnit(first_missing_unit.index()))
308            }
309            None => Err(Error::NoOutputObjectCreated),
310        }
311    }
312}