thorin/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
pub extern crate object;

use std::{
    borrow::Cow,
    collections::HashSet,
    fmt,
    path::{Path, PathBuf},
};

use gimli::{EndianSlice, Reader};
use object::{write::Object as WritableObject, FileKind, Object, ObjectSection};
use tracing::{debug, trace};

use crate::{
    error::Result,
    ext::EndianityExt,
    index::Bucketable,
    package::{dwo_identifier_of_unit, DwarfObject, InProgressDwarfPackage},
    relocate::{add_relocations, Relocate, RelocationMap},
};

mod error;
mod ext;
mod index;
mod package;
mod relocate;
mod strings;

pub use crate::error::Error;

/// Data-management hooks supplied by users of `thorin`.
///
/// `thorin` keeps no arenas of its own; every buffer it needs to keep alive is handed to the
/// `Session` implementation, which decides how that data is stored.
pub trait Session<Relocations> {
    /// Takes ownership of `data` and returns a view of its contents that lives for `'session`.
    fn alloc_data<'session>(&'session self, data: Vec<u8>) -> &'session [u8];

    /// Resolves `data` to a plain slice that lives for `'input`.
    ///
    /// An owned `Cow` is passed to `alloc_data`; the resulting `'session` reference is valid for
    /// `'input` because of the `'session: 'input` bound. A borrowed `Cow` already holds an
    /// `'input` reference, which is returned unchanged.
    fn alloc_owned_cow<'input, 'session: 'input>(
        &'session self,
        data: Cow<'input, [u8]>,
    ) -> &'input [u8] {
        match data {
            Cow::Owned(owned) => self.alloc_data(owned),
            Cow::Borrowed(borrowed) => borrowed,
        }
    }

    /// Takes ownership of the relocations in `data` and returns a reference to them that lives
    /// for `'session`.
    fn alloc_relocation<'session>(&'session self, data: Relocations) -> &'session Relocations;

    /// Returns the contents of the file at `path` as a slice that lives for `'session`.
    fn read_input<'session>(&'session self, path: &Path) -> std::io::Result<&'session [u8]>;
}

/// Policy for DWARF objects that an executable references but that cannot be read.
///
/// Whichever policy is chosen, a referenced object that is still absent when the DWARF package
/// is finished results in an error.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum MissingReferencedObjectBehaviour {
    /// Ignore referenced DWARF objects that are missing. Useful when this is expected, e.g. the
    /// path recorded in the executable is wrong but the object is provided as an input anyway.
    Skip,
    /// Fail as soon as a referenced DWARF object is found to be missing.
    Error,
}

impl MissingReferencedObjectBehaviour {
    /// Returns `true` for `Skip` and `false` for `Error`.
    pub fn skip_missing(&self) -> bool {
        matches!(self, Self::Skip)
    }
}

/// Builder for DWARF packages, add input objects/packages with `add_input_object` or input objects
/// referenced by an executable with `add_executable` before accessing the completed object with
/// `finish`.
pub struct DwarfPackage<'output, 'session: 'output, Sess: Session<RelocationMap>> {
    /// `Session` implementation used to read input files and to allocate section data and
    /// relocations.
    sess: &'session Sess,
    /// Package being built; `None` until the first input object has been processed.
    maybe_in_progress: Option<InProgressDwarfPackage<'output>>,
    /// DWARF objects referenced by executables that were not already in the package when the
    /// executable was added. `finish` reports an error if any of these is absent from the
    /// package.
    targets: HashSet<DwarfObject>,
}

impl<'output, 'session: 'output, Sess> fmt::Debug for DwarfPackage<'output, 'session, Sess>
where
    Sess: Session<RelocationMap>,
{
    /// Formats the package as a `DwarfPackage` struct showing the in-progress package and the
    /// number of referenced targets; the session itself is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let target_count = self.targets.len();

        let mut builder = f.debug_struct("DwarfPackage");
        builder.field("in_progress", &self.maybe_in_progress);
        builder.field("target_count", &target_count);
        builder.finish()
    }
}

impl<'output, 'session: 'output, Sess> DwarfPackage<'output, 'session, Sess>
where
    Sess: Session<RelocationMap>,
{
    /// Builds an empty `DwarfPackage` backed by the given `Session` implementation: no package
    /// is in progress yet and no targets are recorded.
    pub fn new(sess: &'session Sess) -> Self {
        let targets = HashSet::new();
        Self { maybe_in_progress: None, targets, sess }
    }

    /// Add an input object to the in-progress package.
    #[tracing::instrument(level = "trace", skip(obj))]
    fn process_input_object<'input>(&mut self, obj: &'input object::File<'input>) -> Result<()> {
        if self.maybe_in_progress.is_none() {
            self.maybe_in_progress =
                Some(InProgressDwarfPackage::new(obj.architecture(), obj.endianness()));
        }

        let encoding = if let Some(section) = obj.section_by_name(".debug_info.dwo") {
            let data = section.compressed_data()?.decompress()?;
            let data_ref = self.sess.alloc_owned_cow(data);
            let debug_info = gimli::DebugInfo::new(data_ref, obj.endianness().as_runtime_endian());
            debug_info
                .units()
                .next()
                .map_err(Error::ParseUnitHeader)?
                .map(|root_header| root_header.encoding())
                .ok_or(Error::NoCompilationUnits)?
        } else {
            debug!("no `.debug_info.dwo` in input dwarf object");
            return Ok(());
        };

        let sess = self.sess;
        self.maybe_in_progress
            .as_mut()
            .expect("`process_input_object` is broken")
            .add_input_object(sess, obj, encoding)
    }

    /// Add input objects referenced by executable to the DWARF package.
    ///
    /// Reads and parses the object at `path`, loads its DWARF sections (collecting relocations
    /// for each section via `add_relocations`) and walks every unit. For each unit for which
    /// `dwo_identifier_of_unit` yields an identifier, the path of the referenced DWARF object is
    /// built from the unit's compilation directory (if any) and its `DW_AT_dwo_name` or
    /// `DW_AT_GNU_dwo_name` attribute, and that path is passed to `add_input_object`.
    ///
    /// Compilation-unit identifiers that are not already contained in the in-progress package
    /// are recorded as targets, which `finish` later checks for.
    ///
    /// # Errors
    ///
    /// Returns an error if the executable cannot be read or parsed, if its DWARF cannot be
    /// loaded or parsed, if a unit with an identifier has no dwo name
    /// (`Error::MissingDwoName`), or if adding a referenced object fails. When
    /// `missing_behaviour.skip_missing()` is true, `Error::ReadInput` failures from referenced
    /// objects are ignored.
    ///
    /// # Panics
    ///
    /// Panics if a unit has no root debugging information entry.
    #[tracing::instrument(level = "trace")]
    pub fn add_executable(
        &mut self,
        path: &Path,
        missing_behaviour: MissingReferencedObjectBehaviour,
    ) -> Result<()> {
        let data = self.sess.read_input(path).map_err(Error::ReadInput)?;
        let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;

        // Loader handed to `gimli::Dwarf::load`: looks each section up by name, records its
        // relocations and returns the (decompressed) contents wrapped in a `Relocate` reader.
        let mut load_section = |id: gimli::SectionId| -> Result<_> {
            let mut relocations = RelocationMap::default();
            let data = match obj.section_by_name(&id.name()) {
                Some(ref section) => {
                    add_relocations(&mut relocations, &obj, section)?;
                    section.compressed_data()?.decompress()?
                }
                // Use a non-zero capacity so that `ReaderOffsetId`s are unique.
                None => Cow::Owned(Vec::with_capacity(1)),
            };

            // Section data and relocations are handed to the session so that the returned
            // reader can hold references to them.
            let data_ref = self.sess.alloc_owned_cow(data);
            let reader = EndianSlice::new(data_ref, obj.endianness().as_runtime_endian());
            // The same slice is stored as both the `section` and the `reader` of `Relocate`.
            let section = reader;
            let relocations = self.sess.alloc_relocation(relocations);
            Ok(Relocate { relocations, section, reader })
        };

        let dwarf = gimli::Dwarf::load(&mut load_section)?;

        let mut iter = dwarf.units();
        while let Some(header) = iter.next().map_err(Error::ParseUnitHeader)? {
            let unit = dwarf.unit(header).map_err(Error::ParseUnit)?;

            // Units without an identifier do not reference a DWARF object; skip them.
            let target = match dwo_identifier_of_unit(&dwarf.debug_abbrev, &unit.header)? {
                Some(target) => target,
                None => {
                    debug!("no target");
                    continue;
                }
            };

            let dwo_name = {
                let mut cursor = unit.header.entries(&unit.abbreviations);
                // Advance to the first entry, which is expected to be the unit's root entry.
                cursor.next_dfs()?;
                let root = cursor.current().expect("unit w/out root debugging information entry");

                let dwo_name = if let Some(val) = root.attr_value(gimli::DW_AT_dwo_name)? {
                    // DWARF 5
                    val
                } else if let Some(val) = root.attr_value(gimli::DW_AT_GNU_dwo_name)? {
                    // GNU Extension
                    val
                } else {
                    return Err(Error::MissingDwoName(target.index()));
                };

                dwarf.attr_string(&unit, dwo_name)?.to_string()?.into_owned()
            };

            // Prepend the compilation directory if it exists.
            let mut path = if let Some(comp_dir) = &unit.comp_dir {
                PathBuf::from(comp_dir.to_string()?.into_owned())
            } else {
                PathBuf::new()
            };
            path.push(dwo_name);

            // Only add `DwoId`s to the targets, not `DebugTypeSignature`s. There doesn't
            // appear to be a "skeleton type unit" to find the corresponding unit of (there are
            // normal type units in an executable, but should we expect to find a corresponding
            // split type unit for those?).
            if matches!(target, DwarfObject::Compilation(_)) {
                // Input objects are processed first, if a DWARF object referenced by this
                // executable was already found then don't add it to the target and try to add it
                // again.
                if let Some(package) = &self.maybe_in_progress {
                    if package.contained_units().contains(&target) {
                        continue;
                    }
                }

                debug!(?target, "adding target");
                self.targets.insert(target);
            }

            // Attempt to load the referenced object. A read failure is tolerated only when the
            // caller asked for missing objects to be skipped; every other error is propagated.
            match self.add_input_object(&path) {
                Ok(()) => (),
                Err(Error::ReadInput(..)) if missing_behaviour.skip_missing() => (),
                Err(e) => return Err(e),
            }
        }

        Ok(())
    }

    /// Add an input object to the DWARF package.
    ///
    /// Input object must be an archive or an elf object.
    #[tracing::instrument(level = "trace")]
    pub fn add_input_object(&mut self, path: &Path) -> Result<()> {
        let data = self.sess.read_input(&path).map_err(Error::ReadInput)?;

        let kind = FileKind::parse(data).map_err(Error::ParseFileKind)?;
        trace!(?kind);
        match kind {
            FileKind::Archive => {
                let archive = object::read::archive::ArchiveFile::parse(data)
                    .map_err(Error::ParseArchiveFile)?;

                for member in archive.members() {
                    let member = member.map_err(Error::ParseArchiveMember)?;
                    let data = member.data(data)?;

                    let kind = if let Ok(kind) = FileKind::parse(data) {
                        kind
                    } else {
                        trace!("skipping non-elf archive member");
                        continue;
                    };

                    trace!(?kind, "archive member");
                    match kind {
                        FileKind::Elf32 | FileKind::Elf64 => {
                            let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;
                            self.process_input_object(&obj)?;
                        }
                        _ => {
                            trace!("skipping non-elf archive member");
                        }
                    }
                }

                Ok(())
            }
            FileKind::Elf32 | FileKind::Elf64 => {
                let obj = object::File::parse(data).map_err(Error::ParseObjectFile)?;
                self.process_input_object(&obj)
            }
            _ => Err(Error::InvalidInputKind),
        }
    }

    /// Returns the `object::write::Object` containing the created DWARF package.
    ///
    /// Returns an `Error::MissingReferencedUnit` if DWARF objects referenced by executables were
    /// not subsequently found.
    /// Returns an `Error::NoOutputObjectCreated` if no input objects or executables were provided.
    #[tracing::instrument(level = "trace")]
    pub fn finish(self) -> Result<WritableObject<'output>> {
        match self.maybe_in_progress {
            Some(package) => {
                if let Some(missing) = self.targets.difference(package.contained_units()).next() {
                    return Err(Error::MissingReferencedUnit(missing.index()));
                }

                package.finish()
            }
            None if !self.targets.is_empty() => {
                let first_missing_unit = self
                    .targets
                    .iter()
                    .next()
                    .copied()
                    .expect("non-empty map doesn't have first element");
                Err(Error::MissingReferencedUnit(first_missing_unit.index()))
            }
            None => Err(Error::NoOutputObjectCreated),
        }
    }
}