// thorin/strings.rs
use gimli::{
write::{EndianVec, Writer},
DebugStrOffsetsBase, DebugStrOffsetsIndex, DwarfFileType, Encoding, EndianSlice, Format,
Section,
};
use hashbrown::HashMap;
use tracing::debug;
use crate::{
error::{Error, Result},
ext::PackageFormatExt,
};
/// Offset of a string within the `.debug_str` section, wrapped in a new-type so that it cannot
/// be mixed up with other `usize` values.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub(crate) struct PackageStringOffset(usize);
/// DWARF packages need to merge the `.debug_str` sections of input DWARF objects.
/// `.debug_str_offsets` sections then need to be rebuilt with offsets into the new merged
/// `.debug_str` section and then concatenated (indices into each dwarf object's offset list will
/// therefore still refer to the same string).
///
/// Gimli's `StringTable` produces a `.debug_str` section with a single `.debug_str_offsets`
/// section, but `PackageStringTable` accumulates a single `.debug_str` section and can be used to
/// produce multiple `.debug_str_offsets` sections (which will be concatenated) which all offset
/// into the same `.debug_str`.
///
/// Strings are deduplicated: inserting a string that is already present returns the offset of
/// the existing copy instead of appending a second one.
pub(crate) struct PackageStringTable {
/// Accumulated contents of the merged `.debug_str` section: every inserted string followed by
/// a null terminator.
data: Vec<u8>,
/// Maps the bytes of each inserted string (without the null terminator) to the offset in
/// `data` at which that string starts.
strings: HashMap<Vec<u8>, PackageStringOffset>,
}
impl PackageStringTable {
/// Create a new `PackageStringTable` with a given endianity.
pub(crate) fn new() -> Self {
Self { data: Vec::new(), strings: HashMap::new() }
}
/// Insert a string into the string table and return its offset in the table. If the string is
/// already in the table, returns its offset.
pub(crate) fn get_or_insert(&mut self, bytes: &[u8]) -> PackageStringOffset {
debug_assert!(!bytes.contains(&0));
if let Some(offset) = self.strings.get(bytes) {
return *offset;
}
// Keep track of the offset for this string, it might be referenced by the next compilation
// unit too.
let offset = PackageStringOffset(self.data.len());
self.strings.insert(bytes.into(), offset);
// Insert into the string table.
self.data.extend_from_slice(bytes);
self.data.push(0);
offset
}
/// Adds strings from input `.debug_str_offsets` and `.debug_str` into the string table, returns
/// data for a equivalent `.debug_str_offsets` section with offsets pointing into the new
/// `.debug_str` section.
pub(crate) fn remap_str_offsets_section<E: gimli::Endianity>(
&mut self,
debug_str: gimli::DebugStr<EndianSlice<E>>,
debug_str_offsets: gimli::DebugStrOffsets<EndianSlice<E>>,
section_size: u64,
endian: E,
encoding: Encoding,
) -> Result<EndianVec<E>> {
let entry_size = match encoding.format {
Format::Dwarf32 => 4,
Format::Dwarf64 => 8,
};
// Reduce the number of allocations needed.
self.data.reserve(debug_str.reader().len());
let mut data = EndianVec::new(endian);
// `DebugStrOffsetsBase` knows to skip past the header with DWARF 5.
let base: gimli::DebugStrOffsetsBase<usize> =
DebugStrOffsetsBase::default_for_encoding_and_file(encoding, DwarfFileType::Dwo);
if encoding.is_std_dwarf_package_format() {
match encoding.format {
Format::Dwarf32 => {
// Unit length (4 bytes): size of the offsets section without this
// header (8 bytes total).
data.write_u32(
(section_size - 8)
.try_into()
.expect("section size w/out header larger than u32"),
)?;
}
Format::Dwarf64 => {
// Unit length (4 bytes then 8 bytes): size of the offsets section without
// this header (16 bytes total).
data.write_u32(u32::MAX)?;
data.write_u64(section_size - 16)?;
}
};
// Version (2 bytes): DWARF 5
data.write_u16(5)?;
// Reserved padding (2 bytes)
data.write_u16(0)?;
}
debug!(?base);
let base_offset: u64 = base.0.try_into().expect("base offset larger than u64");
let num_elements = (section_size - base_offset) / entry_size;
debug!(?section_size, ?base_offset, ?num_elements);
for i in 0..num_elements {
let dwo_index = DebugStrOffsetsIndex(i as usize);
let dwo_offset = debug_str_offsets
.get_str_offset(encoding.format, base, dwo_index)
.map_err(|e| Error::OffsetAtIndex(e, i))?;
let dwo_str =
debug_str.get_str(dwo_offset).map_err(|e| Error::StrAtOffset(e, dwo_offset.0))?;
let dwp_offset = self.get_or_insert(&dwo_str);
match encoding.format {
Format::Dwarf32 => {
let dwp_offset =
dwp_offset.0.try_into().expect("string offset larger than u32");
data.write_u32(dwp_offset)?;
}
Format::Dwarf64 => {
let dwp_offset =
dwp_offset.0.try_into().expect("string offset larger than u64");
data.write_u64(dwp_offset)?;
}
}
}
Ok(data)
}
/// Returns the accumulated `.debug_str` section data
pub(crate) fn finish(self) -> Vec<u8> {
self.data
}
}