object/write/string.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
use alloc::vec::Vec;
#[cfg(feature = "write_std")]
type IndexSet<K> = indexmap::IndexSet<K>;
#[cfg(not(feature = "write_std"))]
type IndexSet<K> = indexmap::IndexSet<K, hashbrown::DefaultHashBuilder>;
/// An identifier for an entry in a string table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct StringId(usize);
#[derive(Debug, Default)]
pub(crate) struct StringTable<'a> {
strings: IndexSet<&'a [u8]>,
offsets: Vec<usize>,
}
impl<'a> StringTable<'a> {
/// Add a string to the string table.
///
/// Panics if the string table has already been written, or
/// if the string contains a null byte.
pub fn add(&mut self, string: &'a [u8]) -> StringId {
assert!(self.offsets.is_empty());
assert!(!string.contains(&0));
let id = self.strings.insert_full(string).0;
StringId(id)
}
/// Return the id of the given string.
///
/// Panics if the string is not in the string table.
#[allow(dead_code)]
pub fn get_id(&self, string: &[u8]) -> StringId {
let id = self.strings.get_index_of(string).unwrap();
StringId(id)
}
/// Return the string for the given id.
///
/// Panics if the string is not in the string table.
#[allow(dead_code)]
pub fn get_string(&self, id: StringId) -> &'a [u8] {
self.strings.get_index(id.0).unwrap()
}
/// Return the offset of the given string.
///
/// Panics if the string table has not been written, or
/// if the string is not in the string table.
pub fn get_offset(&self, id: StringId) -> usize {
self.offsets[id.0]
}
/// Append the string table to the given `Vec`, and
/// calculate the list of string offsets.
///
/// `base` is the initial string table offset. For example,
/// this should be 1 for ELF, to account for the initial
/// null byte (which must have been written by the caller).
///
/// Panics if the string table has already been written.
pub fn write(&mut self, base: usize, w: &mut Vec<u8>) {
assert!(self.offsets.is_empty());
let mut ids: Vec<_> = (0..self.strings.len()).collect();
sort(&mut ids, 1, &self.strings);
self.offsets = vec![0; ids.len()];
let mut offset = base;
let mut previous = &[][..];
for id in ids {
let string = self.strings.get_index(id).unwrap();
if previous.ends_with(string) {
self.offsets[id] = offset - string.len() - 1;
} else {
self.offsets[id] = offset;
w.extend_from_slice(string);
w.push(0);
offset += string.len() + 1;
previous = string;
}
}
}
/// Calculate the size in bytes of the string table.
///
/// `base` is the initial string table offset. For example,
/// this should be 1 for ELF, to account for the initial
/// null byte.
#[allow(dead_code)]
pub fn size(&self, base: usize) -> usize {
// TODO: cache this result?
let mut ids: Vec<_> = (0..self.strings.len()).collect();
sort(&mut ids, 1, &self.strings);
let mut size = base;
let mut previous = &[][..];
for id in ids {
let string = self.strings.get_index(id).unwrap();
if !previous.ends_with(string) {
size += string.len() + 1;
previous = string;
}
}
size
}
}
// Multi-key quicksort.
//
// Ordering is such that if a string is a suffix of at least one other string,
// then it is placed immediately after one of those strings. That is:
// - comparison starts at the end of the string
// - shorter strings come later
//
// Based on the implementation in LLVM.
fn sort(mut ids: &mut [usize], mut pos: usize, strings: &IndexSet<&[u8]>) {
loop {
if ids.len() <= 1 {
return;
}
let pivot = byte(ids[0], pos, strings);
let mut lower = 0;
let mut upper = ids.len();
let mut i = 1;
while i < upper {
let b = byte(ids[i], pos, strings);
if b > pivot {
ids.swap(lower, i);
lower += 1;
i += 1;
} else if b < pivot {
upper -= 1;
ids.swap(upper, i);
} else {
i += 1;
}
}
sort(&mut ids[..lower], pos, strings);
sort(&mut ids[upper..], pos, strings);
if pivot == 0 {
return;
}
ids = &mut ids[lower..upper];
pos += 1;
}
}
fn byte(id: usize, pos: usize, strings: &IndexSet<&[u8]>) -> u8 {
let string = strings.get_index(id).unwrap();
let len = string.len();
if len >= pos {
string[len - pos]
} else {
// We know the strings don't contain null bytes.
0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn string_table() {
let mut table = StringTable::default();
let id0 = table.add(b"");
let id1 = table.add(b"foo");
let id2 = table.add(b"bar");
let id3 = table.add(b"foobar");
let mut data = Vec::new();
data.push(0);
table.write(1, &mut data);
assert_eq!(data, b"\0foobar\0foo\0");
assert_eq!(table.get_offset(id0), 11);
assert_eq!(table.get_offset(id1), 8);
assert_eq!(table.get_offset(id2), 4);
assert_eq!(table.get_offset(id3), 1);
}
}