// raw_string/str/utf8chunks.rs

1use std::iter::FusedIterator;
2use std::str::{from_utf8, from_utf8_unchecked};
3
4/// An iterator over chunks of valid UTF-8 in a RawStr.
5///
6/// See [`RawStr::utf8_chunks`](struct.RawStr.html#method.utf8_chunks).
7pub struct Utf8ChunksIter<'a> {
8	pub(super) bytes: &'a [u8],
9}
10
/// A chunk of valid UTF-8, possibly followed by a broken character encoding.
///
/// Both fields borrow from the bytes being iterated over, so a chunk is
/// cheap to copy and compare.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Utf8Chunk<'a> {
	/// A valid UTF-8 piece, at the start, end, or between broken chars.
	///
	/// Empty between adjacent broken chars.
	pub valid: &'a str,

	/// A broken char.
	///
	/// Can only be empty in the last chunk.
	///
	/// Should be replaced by a single unicode replacement character, if not empty.
	pub broken: &'a [u8],
}
25
26impl<'a> Iterator for Utf8ChunksIter<'a> {
27	type Item = Utf8Chunk<'a>;
28
29	fn next(&mut self) -> Option<Utf8Chunk<'a>> {
30		if self.bytes.is_empty() {
31			return None;
32		}
33		match from_utf8(self.bytes) {
34			Ok(s) => {
35				self.bytes = &self.bytes[s.len()..];
36				Some(Utf8Chunk {
37					valid: s,
38					broken: &self.bytes[..0],
39				})
40			}
41			Err(e) => {
42				let (valid, rest) = self.bytes.split_at(e.valid_up_to());
43				let valid = unsafe { from_utf8_unchecked(valid) };
44				let (broken, rest) = rest.split_at(e.error_len().unwrap_or(rest.len()));
45				self.bytes = rest;
46				Some(Utf8Chunk { valid, broken })
47			}
48		}
49	}
50
51	#[inline]
52	fn size_hint(&self) -> (usize, Option<usize>) {
53		if self.bytes.is_empty() {
54			(0, Some(0))
55		} else {
56			(1, None)
57		}
58	}
59}
60
61impl<'a> FusedIterator for Utf8ChunksIter<'a> {}