about | summary | refs | log | tree | commit | diff | stats
path: root/components/script/dom/servoparser/mod.rs
diff options
context:
space:
mode:
author: Anthony Ramine <n.oxyde@gmail.com>, 2018-12-05 13:44:16 +0100
committer: Anthony Ramine <n.oxyde@gmail.com>, 2018-12-05 16:39:07 +0100
commit: b4448a9fe7cf1bfc031fbb1b1911406a22f8a6cf (patch)
tree: 5226771f297877ce7261c9b01c343e779e9a0ae6 /components/script/dom/servoparser/mod.rs
parent: 08bbf4f93a7d6ad5fdf8049fd063db8c93e733a6 (diff)
download: servo-b4448a9fe7cf1bfc031fbb1b1911406a22f8a6cf.tar.gz
servo-b4448a9fe7cf1bfc031fbb1b1911406a22f8a6cf.zip
Use Utf8LossyDecoder instead of IncompleteUtf8
Diffstat (limited to 'components/script/dom/servoparser/mod.rs')
-rw-r--r-- components/script/dom/servoparser/mod.rs 91
1 files changed, 66 insertions, 25 deletions
diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs
index 3c7e45f91de..41895832a7c 100644
--- a/components/script/dom/servoparser/mod.rs
+++ b/components/script/dom/servoparser/mod.rs
@@ -38,7 +38,9 @@ use crate::script_thread::ScriptThread;
use dom_struct::dom_struct;
use embedder_traits::resources::{self, Resource};
use html5ever::buffer_queue::BufferQueue;
-use html5ever::tendril::{ByteTendril, IncompleteUtf8, StrTendril};
+use html5ever::tendril::fmt::UTF8;
+use html5ever::tendril::stream::Utf8LossyDecoder;
+use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, LocalName, QualName};
use hyper_serde::Serde;
@@ -78,12 +80,11 @@ pub struct ServoParser {
reflector: Reflector,
/// The document associated with this parser.
document: Dom<Document>,
+ /// The decoder used for the network input.
+ network_decoder: DomRefCell<Option<NetworkDecoder>>,
/// Input received from network.
#[ignore_malloc_size_of = "Defined in html5ever"]
network_input: DomRefCell<BufferQueue>,
- /// Part of an UTF-8 code point spanning input chunks
- #[ignore_malloc_size_of = "Defined in html5ever"]
- incomplete_utf8: DomRefCell<Option<IncompleteUtf8>>,
/// Input received from script. Used only to support document.write().
#[ignore_malloc_size_of = "Defined in html5ever"]
script_input: DomRefCell<BufferQueue>,
@@ -401,7 +402,7 @@ impl ServoParser {
ServoParser {
reflector: Reflector::new(),
document: Dom::from_ref(document),
- incomplete_utf8: DomRefCell::new(None),
+ network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
network_input: DomRefCell::new(BufferQueue::new()),
script_input: DomRefCell::new(BufferQueue::new()),
tokenizer: DomRefCell::new(tokenizer),
@@ -433,22 +434,15 @@ impl ServoParser {
}
fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
- let mut chunk = ByteTendril::from(&*chunk);
- let mut network_input = self.network_input.borrow_mut();
- let mut incomplete_utf8 = self.incomplete_utf8.borrow_mut();
-
- if let Some(mut incomplete) = incomplete_utf8.take() {
- let result = incomplete.try_complete(chunk, |s| network_input.push_back(s));
- match result {
- Err(()) => {
- *incomplete_utf8 = Some(incomplete);
- return;
- },
- Ok(remaining) => chunk = remaining,
- }
+ let chunk = self
+ .network_decoder
+ .borrow_mut()
+ .as_mut()
+ .unwrap()
+ .decode(chunk);
+ if !chunk.is_empty() {
+ self.network_input.borrow_mut().push_back(chunk);
}
-
- *incomplete_utf8 = chunk.decode_utf8_lossy(|s| network_input.push_back(s));
}
fn push_string_input_chunk(&self, chunk: String) {
@@ -481,10 +475,11 @@ impl ServoParser {
// the parser remains unsuspended.
if self.last_chunk_received.get() {
- if let Some(_) = self.incomplete_utf8.borrow_mut().take() {
- self.network_input
- .borrow_mut()
- .push_back(StrTendril::from("\u{FFFD}"))
+ if let Some(decoder) = self.network_decoder.borrow_mut().take() {
+ let chunk = decoder.finish();
+ if !chunk.is_empty() {
+ self.network_input.borrow_mut().push_back(chunk);
+ }
}
}
self.tokenize(|tokenizer| tokenizer.feed(&mut *self.network_input.borrow_mut()));
@@ -552,7 +547,7 @@ impl ServoParser {
assert!(self.last_chunk_received.get());
assert!(self.script_input.borrow().is_empty());
assert!(self.network_input.borrow().is_empty());
- assert!(self.incomplete_utf8.borrow().is_none());
+ assert!(self.network_decoder.borrow().is_none());
// Step 1.
self.document
@@ -1200,3 +1195,49 @@ fn create_element_for_token(
// Step 13.
element
}
+
+#[derive(JSTraceable, MallocSizeOf)]
+struct NetworkDecoder {
+ #[ignore_malloc_size_of = "Defined in html5ever"]
+ decoder: Utf8LossyDecoder<NetworkSink>,
+}
+
+impl NetworkDecoder {
+ fn new() -> Self {
+ Self {
+ decoder: Utf8LossyDecoder::new(Default::default()),
+ }
+ }
+
+ fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
+ self.decoder.process(ByteTendril::from(&*chunk));
+ mem::replace(&mut self.decoder.inner_sink.output, Default::default())
+ }
+
+ fn finish(self) -> StrTendril {
+ self.decoder.finish()
+ }
+}
+
+#[derive(Default, JSTraceable)]
+struct NetworkSink {
+ output: StrTendril,
+}
+
+impl TendrilSink<UTF8> for NetworkSink {
+ type Output = StrTendril;
+
+ fn process(&mut self, t: StrTendril) {
+ if self.output.is_empty() {
+ self.output = t;
+ } else {
+ self.output.push_tendril(&t);
+ }
+ }
+
+ fn error(&mut self, _desc: Cow<'static, str>) {}
+
+ fn finish(self) -> Self::Output {
+ self.output
+ }
+}