aboutsummaryrefslogtreecommitdiffstats
path: root/components/script/dom/servohtmlparser.rs
diff options
context:
space:
mode:
author Josh Matthews <josh@joshmatthews.net> 2015-03-11 10:44:59 -0400
committer Josh Matthews <josh@joshmatthews.net> 2015-05-20 14:22:09 -0400
commit 8082df7d0da97f1951ae125956b962b92c98e69f (patch)
tree 86131e200a39a6f85afcfff7fa8f6f904e94cc0b /components/script/dom/servohtmlparser.rs
parent e52197d1261055527a838f74b353a1124d6b077a (diff)
download servo-8082df7d0da97f1951ae125956b962b92c98e69f.tar.gz
download servo-8082df7d0da97f1951ae125956b962b92c98e69f.zip
Make external script sources load asynchronously, yet still block further parsing. Hook up document loading to async networking events.
Diffstat (limited to 'components/script/dom/servohtmlparser.rs')
-rw-r--r-- components/script/dom/servohtmlparser.rs 237
1 files changed, 225 insertions, 12 deletions
diff --git a/components/script/dom/servohtmlparser.rs b/components/script/dom/servohtmlparser.rs
index 199c06dca35..c4779a5666d 100644
--- a/components/script/dom/servohtmlparser.rs
+++ b/components/script/dom/servohtmlparser.rs
@@ -5,24 +5,35 @@
//! The bulk of the HTML parser integration is in `script::parse::html`.
//! This module is mostly about its interaction with DOM memory management.
+use document_loader::LoadType;
use dom::bindings::cell::DOMRefCell;
use dom::bindings::codegen::Bindings::ServoHTMLParserBinding;
use dom::bindings::global::GlobalRef;
use dom::bindings::trace::JSTraceable;
use dom::bindings::js::{JS, JSRef, Rootable, Temporary};
+use dom::bindings::refcounted::Trusted;
use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object};
use dom::document::{Document, DocumentHelpers};
-use dom::node::Node;
+use dom::node::{window_from_node, Node};
+use dom::window::Window;
+use network_listener::PreInvoke;
use parse::Parser;
+use script_task::{ScriptTask, ScriptChan};
-use util::task_state;
+use msg::constellation_msg::{PipelineId, SubpageId};
+use net_traits::{Metadata, AsyncResponseListener};
+use encoding::all::UTF_8;
+use encoding::types::{Encoding, DecoderTrap};
+use std::cell::{Cell, RefCell};
use std::default::Default;
use url::Url;
use js::jsapi::JSTracer;
use html5ever::tokenizer;
use html5ever::tree_builder;
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
+use hyper::header::ContentType;
+use hyper::mime::{Mime, TopLevel, SubLevel};
#[must_root]
#[jstraceable]
@@ -41,6 +52,110 @@ pub struct FragmentContext<'a> {
pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
+/// State kept across the network callbacks of one asynchronous document fetch, so the response can be parsed progressively as it arrives.
+pub struct ParserContext {
+ /// The parser consuming this response. `None` until `headers_available` obtains one from `ScriptTask::page_fetch_complete`.
+ parser: RefCell<Option<Trusted<ServoHTMLParser>>>,
+ /// Is this document a synthesized document for a single image? Set in `headers_available`; when true, `data_available` ignores the payload.
+ is_image_document: Cell<bool>,
+ /// The pipeline associated with this document; passed to `ScriptTask::page_fetch_complete`.
+ id: PipelineId,
+ /// The subpage associated with this document, if any; passed to `ScriptTask::page_fetch_complete`.
+ subpage: Option<SubpageId>,
+ /// The target event loop for the response notifications; cloned into the `Trusted` parser handle.
+ script_chan: Box<ScriptChan+Send>,
+ /// The URL for this document; also used as the `<img>` source of a synthesized image document.
+ url: Url,
+}
+
+impl ParserContext { // Constructor only; the response callbacks live in the AsyncResponseListener impl.
+ pub fn new(id: PipelineId, subpage: Option<SubpageId>, script_chan: Box<ScriptChan+Send>,
+ url: Url) -> ParserContext { // Builds the context for a fetch whose headers have not arrived yet.
+ ParserContext {
+ parser: RefCell::new(None), // filled in by headers_available once a parser exists
+ is_image_document: Cell::new(false), // set to true by headers_available for image responses
+ id: id,
+ subpage: subpage,
+ script_chan: script_chan,
+ url: url,
+ }
+ }
+}
+
+impl AsyncResponseListener for ParserContext {
+ fn headers_available(&self, metadata: Metadata) { // Obtains the parser for this load and, for image and text/plain responses, feeds it synthesized markup.
+ let content_type = metadata.content_type.clone(); // cloned because `metadata` is moved into page_fetch_complete below
+
+ let parser = ScriptTask::page_fetch_complete(self.id.clone(), self.subpage.clone(),
+ metadata);
+ let parser = match parser {
+ Some(parser) => parser,
+ None => return, // no parser produced: self.parser stays None, so the later callbacks return early too
+ }.root();
+
+ let parser = parser.r();
+ let win = parser.window().root();
+ *self.parser.borrow_mut() = Some(Trusted::new(win.r().get_cx(), parser, // stored so data_available/response_complete can reach the parser
+ self.script_chan.clone()));
+
+ match content_type {
+ Some(ContentType(Mime(TopLevel::Image, _, _))) => {
+ self.is_image_document.set(true); // makes data_available skip the response body
+ let page = format!("<html><body><img src='{}' /></body></html>", // NOTE(review): the URL is interpolated without HTML escaping
+ self.url.serialize()); // NOTE(review): confirm serialize() can never yield a `'` that ends the attribute early
+ parser.pending_input.borrow_mut().push(page);
+ parser.parse_sync();
+ }
+ Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
+ // FIXME: When servo/html5ever#109 is fixed, remove the <plaintext> usage and
+ // replace it with the fix from that issue.
+
+ // text/plain documents require putting the tokenizer into PLAINTEXT mode.
+ // This is done with a <plaintext> element, as the html5ever tokenizer
+ // provides no other way to switch to that state.
+ // The spec for text/plain handling is:
+ // https://html.spec.whatwg.org/multipage/#read-text
+ let page = format!("<pre>\u{000A}<plaintext>");
+ parser.pending_input.borrow_mut().push(page);
+ parser.parse_sync();
+ },
+ _ => {} // any other (or missing) content type: nothing is pre-seeded
+ }
+ }
+
+ fn data_available(&self, payload: Vec<u8>) { // Decodes one body chunk as UTF-8 and hands it to the parser; ignored for image documents.
+ if !self.is_image_document.get() {
+ // FIXME: use Vec<u8> (html5ever #34). NOTE(review): each chunk is decoded on its own, so a multi-byte sequence split across two chunks would presumably be replaced rather than reassembled; confirm.
+ let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
+ let parser = match self.parser.borrow().as_ref() {
+ Some(parser) => parser.to_temporary(),
+ None => return, // headers_available never stored a parser
+ }.root();
+ parser.r().parse_chunk(data);
+ }
+ }
+
+ fn response_complete(&self, status: Result<(), String>) { // Reports the page-source load as finished, flags end of input and runs the parser one more time.
+ let parser = match self.parser.borrow().as_ref() {
+ Some(parser) => parser.to_temporary(),
+ None => return, // headers_available never stored a parser
+ }.root();
+ let doc = parser.r().document.root();
+ doc.r().finish_load(LoadType::PageSource(self.url.clone())); // called whether or not `status` is an error
+
+ if let Err(err) = status {
+ debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err);
+ // TODO(Savago): we should send a notification to callers (#5463); for now the error is only logged.
+ }
+
+ parser.r().last_chunk_received.set(true); // lets parse_sync call finish() once it leaves its loop unsuspended
+ parser.r().parse_sync();
+ }
+}
+
+impl PreInvoke for ParserContext { // No methods are overridden here; presumably the trait's defaults apply. TODO confirm against network_listener::PreInvoke
+}
+
// NB: JSTraceable is *not* auto-derived.
// You must edit the impl below if you add fields!
#[must_root]
@@ -48,20 +163,46 @@ pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;
pub struct ServoHTMLParser {
reflector_: Reflector,
tokenizer: DOMRefCell<Tokenizer>,
+ /// Input chunks received but not yet passed to the tokenizer; pushed at the back and removed from the front by `parse_sync`.
+ pending_input: DOMRefCell<Vec<String>>,
+ /// The document associated with this parser. NOTE(review): the JSTraceable hunk visible in this diff adds no tracing for this field; confirm it is traced elsewhere.
+ document: JS<Document>,
+ /// True if this parser should avoid passing any further data to the tokenizer; toggled by `suspend` and `resume`.
+ suspended: Cell<bool>,
+ /// Whether all input from the associated network request has arrived; when true, `parse_sync` calls `finish` after its loop.
+ last_chunk_received: Cell<bool>,
+ /// The pipeline associated with this parse, unavailable if this parse does not
+ /// correspond to a page load. When present, `finish` passes it to `ScriptTask::parsing_complete`.
+ pipeline: Option<PipelineId>,
}
-impl Parser for ServoHTMLParser{
- fn parse_chunk(&self, input: String) {
- self.tokenizer().borrow_mut().feed(input);
+impl<'a> Parser for JSRef<'a, ServoHTMLParser> {
+ fn parse_chunk(self, input: String) { // Queues `input` and runs the parse loop instead of feeding the tokenizer directly.
+ self.document.root().r().set_current_parser(Some(self)); // registers this parser as the document's current parser
+ self.pending_input.borrow_mut().push(input);
+ self.parse_sync(); // returns without consuming the chunk if the parser is suspended
}
- fn finish(&self){
+
+ fn finish(self) { // Ends tokenization; panics if the parser is suspended or input is still queued.
+ assert!(!self.suspended.get());
+ assert!(self.pending_input.borrow().is_empty());
+
self.tokenizer().borrow_mut().end();
+ debug!("finished parsing");
+
+ let document = self.document.root();
+ document.r().set_current_parser(None); // the document no longer has an active parser
+
+ if let Some(pipeline) = self.pipeline { // only parses created with a pipeline report completion to the script task
+ ScriptTask::parsing_complete(pipeline);
+ }
}
}
impl ServoHTMLParser {
#[allow(unrooted_must_root)]
- pub fn new(base_url: Option<Url>, document: JSRef<Document>) -> Temporary<ServoHTMLParser> {
+ pub fn new(base_url: Option<Url>, document: JSRef<Document>, pipeline: Option<PipelineId>)
+ -> Temporary<ServoHTMLParser> {
let window = document.window().root();
let sink = Sink {
base_url: base_url,
@@ -78,6 +219,11 @@ impl ServoHTMLParser {
let parser = ServoHTMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
+ pending_input: DOMRefCell::new(vec!()),
+ document: JS::from_rooted(document),
+ suspended: Cell::new(false),
+ last_chunk_received: Cell::new(false),
+ pipeline: pipeline,
};
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
@@ -111,6 +257,11 @@ impl ServoHTMLParser {
let parser = ServoHTMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
+ pending_input: DOMRefCell::new(vec!()),
+ document: JS::from_rooted(document),
+ suspended: Cell::new(false),
+ last_chunk_received: Cell::new(true),
+ pipeline: None,
};
reflect_dom_object(box parser, GlobalRef::Window(window.r()),
@@ -129,6 +280,73 @@ impl Reflectable for ServoHTMLParser {
}
}
+trait PrivateServoHTMLParserHelpers {
+ /// Synchronously run the tokenizer parse loop until the parser is suspended or the queued
+ /// input runs out; if it ends unsuspended and the last chunk has been received, finish the parse.
+ fn parse_sync(self);
+ /// Retrieve the window object associated with this parser's document.
+ fn window(self) -> Temporary<Window>;
+}
+
+impl<'a> PrivateServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
+ fn parse_sync(self) {
+ let mut first = true; // forces one pass through the loop even when no input is queued
+
+ // Keep parsing while the parser is not suspended and input is still queued. The first
+ // pass runs even with an empty queue, in which case the tokenizer is simply run.
+ loop {
+ if self.suspended.get() {
+ return; // suspended: leave any remaining input queued and skip the finish check below
+ }
+
+ if self.pending_input.borrow().is_empty() && !first {
+ break;
+ }
+
+ let mut pending_input = self.pending_input.borrow_mut(); // NOTE(review): this borrow is held while the tokenizer runs; if tokenizing can re-enter parse_chunk the nested borrow_mut would presumably panic; confirm
+ if !pending_input.is_empty() {
+ let chunk = pending_input.remove(0); // oldest chunk first
+ self.tokenizer.borrow_mut().feed(chunk);
+ } else {
+ self.tokenizer.borrow_mut().run();
+ }
+
+ first = false;
+ }
+
+ if self.last_chunk_received.get() {
+ self.finish(); // queue is empty and no further network input is expected
+ }
+ }
+
+ fn window(self) -> Temporary<Window> {
+ let doc = self.document.root();
+ window_from_node(doc.r()) // the window is looked up through the parser's document node
+ }
+}
+
+pub trait ServoHTMLParserHelpers {
+ /// Mark the parser as suspended, so the parse loop stops the next time it checks the flag.
+ /// No further parsing will occur after that point until the `resume` method is called.
+ /// Panics if the parser is already suspended.
+ fn suspend(self);
+ /// Clear the suspended flag and immediately run the parse loop again. Panics if the parser is not suspended.
+ fn resume(self);
+}
+
+impl<'a> ServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> {
+ fn suspend(self) {
+ assert!(!self.suspended.get()); // suspending twice is treated as a caller bug
+ self.suspended.set(true); // parse_sync checks this flag at the top of every loop iteration
+ }
+
+ fn resume(self) {
+ assert!(self.suspended.get()); // resuming a parser that is not suspended is treated as a caller bug
+ self.suspended.set(false);
+ self.parse_sync(); // consume whatever input was queued while suspended
+ }
+}
+
struct Tracer { // Holds the raw JS tracer pointer; a value of this type is used as a `tree_builder::Tracer` in the JSTraceable impl below.
trc: *mut JSTracer,
}
@@ -152,11 +370,6 @@ impl JSTraceable for ServoHTMLParser {
let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>;
unsafe {
- // Assertion: If the parser is mutably borrowed, we're in the
- // parsing code paths.
- debug_assert!(task_state::get().contains(task_state::IN_HTML_PARSER)
- || !self.tokenizer.is_mutably_borrowed());
-
let tokenizer = self.tokenizer.borrow_for_gc_trace();
let tree_builder = tokenizer.sink();
tree_builder.trace_handles(tracer);