From 8082df7d0da97f1951ae125956b962b92c98e69f Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Wed, 11 Mar 2015 10:44:59 -0400 Subject: Make external script sources load asynchronously, yet still block further parsing. Hook up document loading to async networking events. --- components/script/dom/servohtmlparser.rs | 237 +++++++++++++++++++++++++++++-- 1 file changed, 225 insertions(+), 12 deletions(-) (limited to 'components/script/dom/servohtmlparser.rs') diff --git a/components/script/dom/servohtmlparser.rs b/components/script/dom/servohtmlparser.rs index 199c06dca35..c4779a5666d 100644 --- a/components/script/dom/servohtmlparser.rs +++ b/components/script/dom/servohtmlparser.rs @@ -5,24 +5,35 @@ //! The bulk of the HTML parser integration is in `script::parse::html`. //! This module is mostly about its interaction with DOM memory management. +use document_loader::LoadType; use dom::bindings::cell::DOMRefCell; use dom::bindings::codegen::Bindings::ServoHTMLParserBinding; use dom::bindings::global::GlobalRef; use dom::bindings::trace::JSTraceable; use dom::bindings::js::{JS, JSRef, Rootable, Temporary}; +use dom::bindings::refcounted::Trusted; use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object}; use dom::document::{Document, DocumentHelpers}; -use dom::node::Node; +use dom::node::{window_from_node, Node}; +use dom::window::Window; +use network_listener::PreInvoke; use parse::Parser; +use script_task::{ScriptTask, ScriptChan}; -use util::task_state; +use msg::constellation_msg::{PipelineId, SubpageId}; +use net_traits::{Metadata, AsyncResponseListener}; +use encoding::all::UTF_8; +use encoding::types::{Encoding, DecoderTrap}; +use std::cell::{Cell, RefCell}; use std::default::Default; use url::Url; use js::jsapi::JSTracer; use html5ever::tokenizer; use html5ever::tree_builder; use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts}; +use hyper::header::ContentType; +use hyper::mime::{Mime, TopLevel, SubLevel}; #[must_root] #[jstraceable] 
@@ -41,6 +52,110 @@ pub struct FragmentContext<'a> { pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; +/// The context required for asynchronously fetching a document and parsing it progressively. +pub struct ParserContext { + /// The parser that initiated the request. + parser: RefCell<Option<Trusted<ServoHTMLParser>>>, + /// Is this document a synthesized document for a single image? + is_image_document: Cell<bool>, + /// The pipeline associated with this document. + id: PipelineId, + /// The subpage associated with this document. + subpage: Option<SubpageId>, + /// The target event loop for the response notifications. + script_chan: Box<ScriptChan+Send>, + /// The URL for this document. + url: Url, +} + +impl ParserContext { + pub fn new(id: PipelineId, subpage: Option<SubpageId>, script_chan: Box<ScriptChan+Send>, + url: Url) -> ParserContext { + ParserContext { + parser: RefCell::new(None), + is_image_document: Cell::new(false), + id: id, + subpage: subpage, + script_chan: script_chan, + url: url, + } + } +} + +impl AsyncResponseListener for ParserContext { + fn headers_available(&self, metadata: Metadata) { + let content_type = metadata.content_type.clone(); + + let parser = ScriptTask::page_fetch_complete(self.id.clone(), self.subpage.clone(), + metadata); + let parser = match parser { + Some(parser) => parser, + None => return, + }.root(); + + let parser = parser.r(); + let win = parser.window().root(); + *self.parser.borrow_mut() = Some(Trusted::new(win.r().get_cx(), parser, + self.script_chan.clone())); + + match content_type { + Some(ContentType(Mime(TopLevel::Image, _, _))) => { + self.is_image_document.set(true); + let page = format!("<html><body><img src='{}' /></body></html>", + self.url.serialize()); + parser.pending_input.borrow_mut().push(page); + parser.parse_sync(); + } + Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { + // FIXME: When servo/html5ever#109 is fixed remove usage and + // replace with fix from that issue. + + // text/plain documents require setting the tokenizer into PLAINTEXT mode. 
+ // This is done by using a <plaintext> element as the html5ever tokenizer + // provides no other way to change to that state. + // Spec for text/plain handling is: + // https://html.spec.whatwg.org/multipage/#read-text + let page = format!("<pre>\u{000A}<plaintext>"); + parser.pending_input.borrow_mut().push(page); + parser.parse_sync(); + }, + _ => {} + } + } + + fn data_available(&self, payload: Vec<u8>) { + if !self.is_image_document.get() { + // FIXME: use Vec<u8> (html5ever #34) + let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap(); + let parser = match self.parser.borrow().as_ref() { + Some(parser) => parser.to_temporary(), + None => return, + }.root(); + parser.r().parse_chunk(data); + } + } + + fn response_complete(&self, status: Result<(), String>) { + let parser = match self.parser.borrow().as_ref() { + Some(parser) => parser.to_temporary(), + None => return, + }.root(); + let doc = parser.r().document.root(); + doc.r().finish_load(LoadType::PageSource(self.url.clone())); + + if let Err(err) = status { + debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err); + // TODO(Savago): we should send a notification to callers #5463. + } + + parser.r().last_chunk_received.set(true); + parser.r().parse_sync(); + } +} + +impl PreInvoke for ParserContext { +} + // NB: JSTraceable is *not* auto-derived. // You must edit the impl below if you add fields! #[must_root] @@ -48,20 +163,46 @@ pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; pub struct ServoHTMLParser { reflector_: Reflector, tokenizer: DOMRefCell<Tokenizer>, + /// Input chunks received but not yet passed to the parser. + pending_input: DOMRefCell<Vec<String>>, + /// The document associated with this parser. + document: JS<Document>, + /// True if this parser should avoid passing any further data to the tokenizer. + suspended: Cell<bool>, + /// Whether to expect any further input from the associated network request. 
+ last_chunk_received: Cell<bool>, + /// The pipeline associated with this parse, unavailable if this parse does not + /// correspond to a page load. + pipeline: Option<PipelineId>, } -impl Parser for ServoHTMLParser{ - fn parse_chunk(&self, input: String) { - self.tokenizer().borrow_mut().feed(input); +impl<'a> Parser for JSRef<'a, ServoHTMLParser> { + fn parse_chunk(self, input: String) { + self.document.root().r().set_current_parser(Some(self)); + self.pending_input.borrow_mut().push(input); + self.parse_sync(); } - fn finish(&self){ + + fn finish(self) { + assert!(!self.suspended.get()); + assert!(self.pending_input.borrow().is_empty()); + self.tokenizer().borrow_mut().end(); + debug!("finished parsing"); + + let document = self.document.root(); + document.r().set_current_parser(None); + + if let Some(pipeline) = self.pipeline { + ScriptTask::parsing_complete(pipeline); + } } } impl ServoHTMLParser { #[allow(unrooted_must_root)] - pub fn new(base_url: Option<Url>, document: JSRef<Document>) -> Temporary<ServoHTMLParser> { + pub fn new(base_url: Option<Url>, document: JSRef<Document>, pipeline: Option<PipelineId>) + -> Temporary<ServoHTMLParser> { let window = document.window().root(); let sink = Sink { base_url: base_url, @@ -78,6 +219,11 @@ impl ServoHTMLParser { let parser = ServoHTMLParser { reflector_: Reflector::new(), tokenizer: DOMRefCell::new(tok), + pending_input: DOMRefCell::new(vec!()), + document: JS::from_rooted(document), + suspended: Cell::new(false), + last_chunk_received: Cell::new(false), + pipeline: pipeline, }; reflect_dom_object(box parser, GlobalRef::Window(window.r()), @@ -111,6 +257,11 @@ impl ServoHTMLParser { let parser = ServoHTMLParser { reflector_: Reflector::new(), tokenizer: DOMRefCell::new(tok), + pending_input: DOMRefCell::new(vec!()), + document: JS::from_rooted(document), + suspended: Cell::new(false), + last_chunk_received: Cell::new(true), + pipeline: None, }; reflect_dom_object(box parser, GlobalRef::Window(window.r()), @@ 
-129,6 +280,73 @@ impl Reflectable for ServoHTMLParser { } } +trait PrivateServoHTMLParserHelpers { + /// Synchronously run the tokenizer parse loop until explicitly suspended or + /// the tokenizer runs out of input. + fn parse_sync(self); + /// Retrieve the window object associated with this parser. + fn window(self) -> Temporary<Window>; +} + +impl<'a> PrivateServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> { + fn parse_sync(self) { + let mut first = true; + + // This parser will continue to parse while there is either pending input or + // the parser remains unsuspended. + loop { + if self.suspended.get() { + return; + } + + if self.pending_input.borrow().is_empty() && !first { + break; + } + + let mut pending_input = self.pending_input.borrow_mut(); + if !pending_input.is_empty() { + let chunk = pending_input.remove(0); + self.tokenizer.borrow_mut().feed(chunk); + } else { + self.tokenizer.borrow_mut().run(); + } + + first = false; + } + + if self.last_chunk_received.get() { + self.finish(); + } + } + + fn window(self) -> Temporary<Window> { + let doc = self.document.root(); + window_from_node(doc.r()) + } +} + +pub trait ServoHTMLParserHelpers { + /// Cause the parser to interrupt next time the tokenizer reaches a quiescent state. + /// No further parsing will occur after that point until the `resume` method is called. + /// Panics if the parser is already suspended. + fn suspend(self); + /// Immediately resume a suspended parser. Panics if the parser is not suspended. 
+ fn resume(self); +} + +impl<'a> ServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> { + fn suspend(self) { + assert!(!self.suspended.get()); + self.suspended.set(true); + } + + fn resume(self) { + assert!(self.suspended.get()); + self.suspended.set(false); + self.parse_sync(); + } +} + struct Tracer { trc: *mut JSTracer, } @@ -152,11 +370,6 @@ impl JSTraceable for ServoHTMLParser { let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>; unsafe { - // Assertion: If the parser is mutably borrowed, we're in the - // parsing code paths. - debug_assert!(task_state::get().contains(task_state::IN_HTML_PARSER) - || !self.tokenizer.is_mutably_borrowed()); - let tokenizer = self.tokenizer.borrow_for_gc_trace(); let tree_builder = tokenizer.sink(); tree_builder.trace_handles(tracer); -- cgit v1.2.3