diff options
author | bors-servo <metajack+bors@gmail.com> | 2015-05-21 12:37:06 -0500 |
---|---|---|
committer | bors-servo <metajack+bors@gmail.com> | 2015-05-21 12:37:06 -0500 |
commit | dd319c1a998bbd3eeb84fdc4ca8a41ee7877ca37 (patch) | |
tree | 086281d82bf24cdf113cf78f329a7fb7a1bc329a /components/script/dom | |
parent | a0fccea670124d5ccfef1c13fe1b5d2e58891236 (diff) | |
parent | 512927eb343eb477cea45ebcf6d6820a2fe70a55 (diff) | |
download | servo-dd319c1a998bbd3eeb84fdc4ca8a41ee7877ca37.tar.gz servo-dd319c1a998bbd3eeb84fdc4ca8a41ee7877ca37.zip |
Auto merge of #5727 - jdm:parserinterrupt2, r=mbrubeck
Make external script sources load asynchronously, yet still block further parsing. Hook up document loading to async networking events.
Relies on https://github.com/servo/html5ever/pull/107, so we'll likely need to backport it rather than wait for the next rustc upgrade.
<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/5727)
<!-- Reviewable:end -->
Diffstat (limited to 'components/script/dom')
-rw-r--r-- | components/script/dom/document.rs | 25 | ||||
-rw-r--r-- | components/script/dom/domparser.rs | 4 | ||||
-rw-r--r-- | components/script/dom/htmlscriptelement.rs | 107 | ||||
-rw-r--r-- | components/script/dom/servohtmlparser.rs | 237 |
4 files changed, 336 insertions, 37 deletions
diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index 9fabde71dd9..6001cbc24ad 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -61,6 +61,7 @@ use dom::nodelist::NodeList; use dom::text::Text; use dom::processinginstruction::ProcessingInstruction; use dom::range::Range; +use dom::servohtmlparser::ServoHTMLParser; use dom::treewalker::TreeWalker; use dom::uievent::UIEvent; use dom::window::{Window, WindowHelpers, ReflowReason}; @@ -73,7 +74,7 @@ use msg::constellation_msg::{ConstellationChan, FocusType, Key, KeyState, KeyMod use msg::constellation_msg::{SUPER, ALT, SHIFT, CONTROL}; use net_traits::CookieSource::NonHTTP; use net_traits::ControlMsg::{SetCookiesForUrl, GetCookiesForUrl}; -use net_traits::{Metadata, LoadResponse, PendingAsyncLoad}; +use net_traits::{Metadata, PendingAsyncLoad, AsyncResponseTarget}; use script_task::Runnable; use script_traits::{MouseButton, UntrustedNodeAddress}; use util::opts; @@ -96,7 +97,7 @@ use std::ascii::AsciiExt; use std::cell::{Cell, Ref, RefMut, RefCell}; use std::default::Default; use std::ptr; -use std::sync::mpsc::{Receiver, channel}; +use std::sync::mpsc::channel; use time; #[derive(PartialEq)] @@ -145,6 +146,8 @@ pub struct Document { animation_frame_list: RefCell<HashMap<i32, Box<Fn(f64)>>>, /// Tracks all outstanding loads related to this document. loader: DOMRefCell<DocumentLoader>, + /// The current active HTML parser, to allow resuming after interruptions. 
+ current_parser: MutNullableHeap<JS<ServoHTMLParser>>, } impl DocumentDerived for EventTarget { @@ -263,9 +266,11 @@ pub trait DocumentHelpers<'a> { /// http://w3c.github.io/animation-timing/#dfn-invoke-callbacks-algorithm fn invoke_animation_callbacks(self); fn prepare_async_load(self, load: LoadType) -> PendingAsyncLoad; - fn load_async(self, load: LoadType) -> Receiver<LoadResponse>; + fn load_async(self, load: LoadType, listener: Box<AsyncResponseTarget + Send>); fn load_sync(self, load: LoadType) -> Result<(Metadata, Vec<u8>), String>; fn finish_load(self, load: LoadType); + fn set_current_parser(self, script: Option<JSRef<ServoHTMLParser>>); + fn get_current_parser(self) -> Option<Temporary<ServoHTMLParser>>; } impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> { @@ -892,9 +897,9 @@ impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> { loader.prepare_async_load(load) } - fn load_async(self, load: LoadType) -> Receiver<LoadResponse> { + fn load_async(self, load: LoadType, listener: Box<AsyncResponseTarget + Send>) { let mut loader = self.loader.borrow_mut(); - loader.load_async(load) + loader.load_async(load, listener) } fn load_sync(self, load: LoadType) -> Result<(Metadata, Vec<u8>), String> { @@ -906,6 +911,14 @@ impl<'a> DocumentHelpers<'a> for JSRef<'a, Document> { let mut loader = self.loader.borrow_mut(); loader.finish_load(load); } + + fn set_current_parser(self, script: Option<JSRef<ServoHTMLParser>>) { + self.current_parser.set(script.map(JS::from_rooted)); + } + + fn get_current_parser(self) -> Option<Temporary<ServoHTMLParser>> { + self.current_parser.get().map(Temporary::from_rooted) + } } pub enum MouseEventType { @@ -914,6 +927,7 @@ pub enum MouseEventType { MouseUp, } + #[derive(PartialEq)] pub enum DocumentSource { FromParser, @@ -987,6 +1001,7 @@ impl Document { animation_frame_ident: Cell::new(0), animation_frame_list: RefCell::new(HashMap::new()), loader: DOMRefCell::new(doc_loader), + current_parser: Default::default(), } } diff --git 
a/components/script/dom/domparser.rs b/components/script/dom/domparser.rs index 5685c3f324f..40c9d8f436c 100644 --- a/components/script/dom/domparser.rs +++ b/components/script/dom/domparser.rs @@ -15,7 +15,7 @@ use dom::bindings::utils::{Reflector, reflect_dom_object}; use dom::document::{Document, DocumentHelpers, IsHTMLDocument}; use dom::document::DocumentSource; use dom::window::{Window, WindowHelpers}; -use parse::html::{HTMLInput, parse_html}; +use parse::html::{ParseContext, parse_html}; use util::str::DOMString; use std::borrow::ToOwned; @@ -64,7 +64,7 @@ impl<'a> DOMParserMethods for JSRef<'a, DOMParser> { None, DocumentSource::FromParser, loader).root(); - parse_html(document.r(), HTMLInput::InputString(s), &url, None); + parse_html(document.r(), s, &url, ParseContext::Owner(None)); document.r().set_ready_state(DocumentReadyState::Complete); Ok(Temporary::from_rooted(document.r())) } diff --git a/components/script/dom/htmlscriptelement.rs b/components/script/dom/htmlscriptelement.rs index 2f7a52ec272..2011a31bf66 100644 --- a/components/script/dom/htmlscriptelement.rs +++ b/components/script/dom/htmlscriptelement.rs @@ -28,17 +28,21 @@ use dom::event::{Event, EventBubbles, EventCancelable, EventHelpers}; use dom::element::ElementTypeId; use dom::htmlelement::{HTMLElement, HTMLElementTypeId}; use dom::node::{Node, NodeHelpers, NodeTypeId, document_from_node, window_from_node, CloneChildrenFlag}; +use dom::servohtmlparser::ServoHTMLParserHelpers; use dom::virtualmethods::VirtualMethods; use dom::window::{WindowHelpers, ScriptHelpers}; -use script_task::{ScriptMsg, Runnable}; +use network_listener::{NetworkListener, PreInvoke}; +use script_task::{ScriptChan, ScriptMsg, Runnable}; use encoding::all::UTF_8; use encoding::label::encoding_from_whatwg_label; use encoding::types::{Encoding, EncodingRef, DecoderTrap}; -use net_traits::Metadata; +use net_traits::{Metadata, AsyncResponseListener}; use util::str::{DOMString, HTML_SPACE_CHARACTERS, StaticStringVec}; 
-use std::borrow::ToOwned; -use std::cell::Cell; +use html5ever::tree_builder::NextParserState; +use std::cell::{RefCell, Cell}; +use std::mem; +use std::sync::{Arc, Mutex}; use string_cache::Atom; use url::{Url, UrlParser}; @@ -99,7 +103,7 @@ impl HTMLScriptElement { pub trait HTMLScriptElementHelpers { /// Prepare a script (<https://www.whatwg.org/html/#prepare-a-script>) - fn prepare(self); + fn prepare(self) -> NextParserState; /// [Execute a script block] /// (https://html.spec.whatwg.org/multipage/#execute-the-script-block) @@ -153,12 +157,57 @@ pub enum ScriptOrigin { External(Result<(Metadata, Vec<u8>), String>), } +/// The context required for asynchronously loading an external script source. +struct ScriptContext { + /// The element that initiated the request. + elem: Trusted<HTMLScriptElement>, + /// The response body received to date. + data: RefCell<Vec<u8>>, + /// The response metadata received to date. + metadata: RefCell<Option<Metadata>>, + /// Whether the owning document's parser should resume once the response completes. + resume_on_completion: bool, + /// The initial URL requested. 
+ url: Url, +} + +impl AsyncResponseListener for ScriptContext { + fn headers_available(&self, metadata: Metadata) { + *self.metadata.borrow_mut() = Some(metadata); + } + + fn data_available(&self, payload: Vec<u8>) { + let mut payload = payload; + self.data.borrow_mut().append(&mut payload); + } + + fn response_complete(&self, status: Result<(), String>) { + let load = status.map(|_| { + let data = mem::replace(&mut *self.data.borrow_mut(), vec!()); + let metadata = self.metadata.borrow_mut().take().unwrap(); + (metadata, data) + }); + let elem = self.elem.to_temporary().root(); + + elem.r().execute(ScriptOrigin::External(load)); + + let document = document_from_node(elem.r()).root(); + document.r().finish_load(LoadType::Script(self.url.clone())); + + if self.resume_on_completion { + document.r().get_current_parser().unwrap().root().r().resume(); + } + } +} + +impl PreInvoke for ScriptContext {} + impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { - fn prepare(self) { + fn prepare(self) -> NextParserState { // https://html.spec.whatwg.org/multipage/#prepare-a-script // Step 1. if self.already_started.get() { - return; + return NextParserState::Continue; } // Step 2. let was_parser_inserted = self.parser_inserted.get(); @@ -172,16 +221,16 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { // Step 4. let text = self.Text(); if text.len() == 0 && !element.has_attribute(&atom!("src")) { - return; + return NextParserState::Continue; } // Step 5. let node: JSRef<Node> = NodeCast::from_ref(self); if !node.is_in_doc() { - return; + return NextParserState::Continue; } // Step 6, 7. if !self.is_javascript() { - return; + return NextParserState::Continue; } // Step 8. 
if was_parser_inserted { @@ -195,12 +244,12 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { let document_from_node_ref = document_from_node(self).root(); let document_from_node_ref = document_from_node_ref.r(); if self.parser_inserted.get() && self.parser_document.root().r() != document_from_node_ref { - return; + return NextParserState::Continue; } // Step 11. if !document_from_node_ref.is_scripting_enabled() { - return; + return NextParserState::Continue; } // Step 12. @@ -212,13 +261,13 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { .to_ascii_lowercase(); let for_value = for_value.trim_matches(HTML_SPACE_CHARACTERS); if for_value != "window" { - return; + return NextParserState::Continue; } let event_value = event_attribute.Value().to_ascii_lowercase(); let event_value = event_value.trim_matches(HTML_SPACE_CHARACTERS); if event_value != "onload" && event_value != "onload()" { - return; + return NextParserState::Continue; } }, (_, _) => (), @@ -245,7 +294,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { // Step 14.2 if src.is_empty() { self.queue_error_event(); - return; + return NextParserState::Continue; } // Step 14.3 @@ -254,7 +303,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { // Step 14.4 error!("error parsing URL for script {}", src); self.queue_error_event(); - return; + return NextParserState::Continue; } Ok(url) => { // Step 14.5 @@ -263,8 +312,29 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { // the origin of the script element's node document, and the default origin // behaviour set to taint. 
let doc = document_from_node(self).root(); - let contents = doc.r().load_sync(LoadType::Script(url)); - ScriptOrigin::External(contents) + + let script_chan = window.script_chan(); + let elem = Trusted::new(window.get_cx(), self, script_chan.clone()); + + let context = Arc::new(Mutex::new(ScriptContext { + elem: elem, + data: RefCell::new(vec!()), + metadata: RefCell::new(None), + resume_on_completion: self.parser_inserted.get(), + url: url.clone(), + })); + + let listener = box NetworkListener { + context: context, + script_chan: script_chan, + }; + + doc.r().load_async(LoadType::Script(url), listener); + + if self.parser_inserted.get() { + doc.r().get_current_parser().unwrap().root().r().suspend(); + } + return NextParserState::Suspend; } } }, @@ -275,6 +345,7 @@ impl<'a> HTMLScriptElementHelpers for JSRef<'a, HTMLScriptElement> { // TODO: Add support for the `defer` and `async` attributes. (For now, we fetch all // scripts synchronously and execute them immediately.) self.execute(load); + NextParserState::Continue } fn execute(self, load: ScriptOrigin) { diff --git a/components/script/dom/servohtmlparser.rs b/components/script/dom/servohtmlparser.rs index 199c06dca35..c4779a5666d 100644 --- a/components/script/dom/servohtmlparser.rs +++ b/components/script/dom/servohtmlparser.rs @@ -5,24 +5,35 @@ //! The bulk of the HTML parser integration is in `script::parse::html`. //! This module is mostly about its interaction with DOM memory management. 
+use document_loader::LoadType; use dom::bindings::cell::DOMRefCell; use dom::bindings::codegen::Bindings::ServoHTMLParserBinding; use dom::bindings::global::GlobalRef; use dom::bindings::trace::JSTraceable; use dom::bindings::js::{JS, JSRef, Rootable, Temporary}; +use dom::bindings::refcounted::Trusted; use dom::bindings::utils::{Reflectable, Reflector, reflect_dom_object}; use dom::document::{Document, DocumentHelpers}; -use dom::node::Node; +use dom::node::{window_from_node, Node}; +use dom::window::Window; +use network_listener::PreInvoke; use parse::Parser; +use script_task::{ScriptTask, ScriptChan}; -use util::task_state; +use msg::constellation_msg::{PipelineId, SubpageId}; +use net_traits::{Metadata, AsyncResponseListener}; +use encoding::all::UTF_8; +use encoding::types::{Encoding, DecoderTrap}; +use std::cell::{Cell, RefCell}; use std::default::Default; use url::Url; use js::jsapi::JSTracer; use html5ever::tokenizer; use html5ever::tree_builder; use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts}; +use hyper::header::ContentType; +use hyper::mime::{Mime, TopLevel, SubLevel}; #[must_root] #[jstraceable] @@ -41,6 +52,110 @@ pub struct FragmentContext<'a> { pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; +/// The context required for asynchronously fetching a document and parsing it progressively. +pub struct ParserContext { + /// The parser that initiated the request. + parser: RefCell<Option<Trusted<ServoHTMLParser>>>, + /// Is this document a synthesized document for a single image? + is_image_document: Cell<bool>, + /// The pipeline associated with this document. + id: PipelineId, + /// The subpage associated with this document. + subpage: Option<SubpageId>, + /// The target event loop for the response notifications. + script_chan: Box<ScriptChan+Send>, + /// The URL for this document. 
+ url: Url, +} + +impl ParserContext { + pub fn new(id: PipelineId, subpage: Option<SubpageId>, script_chan: Box<ScriptChan+Send>, + url: Url) -> ParserContext { + ParserContext { + parser: RefCell::new(None), + is_image_document: Cell::new(false), + id: id, + subpage: subpage, + script_chan: script_chan, + url: url, + } + } +} + +impl AsyncResponseListener for ParserContext { + fn headers_available(&self, metadata: Metadata) { + let content_type = metadata.content_type.clone(); + + let parser = ScriptTask::page_fetch_complete(self.id.clone(), self.subpage.clone(), + metadata); + let parser = match parser { + Some(parser) => parser, + None => return, + }.root(); + + let parser = parser.r(); + let win = parser.window().root(); + *self.parser.borrow_mut() = Some(Trusted::new(win.r().get_cx(), parser, + self.script_chan.clone())); + + match content_type { + Some(ContentType(Mime(TopLevel::Image, _, _))) => { + self.is_image_document.set(true); + let page = format!("<html><body><img src='{}' /></body></html>", + self.url.serialize()); + parser.pending_input.borrow_mut().push(page); + parser.parse_sync(); + } + Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { + // FIXME: When servo/html5ever#109 is fixed remove <plaintext> usage and + // replace with fix from that issue. + + // text/plain documents require setting the tokenizer into PLAINTEXT mode. + // This is done by using a <plaintext> element as the html5ever tokenizer + // provides no other way to change to that state. 
+ // Spec for text/plain handling is: + // https://html.spec.whatwg.org/multipage/#read-text + let page = format!("<pre>\u{000A}<plaintext>"); + parser.pending_input.borrow_mut().push(page); + parser.parse_sync(); + }, + _ => {} + } + } + + fn data_available(&self, payload: Vec<u8>) { + if !self.is_image_document.get() { + // FIXME: use Vec<u8> (html5ever #34) + let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap(); + let parser = match self.parser.borrow().as_ref() { + Some(parser) => parser.to_temporary(), + None => return, + }.root(); + parser.r().parse_chunk(data); + } + } + + fn response_complete(&self, status: Result<(), String>) { + let parser = match self.parser.borrow().as_ref() { + Some(parser) => parser.to_temporary(), + None => return, + }.root(); + let doc = parser.r().document.root(); + doc.r().finish_load(LoadType::PageSource(self.url.clone())); + + if let Err(err) = status { + debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err); + // TODO(Savago): we should send a notification to callers #5463. + } + + parser.r().last_chunk_received.set(true); + parser.r().parse_sync(); + } +} + +impl PreInvoke for ParserContext { +} + // NB: JSTraceable is *not* auto-derived. // You must edit the impl below if you add fields! #[must_root] @@ -48,20 +163,46 @@ pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; pub struct ServoHTMLParser { reflector_: Reflector, tokenizer: DOMRefCell<Tokenizer>, + /// Input chunks received but not yet passed to the parser. + pending_input: DOMRefCell<Vec<String>>, + /// The document associated with this parser. + document: JS<Document>, + /// True if this parser should avoid passing any further data to the tokenizer. + suspended: Cell<bool>, + /// Whether to expect any further input from the associated network request. + last_chunk_received: Cell<bool>, + /// The pipeline associated with this parse, unavailable if this parse does not + /// correspond to a page load. 
+ pipeline: Option<PipelineId>, } -impl Parser for ServoHTMLParser{ - fn parse_chunk(&self, input: String) { - self.tokenizer().borrow_mut().feed(input); +impl<'a> Parser for JSRef<'a, ServoHTMLParser> { + fn parse_chunk(self, input: String) { + self.document.root().r().set_current_parser(Some(self)); + self.pending_input.borrow_mut().push(input); + self.parse_sync(); } - fn finish(&self){ + + fn finish(self) { + assert!(!self.suspended.get()); + assert!(self.pending_input.borrow().is_empty()); + self.tokenizer().borrow_mut().end(); + debug!("finished parsing"); + + let document = self.document.root(); + document.r().set_current_parser(None); + + if let Some(pipeline) = self.pipeline { + ScriptTask::parsing_complete(pipeline); + } } } impl ServoHTMLParser { #[allow(unrooted_must_root)] - pub fn new(base_url: Option<Url>, document: JSRef<Document>) -> Temporary<ServoHTMLParser> { + pub fn new(base_url: Option<Url>, document: JSRef<Document>, pipeline: Option<PipelineId>) + -> Temporary<ServoHTMLParser> { let window = document.window().root(); let sink = Sink { base_url: base_url, @@ -78,6 +219,11 @@ impl ServoHTMLParser { let parser = ServoHTMLParser { reflector_: Reflector::new(), tokenizer: DOMRefCell::new(tok), + pending_input: DOMRefCell::new(vec!()), + document: JS::from_rooted(document), + suspended: Cell::new(false), + last_chunk_received: Cell::new(false), + pipeline: pipeline, }; reflect_dom_object(box parser, GlobalRef::Window(window.r()), @@ -111,6 +257,11 @@ impl ServoHTMLParser { let parser = ServoHTMLParser { reflector_: Reflector::new(), tokenizer: DOMRefCell::new(tok), + pending_input: DOMRefCell::new(vec!()), + document: JS::from_rooted(document), + suspended: Cell::new(false), + last_chunk_received: Cell::new(true), + pipeline: None, }; reflect_dom_object(box parser, GlobalRef::Window(window.r()), @@ -129,6 +280,73 @@ impl Reflectable for ServoHTMLParser { } } +trait PrivateServoHTMLParserHelpers { + /// Synchronously run the tokenizer parse loop 
until explicitly suspended or + /// the tokenizer runs out of input. + fn parse_sync(self); + /// Retrieve the window object associated with this parser. + fn window(self) -> Temporary<Window>; +} + +impl<'a> PrivateServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> { + fn parse_sync(self) { + let mut first = true; + + // This parser will continue to parse while there is either pending input or + // the parser remains unsuspended. + loop { + if self.suspended.get() { + return; + } + + if self.pending_input.borrow().is_empty() && !first { + break; + } + + let mut pending_input = self.pending_input.borrow_mut(); + if !pending_input.is_empty() { + let chunk = pending_input.remove(0); + self.tokenizer.borrow_mut().feed(chunk); + } else { + self.tokenizer.borrow_mut().run(); + } + + first = false; + } + + if self.last_chunk_received.get() { + self.finish(); + } + } + + fn window(self) -> Temporary<Window> { + let doc = self.document.root(); + window_from_node(doc.r()) + } +} + +pub trait ServoHTMLParserHelpers { + /// Cause the parser to interrupt next time the tokenizer reaches a quiescent state. + /// No further parsing will occur after that point until the `resume` method is called. + /// Panics if the parser is already suspended. + fn suspend(self); + /// Immediately resume a suspended parser. Panics if the parser is not suspended. + fn resume(self); +} + +impl<'a> ServoHTMLParserHelpers for JSRef<'a, ServoHTMLParser> { + fn suspend(self) { + assert!(!self.suspended.get()); + self.suspended.set(true); + } + + fn resume(self) { + assert!(self.suspended.get()); + self.suspended.set(false); + self.parse_sync(); + } +} + struct Tracer { trc: *mut JSTracer, } @@ -152,11 +370,6 @@ impl JSTraceable for ServoHTMLParser { let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>; unsafe { - // Assertion: If the parser is mutably borrowed, we're in the - // parsing code paths. 
- debug_assert!(task_state::get().contains(task_state::IN_HTML_PARSER) - || !self.tokenizer.is_mutably_borrowed()); - let tokenizer = self.tokenizer.borrow_for_gc_trace(); let tree_builder = tokenizer.sink(); tree_builder.trace_handles(tracer); |