diff options
author | Josh Matthews <josh@joshmatthews.net> | 2015-12-02 02:45:52 -0500 |
---|---|---|
committer | Josh Matthews <josh@joshmatthews.net> | 2015-12-04 16:32:54 -0500 |
commit | 9d3b915cace62d1e6aaa22572a992316894edf76 (patch) | |
tree | 131e1df17cd0ebbd2306a93c9caf49fd45920441 | |
parent | a840a23990fbca4ce9572e729d9f390f3d991390 (diff) | |
download | servo-9d3b915cace62d1e6aaa22572a992316894edf76.tar.gz servo-9d3b915cace62d1e6aaa22572a992316894edf76.zip |
Introduce abstraction over HTML and XML parsers for parser network listener.
-rw-r--r-- | components/script/dom/document.rs | 14 | ||||
-rw-r--r-- | components/script/dom/htmlscriptelement.rs | 2 | ||||
-rw-r--r-- | components/script/dom/servohtmlparser.rs | 216 | ||||
-rw-r--r-- | components/script/dom/servoxmlparser.rs | 119 | ||||
-rw-r--r-- | components/script/script_task.rs | 10 |
5 files changed, 328 insertions, 33 deletions
diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index 4c1a7f12791..35a668ca196 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -65,7 +65,7 @@ use dom::nodeiterator::NodeIterator; use dom::nodelist::NodeList; use dom::processinginstruction::ProcessingInstruction; use dom::range::Range; -use dom::servohtmlparser::ServoHTMLParser; +use dom::servohtmlparser::{ParserRoot, ParserRef, MutNullableParserField}; use dom::text::Text; use dom::touch::Touch; use dom::touchevent::TouchEvent; @@ -184,7 +184,7 @@ pub struct Document { /// Tracks all outstanding loads related to this document. loader: DOMRefCell<DocumentLoader>, /// The current active HTML parser, to allow resuming after interruptions. - current_parser: MutNullableHeap<JS<ServoHTMLParser>>, + current_parser: MutNullableParserField, /// When we should kick off a reflow. This happens during parsing. reflow_timeout: Cell<Option<u64>>, /// The cached first `base` element with an `href` attribute. @@ -1224,9 +1224,9 @@ impl Document { // A finished resource load can potentially unblock parsing. In that case, resume the // parser so its loop can find out. - if let Some(parser) = self.current_parser.get() { - if parser.is_suspended() { - parser.resume(); + if let Some(parser) = self.get_current_parser() { + if parser.r().is_suspended() { + parser.r().resume(); } } else if self.reflow_timeout.get().is_none() { // If we don't have a parser, and the reflow timer has been reset, explicitly @@ -1347,11 +1347,11 @@ impl Document { } - pub fn set_current_parser(&self, script: Option<&ServoHTMLParser>) { + pub fn set_current_parser(&self, script: Option<ParserRef>) { self.current_parser.set(script); } - pub fn get_current_parser(&self) -> Option<Root<ServoHTMLParser>> { + pub fn get_current_parser(&self) -> Option<ParserRoot> { self.current_parser.get() } diff --git a/components/script/dom/htmlscriptelement.rs b/components/script/dom/htmlscriptelement.rs index 0d2da397406..5bc8b1bbc60 100644 --- a/components/script/dom/htmlscriptelement.rs +++ b/components/script/dom/htmlscriptelement.rs @@ -349,7 +349,7 @@ impl HTMLScriptElement { // TODO: make this suspension happen automatically. if was_parser_inserted { if let Some(parser) = doc.get_current_parser() { - parser.suspend(); + parser.r().suspend(); } } return NextParserState::Suspend; diff --git a/components/script/dom/servohtmlparser.rs b/components/script/dom/servohtmlparser.rs index 68920e19c22..500fd46d6e6 100644 --- a/components/script/dom/servohtmlparser.rs +++ b/components/script/dom/servohtmlparser.rs @@ -15,6 +15,7 @@ use dom::bindings::reflector::{Reflector, reflect_dom_object}; use dom::bindings::trace::JSTraceable; use dom::document::Document; use dom::node::Node; +use dom::servoxmlparser::ServoXMLParser; use dom::text::Text; use dom::window::Window; use encoding::all::UTF_8; @@ -31,7 +32,9 @@ use network_listener::PreInvoke; use parse::Parser; use script_task::{ScriptChan, ScriptTask}; use std::cell::Cell; +use std::cell::UnsafeCell; use std::default::Default; +use std::ptr; use url::Url; use util::str::DOMString; @@ -67,10 +70,164 @@ pub struct FragmentContext<'a> { pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>; +#[must_root] +#[derive(JSTraceable, HeapSizeOf)] +pub enum ParserField { + HTML(JS<ServoHTMLParser>), + XML(JS<ServoXMLParser>), +} + +#[must_root] +#[derive(JSTraceable, HeapSizeOf)] +pub struct MutNullableParserField { + #[ignore_heap_size_of = "XXXjdm"] + ptr: UnsafeCell<Option<ParserField>>, +} + +impl Default for MutNullableParserField { + #[allow(unrooted_must_root)] + fn default() -> MutNullableParserField { + MutNullableParserField { + ptr: UnsafeCell::new(None), + } + } +} + +impl MutNullableParserField { + #[allow(unsafe_code)] + pub fn set(&self, val: Option<ParserRef>) { + unsafe { + *self.ptr.get() = val.map(|val| { + match val { + ParserRef::HTML(parser) => ParserField::HTML(JS::from_ref(parser)), + ParserRef::XML(parser) => ParserField::XML(JS::from_ref(parser)), + } + }); + } + } + + #[allow(unsafe_code, unrooted_must_root)] + pub fn get(&self) -> Option<ParserRoot> { + unsafe { + ptr::read(self.ptr.get()).map(|o| { + match o { + ParserField::HTML(parser) => ParserRoot::HTML(Root::from_ref(&*parser)), + ParserField::XML(parser) => ParserRoot::XML(Root::from_ref(&*parser)), + } + }) + } + } +} + +pub enum ParserRoot { + HTML(Root<ServoHTMLParser>), + XML(Root<ServoXMLParser>), +} + +impl ParserRoot { + pub fn r(&self) -> ParserRef { + match *self { + ParserRoot::HTML(ref parser) => ParserRef::HTML(parser.r()), + ParserRoot::XML(ref parser) => ParserRef::XML(parser.r()), + } + } +} + +enum TrustedParser { + HTML(Trusted<ServoHTMLParser>), + XML(Trusted<ServoXMLParser>), +} + +impl TrustedParser { + pub fn root(&self) -> ParserRoot { + match *self { + TrustedParser::HTML(ref parser) => ParserRoot::HTML(parser.root()), + TrustedParser::XML(ref parser) => ParserRoot::XML(parser.root()), + } + } +} + +pub enum ParserRef<'a> { + HTML(&'a ServoHTMLParser), + XML(&'a ServoXMLParser), +} + +impl<'a> ParserRef<'a> { + fn parse_chunk(&self, input: String) { + match *self { + ParserRef::HTML(parser) => parser.parse_chunk(input), + ParserRef::XML(parser) => parser.parse_chunk(input), + } + } + + pub fn window(&self) -> &Window { + match *self { + ParserRef::HTML(parser) => parser.window(), + ParserRef::XML(parser) => parser.window(), + } + } + + pub fn resume(&self) { + match *self { + ParserRef::HTML(parser) => parser.resume(), + ParserRef::XML(parser) => parser.resume(), + } + } + + pub fn suspend(&self) { + match *self { + ParserRef::HTML(parser) => parser.suspend(), + ParserRef::XML(parser) => parser.suspend(), + } + } + + pub fn is_suspended(&self) -> bool { + match *self { + ParserRef::HTML(parser) => parser.is_suspended(), + ParserRef::XML(parser) => parser.is_suspended(), + } + } + + pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> { + match *self { + ParserRef::HTML(parser) => parser.pending_input(), + ParserRef::XML(parser) => parser.pending_input(), + } + } + + pub fn set_plaintext_state(&self) { + match *self { + ParserRef::HTML(parser) => parser.set_plaintext_state(), + ParserRef::XML(parser) => parser.set_plaintext_state(), + } + } + + pub fn parse_sync(&self) { + match *self { + ParserRef::HTML(parser) => parser.parse_sync(), + ParserRef::XML(parser) => parser.parse_sync(), + } + } + + pub fn document(&self) -> &Document { + match *self { + ParserRef::HTML(parser) => parser.document(), + ParserRef::XML(parser) => parser.document(), + } + } + + pub fn last_chunk_received(&self) -> &Cell<bool> { + match *self { + ParserRef::HTML(parser) => parser.last_chunk_received(), + ParserRef::XML(parser) => parser.last_chunk_received(), + } + } +} + /// The context required for asynchronously fetching a document and parsing it progressively. pub struct ParserContext { /// The parser that initiated the request. - parser: Option<Trusted<ServoHTMLParser>>, + parser: Option<TrustedParser>, /// Is this a synthesized document is_synthesized_document: bool, /// The pipeline associated with this document. @@ -110,22 +267,25 @@ impl AsyncResponseListener for ParserContext { let parser = parser.r(); let win = parser.window(); - self.parser = Some(Trusted::new(win.get_cx(), parser, self.script_chan.clone())); + self.parser = Some(match parser { + ParserRef::HTML(parser) => TrustedParser::HTML(Trusted::new(win.get_cx(), parser, self.script_chan.clone())), + ParserRef::XML(parser) => TrustedParser::XML(Trusted::new(win.get_cx(), parser, self.script_chan.clone())), + }); match content_type { Some(ContentType(Mime(TopLevel::Image, _, _))) => { self.is_synthesized_document = true; let page = format!("<html><body><img src='{}' /></body></html>", self.url.serialize()); - parser.pending_input.borrow_mut().push(page); + parser.pending_input().borrow_mut().push(page); parser.parse_sync(); }, Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { // https://html.spec.whatwg.org/multipage/#read-text let page = format!("<pre>\n"); - parser.pending_input.borrow_mut().push(page); + parser.pending_input().borrow_mut().push(page); parser.parse_sync(); - parser.tokenizer().borrow_mut().set_plaintext_state(); + parser.set_plaintext_state(); }, Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => {}, // Handle text/html Some(ContentType(Mime(toplevel, sublevel, _))) => { @@ -138,7 +298,7 @@ impl AsyncResponseListener for ParserContext { let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>", toplevel.as_str(), sublevel.as_str()); self.is_synthesized_document = true; - parser.pending_input.borrow_mut().push(page); + parser.pending_input().borrow_mut().push(page); parser.parse_sync(); }, None => { @@ -156,7 +316,7 @@ impl AsyncResponseListener for ParserContext { Some(parser) => parser.root(), None => return, }; - parser.parse_chunk(data); + parser.r().parse_chunk(data); } } @@ -165,16 +325,16 @@ impl AsyncResponseListener for ParserContext { Some(parser) => parser.root(), None => return, }; - parser.document.finish_load(LoadType::PageSource(self.url.clone())); + parser.r().document().finish_load(LoadType::PageSource(self.url.clone())); if let Err(err) = status { debug!("Failed to load page URL {}, error: {}", self.url.serialize(), err); // TODO(Savago): we should send a notification to callers #5463. } - parser.last_chunk_received.set(true); - if !parser.is_suspended() { - parser.parse_sync(); + parser.r().last_chunk_received().set(true); + if !parser.r().is_suspended() { + parser.r().parse_sync(); } } } @@ -202,7 +362,7 @@ pub struct ServoHTMLParser { impl<'a> Parser for &'a ServoHTMLParser { fn parse_chunk(self, input: String) { - self.document.set_current_parser(Some(self)); + self.document.set_current_parser(Some(ParserRef::HTML(self))); self.pending_input.borrow_mut().push(input); if !self.is_suspended() { self.parse_sync(); @@ -213,7 +373,7 @@ impl<'a> Parser for &'a ServoHTMLParser { assert!(!self.suspended.get()); assert!(self.pending_input.borrow().is_empty()); - self.tokenizer().borrow_mut().end(); + self.tokenizer.borrow_mut().end(); debug!("finished parsing"); self.document.set_current_parser(None); @@ -295,6 +455,19 @@ impl ServoHTMLParser { pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> { &self.tokenizer } + + pub fn set_plaintext_state(&self) { + self.tokenizer.borrow_mut().set_plaintext_state() + } + + pub fn end_tokenizer(&self) { + self.tokenizer.borrow_mut().end() + } + + pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> { + &self.pending_input + } + } @@ -330,24 +503,29 @@ impl ServoHTMLParser { fn window(&self) -> &Window { self.document.window() } -} - -impl ServoHTMLParser { - pub fn suspend(&self) { + fn suspend(&self) { assert!(!self.suspended.get()); self.suspended.set(true); } - pub fn resume(&self) { + fn resume(&self) { assert!(self.suspended.get()); self.suspended.set(false); self.parse_sync(); } - pub fn is_suspended(&self) -> bool { + fn is_suspended(&self) -> bool { self.suspended.get() } + + fn document(&self) -> &Document { + &self.document + } + + fn last_chunk_received(&self) -> &Cell<bool> { + &self.last_chunk_received + } } struct Tracer { diff --git a/components/script/dom/servoxmlparser.rs b/components/script/dom/servoxmlparser.rs index 987c43f4982..9075dd5f2eb 100644 --- a/components/script/dom/servoxmlparser.rs +++ b/components/script/dom/servoxmlparser.rs @@ -2,15 +2,26 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +use dom::bindings::cell::DOMRefCell; use dom::bindings::js::{JS, Root}; use dom::bindings::reflector::Reflector; use dom::bindings::trace::JSTraceable; use dom::document::Document; use dom::node::Node; +use dom::servohtmlparser::ParserRef; use dom::text::Text; +use dom::window::Window; +use js::jsapi::JSTracer; +use msg::constellation_msg::PipelineId; +use parse::Parser; +use script_task::ScriptTask; +use std::cell::Cell; use url::Url; use util::str::DOMString; -use xml5ever::tree_builder::{NodeOrText, TreeSink}; +use xml5ever::tokenizer; +use xml5ever::tree_builder::{self, NodeOrText, XmlTreeBuilder}; + +pub type Tokenizer = tokenizer::XmlTokenizer<XmlTreeBuilder<JS<Node>, Sink>>; #[must_root] #[derive(JSTraceable, HeapSizeOf)] @@ -36,9 +47,115 @@ impl Sink { #[dom_struct] pub struct ServoXMLParser { reflector_: Reflector, + #[ignore_heap_size_of = "Defined in xml5ever"] + tokenizer: DOMRefCell<Tokenizer>, + /// Input chunks received but not yet passed to the parser. + pending_input: DOMRefCell<Vec<String>>, + /// The document associated with this parser. + document: JS<Document>, + /// True if this parser should avoid passing any further data to the tokenizer. + suspended: Cell<bool>, + /// Whether to expect any further input from the associated network request. + last_chunk_received: Cell<bool>, + /// The pipeline associated with this parse, unavailable if this parse does not + /// correspond to a page load. + pipeline: Option<PipelineId>, +} + +impl<'a> Parser for &'a ServoXMLParser { + fn parse_chunk(self, input: String) { + self.document.set_current_parser(Some(ParserRef::XML(self))); + self.pending_input.borrow_mut().push(input); + if !self.is_suspended() { + self.parse_sync(); + } + } + + fn finish(self) { + assert!(!self.suspended.get()); + assert!(self.pending_input.borrow().is_empty()); + + self.tokenizer.borrow_mut().end(); + debug!("finished parsing"); + + self.document.set_current_parser(None); + + if let Some(pipeline) = self.pipeline { + ScriptTask::parsing_complete(pipeline); + } + } } impl ServoXMLParser { pub fn new() { } + + pub fn window(&self) -> &Window { + self.document.window() + } + + pub fn resume(&self) { + panic!() + } + + pub fn suspend(&self) { + panic!() + } + + pub fn is_suspended(&self) -> bool { + panic!() + } + + pub fn parse_sync(&self) { + panic!() + } + + pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> { + &self.pending_input + } + + pub fn set_plaintext_state(&self) { + //self.tokenizer.borrow_mut().set_plaintext_state() + } + + pub fn end_tokenizer(&self) { + self.tokenizer.borrow_mut().end() + } + + pub fn document(&self) -> &Document { + &self.document + } + + pub fn last_chunk_received(&self) -> &Cell<bool> { + &self.last_chunk_received + } + + pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> { + &self.tokenizer + } +} + +struct Tracer { + trc: *mut JSTracer, +} + +impl tree_builder::Tracer for Tracer { + type Handle = JS<Node>; + #[allow(unrooted_must_root)] + fn trace_handle(&self, node: JS<Node>) { + node.trace(self.trc); + } +} + +impl JSTraceable for Tokenizer { + fn trace(&self, trc: *mut JSTracer) { + let tracer = Tracer { + trc: trc, + }; + let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>; + + let tree_builder = self.sink(); + tree_builder.trace_handles(tracer); + tree_builder.sink().trace(trc); + } } diff --git a/components/script/script_task.rs b/components/script/script_task.rs index 374b9ec70a9..c7078d0db75 100644 --- a/components/script/script_task.rs +++ b/components/script/script_task.rs @@ -27,7 +27,7 @@ use dom::bindings::conversions::{FromJSValConvertible, StringificationBehavior}; use dom::bindings::global::GlobalRef; use dom::bindings::inheritance::Castable; use dom::bindings::js::{JS, RootCollection, trace_roots}; -use dom::bindings::js::{Root, RootCollectionPtr, RootedReference}; +use dom::bindings::js::{RootCollectionPtr, RootedReference}; use dom::bindings::refcounted::{LiveDOMReferences, Trusted, TrustedReference, trace_refcounted_objects}; use dom::bindings::trace::{JSTraceable, RootedVec, trace_traceables}; use dom::bindings::utils::{DOM_CALLBACKS, WRAP_CALLBACKS}; @@ -36,7 +36,7 @@ use dom::element::Element; use dom::event::{Event, EventBubbles, EventCancelable}; use dom::htmlanchorelement::HTMLAnchorElement; use dom::node::{Node, NodeDamage, window_from_node}; -use dom::servohtmlparser::{ParserContext, ServoHTMLParser}; +use dom::servohtmlparser::{ParserContext, ParserRoot}; use dom::uievent::UIEvent; use dom::window::{ReflowReason, ScriptHelpers, Window}; use dom::worker::TrustedWorkerAddress; @@ -595,7 +595,7 @@ pub unsafe extern "C" fn shadow_check_callback(_cx: *mut JSContext, impl ScriptTask { pub fn page_fetch_complete(id: PipelineId, subpage: Option<SubpageId>, metadata: Metadata) - -> Option<Root<ServoHTMLParser>> { + -> Option<ParserRoot> { SCRIPT_TASK_ROOT.with(|root| { let script_task = unsafe { &*root.borrow().unwrap() }; script_task.handle_page_fetch_complete(id, subpage, metadata) @@ -1451,7 +1451,7 @@ impl ScriptTask { /// We have received notification that the response associated with a load has completed. /// Kick off the document and frame tree creation process using the result. fn handle_page_fetch_complete(&self, id: PipelineId, subpage: Option<SubpageId>, - metadata: Metadata) -> Option<Root<ServoHTMLParser>> { + metadata: Metadata) -> Option<ParserRoot> { let idx = self.incomplete_loads.borrow().iter().position(|load| { load.pipeline_id == id && load.parent_info.map(|info| info.1) == subpage }); @@ -1547,7 +1547,7 @@ impl ScriptTask { /// The entry point to document loading. Defines bindings, sets up the window and document /// objects, parses HTML and CSS, and kicks off initial layout. - fn load(&self, metadata: Metadata, incomplete: InProgressLoad) -> Root<ServoHTMLParser> { + fn load(&self, metadata: Metadata, incomplete: InProgressLoad) -> ParserRoot { let final_url = metadata.final_url.clone(); debug!("ScriptTask: loading {} on page {:?}", incomplete.url.serialize(), incomplete.pipeline_id); |