diff options
author | Nikhil Shagrithaya <nikhilshagri@gmail.com> | 2017-05-21 21:53:11 +0530 |
---|---|---|
committer | Nikhil Shagrithaya <nikhilshagri@gmail.com> | 2017-06-17 13:10:10 +0530 |
commit | 161ff15d54f1cf7746f37c1075a63273f374eab7 (patch) | |
tree | aec3f900f121887e4607f1b53b2a3c5c6bb4a8d0 /components/script/dom | |
parent | 80488c4494911257e62c98388b36d94d74f9d330 (diff) | |
download | servo-161ff15d54f1cf7746f37c1075a63273f374eab7.tar.gz servo-161ff15d54f1cf7746f37c1075a63273f374eab7.zip |
Added Async HTML Tokenizer
Diffstat (limited to 'components/script/dom')
-rw-r--r-- | components/script/dom/servoparser/async_html.rs | 493 | ||||
-rw-r--r-- | components/script/dom/servoparser/mod.rs | 10 |
2 files changed, 502 insertions, 1 deletions
diff --git a/components/script/dom/servoparser/async_html.rs b/components/script/dom/servoparser/async_html.rs new file mode 100644 index 00000000000..e13f8169faa --- /dev/null +++ b/components/script/dom/servoparser/async_html.rs @@ -0,0 +1,493 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#![allow(unrooted_must_root)] + +use dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; +use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; +use dom::bindings::inheritance::Castable; +use dom::bindings::js::{JS, MutNullableJS, Root}; +use dom::bindings::str::DOMString; +use dom::bindings::trace::JSTraceable; +use dom::comment::Comment; +use dom::document::Document; +use dom::documenttype::DocumentType; +use dom::element::{Element, ElementCreator}; +use dom::htmlformelement::{FormControlElementHelpers, HTMLFormElement}; +use dom::htmlscriptelement::HTMLScriptElement; +use dom::htmltemplateelement::HTMLTemplateElement; +use dom::node::Node; +use dom::processinginstruction::ProcessingInstruction; +use dom::virtualmethods::vtable_for; +use html5ever::{Attribute, QualName, ExpandedName}; +use html5ever::buffer_queue::BufferQueue; +use html5ever::tendril::StrTendril; +use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult}; +use html5ever::tree_builder::{NodeOrText, TreeSink, NextParserState, QuirksMode, ElementFlags}; +use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts}; +use js::jsapi::JSTracer; +use servo_url::ServoUrl; +use std::ascii::AsciiExt; +use std::borrow::Cow; +use std::cell::Cell; +use std::collections::HashMap; +use style::context::QuirksMode as ServoQuirksMode; + +#[derive(HeapSizeOf, JSTraceable)] +#[must_root] +pub struct Tokenizer { + #[ignore_heap_size_of = "Defined in html5ever"] + inner: HtmlTokenizer<TreeBuilder<ParseNode, Sink>>, +} + +impl Tokenizer { + pub fn new( + document: &Document, + url: ServoUrl, + fragment_context: Option<super::FragmentContext>) + -> Self { + let mut sink = Sink::new(url, document); + + let options = TreeBuilderOpts { + ignore_missing_rules: true, + .. Default::default() + }; + + let inner = if let Some(fc) = fragment_context { + let ctxt_parse_node = sink.new_parse_node(); + sink.nodes.insert(ctxt_parse_node.id, JS::from_ref(fc.context_elem)); + + let form_parse_node = fc.form_elem.map(|form_elem| { + let node = sink.new_parse_node(); + sink.nodes.insert(node.id, JS::from_ref(form_elem)); + node + }); + let tb = TreeBuilder::new_for_fragment( + sink, + ctxt_parse_node, + form_parse_node, + options); + + let tok_options = TokenizerOpts { + initial_state: Some(tb.tokenizer_state_for_context_elem()), + .. Default::default() + }; + + HtmlTokenizer::new(tb, tok_options) + } else { + HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default()) + }; + + Tokenizer { + inner: inner, + } + } + + pub fn feed(&mut self, input: &mut BufferQueue) -> Result<(), Root<HTMLScriptElement>> { + match self.inner.feed(input) { + TokenizerResult::Done => Ok(()), + TokenizerResult::Script(script) => { + let nodes = &self.inner.sink.sink.nodes; + let script = nodes.get(&script.id).unwrap(); + Err(Root::from_ref(script.downcast().unwrap())) + }, + } + } + + pub fn end(&mut self) { + self.inner.end(); + } + + pub fn url(&self) -> &ServoUrl { + &self.inner.sink.sink.base_url + } + + pub fn set_plaintext_state(&mut self) { + self.inner.set_plaintext_state(); + } +} + +#[allow(unsafe_code)] +unsafe impl JSTraceable for HtmlTokenizer<TreeBuilder<ParseNode, Sink>> { + unsafe fn trace(&self, trc: *mut JSTracer) { + struct Tracer(*mut JSTracer); + let tracer = Tracer(trc); + + impl HtmlTracer for Tracer { + type Handle = ParseNode; + #[allow(unrooted_must_root)] + fn trace_handle(&self, node: &ParseNode) { + unsafe { node.trace(self.0); } + } + } + + let tree_builder = &self.sink; + tree_builder.trace_handles(&tracer); + tree_builder.sink.trace(trc); + } +} + +type ParseNodeID = usize; + +#[derive(JSTraceable, Clone, HeapSizeOf)] +pub struct ParseNode { + id: ParseNodeID, + qual_name: Option<QualName>, +} + +#[derive(JSTraceable, HeapSizeOf)] +struct ParseNodeData { + target: Option<String>, + data: Option<String>, + contents: Option<ParseNode>, + is_integration_point: bool, +} + +impl Default for ParseNodeData { + fn default() -> ParseNodeData { + ParseNodeData { + target: None, + data: None, + contents: None, + is_integration_point: false, + } + } +} + +enum ParseOperation { + GetTemplateContents(ParseNodeID, ParseNodeID), + CreateElement(ParseNodeID, QualName, Vec<Attribute>), + CreateComment(StrTendril, ParseNodeID), + // sibling, node to be inserted + AppendBeforeSibling(ParseNodeID, NodeOrText<ParseNode>), + // parent, node to be inserted + Append(ParseNodeID, NodeOrText<ParseNode>), + AppendDoctypeToDocument(StrTendril, StrTendril, StrTendril), + AddAttrsIfMissing(ParseNodeID, Vec<Attribute>), + RemoveFromParent(ParseNodeID), + MarkScriptAlreadyStarted(ParseNodeID), + ReparentChildren(ParseNodeID, ParseNodeID), + AssociateWithForm(ParseNodeID, ParseNodeID), + CreatePI(ParseNodeID), + Pop(ParseNodeID), +} + +#[derive(JSTraceable, HeapSizeOf)] +#[must_root] +pub struct Sink { + base_url: ServoUrl, + document: JS<Document>, + current_line: u64, + script: MutNullableJS<HTMLScriptElement>, + parse_node_data: HashMap<ParseNodeID, ParseNodeData>, + next_parse_node_id: Cell<ParseNodeID>, + nodes: HashMap<ParseNodeID, JS<Node>>, + document_node: ParseNode, +} + +impl Sink { + fn new(base_url: ServoUrl, document: &Document) -> Sink { + let mut sink = Sink { + base_url: base_url, + document: JS::from_ref(document), + current_line: 1, + script: Default::default(), + parse_node_data: HashMap::new(), + next_parse_node_id: Cell::new(1), + nodes: HashMap::new(), + document_node: ParseNode { + id: 0, + qual_name: None, + } + }; + let data = ParseNodeData::default(); + sink.insert_parse_node_data(0, data); + sink.insert_node(0, JS::from_ref(document.upcast())); + sink + } + + fn new_parse_node(&mut self) -> ParseNode { + let id = self.next_parse_node_id.get(); + let data = ParseNodeData::default(); + self.insert_parse_node_data(id, data); + self.next_parse_node_id.set(id + 1); + ParseNode { + id: id, + qual_name: None, + } + } + + fn insert_node(&mut self, id: ParseNodeID, node: JS<Node>) { + assert!(self.nodes.insert(id, node).is_none()); + } + + fn get_node<'a>(&'a self, id: &ParseNodeID) -> &'a JS<Node> { + self.nodes.get(id).expect("Node not found!") + } + + fn insert_parse_node_data(&mut self, id: ParseNodeID, data: ParseNodeData) { + assert!(self.parse_node_data.insert(id, data).is_none()); + } + + fn get_parse_node_data<'a>(&'a self, id: &'a ParseNodeID) -> &'a ParseNodeData { + self.parse_node_data.get(id).expect("Parse Node data not found!") + } + + fn get_parse_node_data_mut<'a>(&'a mut self, id: &'a ParseNodeID) -> &'a mut ParseNodeData { + self.parse_node_data.get_mut(id).expect("Parse Node data not found!") + } + + fn process_operation(&mut self, op: ParseOperation) { + let document = Root::from_ref(&**self.get_node(&0)); + let document = document.downcast::<Document>().expect("Document node should be downcasted!"); + match op { + ParseOperation::GetTemplateContents(target, contents) => { + let target = Root::from_ref(&**self.get_node(&target)); + let template = target.downcast::<HTMLTemplateElement>().expect( + "Tried to extract contents from non-template element while parsing"); + self.insert_node(contents, JS::from_ref(template.Content().upcast())); + } + ParseOperation::CreateElement(id, name, attrs) => { + let elem = Element::create(name, &*self.document, + ElementCreator::ParserCreated(self.current_line)); + for attr in attrs { + elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None); + } + + self.insert_node(id, JS::from_ref(elem.upcast())); + } + ParseOperation::CreateComment(text, id) => { + let comment = Comment::new(DOMString::from(String::from(text)), document); + self.insert_node(id, JS::from_ref(&comment.upcast())); + } + ParseOperation::AppendBeforeSibling(sibling, node) => { + let node = match node { + NodeOrText::AppendNode(n) => NodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), + NodeOrText::AppendText(text) => NodeOrText::AppendText(text) + }; + let sibling = &**self.get_node(&sibling); + let parent = &*sibling.GetParentNode().expect("append_before_sibling called on node without parent"); + + super::insert(parent, Some(sibling), node); + } + ParseOperation::Append(parent, node) => { + let node = match node { + NodeOrText::AppendNode(n) => NodeOrText::AppendNode(JS::from_ref(&**self.get_node(&n.id))), + NodeOrText::AppendText(text) => NodeOrText::AppendText(text) + }; + + let parent = &**self.get_node(&parent); + super::insert(parent, None, node); + } + ParseOperation::AppendDoctypeToDocument(name, public_id, system_id) => { + let doctype = DocumentType::new( + DOMString::from(String::from(name)), Some(DOMString::from(String::from(public_id))), + Some(DOMString::from(String::from(system_id))), document); + + document.upcast::<Node>().AppendChild(doctype.upcast()).expect("Appending failed"); + } + ParseOperation::AddAttrsIfMissing(target_id, attrs) => { + let elem = self.get_node(&target_id).downcast::<Element>() + .expect("tried to set attrs on non-Element in HTML parsing"); + for attr in attrs { + elem.set_attribute_from_parser(attr.name, DOMString::from(String::from(attr.value)), None); + } + } + ParseOperation::RemoveFromParent(target) => { + if let Some(ref parent) = self.get_node(&target).GetParentNode() { + parent.RemoveChild(&**self.get_node(&target)).unwrap(); + } + } + ParseOperation::MarkScriptAlreadyStarted(node) => { + let script = self.get_node(&node).downcast::<HTMLScriptElement>(); + script.map(|script| script.set_already_started(true)); + } + ParseOperation::ReparentChildren(parent, new_parent) => { + let parent = self.get_node(&parent); + let new_parent = self.get_node(&new_parent); + while let Some(child) = parent.GetFirstChild() { + new_parent.AppendChild(&child).unwrap(); + } + } + ParseOperation::AssociateWithForm(target, form) => { + let form = self.get_node(&form); + let form = Root::downcast::<HTMLFormElement>(Root::from_ref(&**form)) + .expect("Owner must be a form element"); + + let node = self.get_node(&target); + let elem = node.downcast::<Element>(); + let control = elem.and_then(|e| e.as_maybe_form_control()); + + if let Some(control) = control { + control.set_form_owner_from_parser(&form); + } else { + // TODO remove this code when keygen is implemented. + assert!(node.NodeName() == "KEYGEN", "Unknown form-associatable element"); + } + } + ParseOperation::Pop(node) => { + vtable_for(self.get_node(&node)).pop(); + } + ParseOperation::CreatePI(node) => { + let pi; + { + let data = self.get_parse_node_data(&node); + pi = ProcessingInstruction::new( + DOMString::from(data.target.clone().unwrap()), + DOMString::from(data.data.clone().unwrap()), + document); + } + self.insert_node(node, JS::from_ref(pi.upcast())); + } + } + } +} + +#[allow(unrooted_must_root)] +impl TreeSink for Sink { + type Output = Self; + fn finish(self) -> Self { self } + + type Handle = ParseNode; + + fn get_document(&mut self) -> Self::Handle { + self.document_node.clone() + } + + fn get_template_contents(&mut self, target: &Self::Handle) -> Self::Handle { + if let Some(ref contents) = self.get_parse_node_data(&target.id).contents { + return contents.clone(); + } + let node = self.new_parse_node(); + { + let mut data = self.get_parse_node_data_mut(&target.id); + data.contents = Some(node.clone()); + } + self.process_operation(ParseOperation::GetTemplateContents(target.id, node.id)); + node + } + + fn same_node(&self, x: &Self::Handle, y: &Self::Handle) -> bool { + x.id == y.id + } + + fn elem_name<'a>(&self, target: &'a Self::Handle) -> ExpandedName<'a> { + target.qual_name.as_ref().expect("Expected qual name of node!").expanded() + } + + fn same_tree(&self, x: &Self::Handle, y: &Self::Handle) -> bool { + let x = self.get_node(&x.id); + let y = self.get_node(&y.id); + + let x = x.downcast::<Element>().expect("Element node expected"); + let y = y.downcast::<Element>().expect("Element node expected"); + x.is_in_same_home_subtree(y) + } + + fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, _flags: ElementFlags) + -> Self::Handle { + let mut node = self.new_parse_node(); + node.qual_name = Some(name.clone()); + { + let mut node_data = self.get_parse_node_data_mut(&node.id); + node_data.is_integration_point = attrs.iter() + .any(|attr| { + let attr_value = &String::from(attr.value.clone()); + (attr.name.local == local_name!("encoding") && attr.name.ns == ns!()) && + (attr_value.eq_ignore_ascii_case("text/html") || + attr_value.eq_ignore_ascii_case("application/xhtml+xml")) + }); + } + self.process_operation(ParseOperation::CreateElement(node.id, name, attrs)); + node + } + + fn create_comment(&mut self, text: StrTendril) -> Self::Handle { + let node = self.new_parse_node(); + self.process_operation(ParseOperation::CreateComment(text, node.id)); + node + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> ParseNode { + let node = self.new_parse_node(); + { + let mut node_data = self.get_parse_node_data_mut(&node.id); + node_data.target = Some(String::from(target)); + node_data.data = Some(String::from(data)); + } + self.process_operation(ParseOperation::CreatePI(node.id)); + node + } + + fn has_parent_node(&self, node: &Self::Handle) -> bool { + self.get_node(&node.id).GetParentNode().is_some() + } + + fn associate_with_form(&mut self, target: &Self::Handle, form: &Self::Handle) { + self.process_operation(ParseOperation::AssociateWithForm(target.id, form.id)); + } + + fn append_before_sibling(&mut self, + sibling: &Self::Handle, + new_node: NodeOrText<Self::Handle>) { + self.process_operation(ParseOperation::AppendBeforeSibling(sibling.id, new_node)); + } + + fn parse_error(&mut self, msg: Cow<'static, str>) { + debug!("Parse error: {}", msg); + } + + fn set_quirks_mode(&mut self, mode: QuirksMode) { + let mode = match mode { + QuirksMode::Quirks => ServoQuirksMode::Quirks, + QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks, + QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks, + }; + self.document.set_quirks_mode(mode); + } + + fn append(&mut self, parent: &Self::Handle, child: NodeOrText<Self::Handle>) { + self.process_operation(ParseOperation::Append(parent.id, child)); + } + + fn append_doctype_to_document(&mut self, name: StrTendril, public_id: StrTendril, + system_id: StrTendril) { + self.process_operation(ParseOperation::AppendDoctypeToDocument(name, public_id, system_id)); + } + + fn add_attrs_if_missing(&mut self, target: &Self::Handle, attrs: Vec<Attribute>) { + self.process_operation(ParseOperation::AddAttrsIfMissing(target.id, attrs)); + } + + fn remove_from_parent(&mut self, target: &Self::Handle) { + self.process_operation(ParseOperation::RemoveFromParent(target.id)); + } + + fn mark_script_already_started(&mut self, node: &Self::Handle) { + self.process_operation(ParseOperation::MarkScriptAlreadyStarted(node.id)); + } + + fn complete_script(&mut self, _: &Self::Handle) -> NextParserState { + panic!("complete_script should not be called here!"); + } + + fn reparent_children(&mut self, parent: &Self::Handle, new_parent: &Self::Handle) { + self.process_operation(ParseOperation::ReparentChildren(parent.id, new_parent.id)); + } + + /// https://html.spec.whatwg.org/multipage/#html-integration-point + /// Specifically, the <annotation-xml> cases. + fn is_mathml_annotation_xml_integration_point(&self, handle: &Self::Handle) -> bool { + let node_data = self.get_parse_node_data(&handle.id); + node_data.is_integration_point + } + + fn set_current_line(&mut self, line_number: u64) { + self.current_line = line_number; + } + + fn pop(&mut self, node: &Self::Handle) { + self.process_operation(ParseOperation::Pop(node.id)); + } +} diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index 254c178f8f6..3be0484dc7c 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -51,6 +51,7 @@ use std::cell::Cell; use std::mem; use style::context::QuirksMode as ServoQuirksMode; +mod async_html; mod html; mod xml; @@ -138,6 +139,7 @@ impl ServoParser { // Step 11. let form = context_node.inclusive_ancestors() .find(|element| element.is::<HTMLFormElement>()); + let fragment_context = FragmentContext { context_elem: context_node, form_elem: form.r(), @@ -145,7 +147,7 @@ impl ServoParser { let parser = ServoParser::new(&document, Tokenizer::Html(self::html::Tokenizer::new(&document, - url.clone(), + url, Some(fragment_context))), LastChunkState::Received, ParserKind::Normal); @@ -485,6 +487,7 @@ enum ParserKind { #[must_root] enum Tokenizer { Html(self::html::Tokenizer), + AsyncHtml(self::async_html::Tokenizer), Xml(self::xml::Tokenizer), } @@ -492,6 +495,7 @@ impl Tokenizer { fn feed(&mut self, input: &mut BufferQueue) -> Result<(), Root<HTMLScriptElement>> { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.feed(input), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.feed(input), Tokenizer::Xml(ref mut tokenizer) => tokenizer.feed(input), } } @@ -499,6 +503,7 @@ impl Tokenizer { fn end(&mut self) { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.end(), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.end(), Tokenizer::Xml(ref mut tokenizer) => tokenizer.end(), } } @@ -506,6 +511,7 @@ impl Tokenizer { fn url(&self) -> &ServoUrl { match *self { Tokenizer::Html(ref tokenizer) => tokenizer.url(), + Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(), Tokenizer::Xml(ref tokenizer) => tokenizer.url(), } } @@ -513,6 +519,7 @@ impl Tokenizer { fn set_plaintext_state(&mut self) { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.set_plaintext_state(), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.set_plaintext_state(), Tokenizer::Xml(_) => unimplemented!(), } } @@ -520,6 +527,7 @@ impl Tokenizer { fn profiler_category(&self) -> ProfilerCategory { match *self { Tokenizer::Html(_) => ProfilerCategory::ScriptParseHTML, + Tokenizer::AsyncHtml(_) => ProfilerCategory::ScriptParseHTML, Tokenizer::Xml(_) => ProfilerCategory::ScriptParseXML, } } |