diff options
author | Chris Paris <cap@chrisparis.org> | 2015-02-19 17:43:24 -1000 |
---|---|---|
committer | Chris Paris <cap@chrisparis.org> | 2015-03-18 12:17:56 -1000 |
commit | a5d6c6a1fc60975b4901914c183624a13f496c4c (patch) | |
tree | 9a69e62ac618b8889757c68dfe6ec2c10e1fca0d | |
parent | a5217556072390131f41a7a4cd07e8eb5a671d06 (diff) | |
download | servo-a5d6c6a1fc60975b4901914c183624a13f496c4c.tar.gz servo-a5d6c6a1fc60975b4901914c183624a13f496c4c.zip |
Serialize using html5ever
-rw-r--r-- | components/script/dom/element.rs | 26 | ||||
-rw-r--r-- | components/script/dom/htmlserializer.rs | 168 | ||||
-rw-r--r-- | components/script/dom/mod.rs | 1 | ||||
-rw-r--r-- | components/script/parse/html.rs | 83 |
4 files changed, 104 insertions, 174 deletions
diff --git a/components/script/dom/element.rs b/components/script/dom/element.rs index d8ebb804c87..1e8d786cb6f 100644 --- a/components/script/dom/element.rs +++ b/components/script/dom/element.rs @@ -40,14 +40,13 @@ use dom::htmlbodyelement::{HTMLBodyElement, HTMLBodyElementHelpers}; use dom::htmlcollection::HTMLCollection; use dom::htmlelement::HTMLElementTypeId; use dom::htmlinputelement::{HTMLInputElement, RawLayoutHTMLInputElementHelpers, HTMLInputElementHelpers}; -use dom::htmlserializer::serialize; use dom::htmltableelement::{HTMLTableElement, HTMLTableElementHelpers}; use dom::htmltablecellelement::{HTMLTableCellElement, HTMLTableCellElementHelpers}; use dom::htmltablerowelement::{HTMLTableRowElement, HTMLTableRowElementHelpers}; use dom::htmltablesectionelement::{HTMLTableSectionElement, HTMLTableSectionElementHelpers}; use dom::htmltextareaelement::{HTMLTextAreaElement, RawLayoutHTMLTextAreaElementHelpers}; use dom::node::{CLICK_IN_PROGRESS, LayoutNodeHelpers, Node, NodeHelpers, NodeTypeId}; -use dom::node::{NodeIterator, document_from_node, NodeDamage}; +use dom::node::{document_from_node, NodeDamage}; use dom::node::{window_from_node}; use dom::nodelist::NodeList; use dom::virtualmethods::{VirtualMethods, vtable_for}; @@ -60,6 +59,10 @@ use style; use util::namespace; use util::str::{DOMString, LengthOrPercentageOrAuto}; +use html5ever::serialize; +use html5ever::serialize::SerializeOpts; +use html5ever::serialize::TraversalScope; +use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly}; use html5ever::tree_builder::{NoQuirks, LimitedQuirks, Quirks}; use cssparser::RGBA; @@ -68,6 +71,7 @@ use std::borrow::{IntoCow, ToOwned}; use std::cell::{Ref, RefMut}; use std::default::Default; use std::mem; +use std::old_io::{MemWriter, Writer}; use std::sync::Arc; use string_cache::{Atom, Namespace, QualName}; use url::UrlParser; @@ -424,6 +428,7 @@ pub trait ElementHelpers<'a> { fn update_inline_style(self, property_decl: PropertyDeclaration, 
style_priority: StylePriority); fn get_inline_style_declaration(self, property: &Atom) -> Option<PropertyDeclaration>; fn get_important_inline_style_declaration(self, property: &Atom) -> Option<PropertyDeclaration>; + fn serialize(self, traversal_scope: TraversalScope) -> Fallible<DOMString>; } impl<'a> ElementHelpers<'a> for JSRef<'a, Element> { @@ -569,6 +574,19 @@ impl<'a> ElementHelpers<'a> for JSRef<'a, Element> { .map(|decl| decl.clone()) }) } + + fn serialize(self, traversal_scope: TraversalScope) -> Fallible<DOMString> { + let node: JSRef<Node> = NodeCast::from_ref(self); + let mut writer = MemWriter::new(); + match serialize(&mut writer, &node, + SerializeOpts { + traversal_scope: traversal_scope, + .. Default::default() + }) { + Ok(()) => Ok(String::from_utf8(writer.into_inner()).unwrap()), + Err(_) => panic!("Cannot serialize element"), + } + } } pub trait AttributeHandlers { @@ -1114,11 +1132,11 @@ impl<'a> ElementMethods for JSRef<'a, Element> { fn GetInnerHTML(self) -> Fallible<DOMString> { //XXX TODO: XML case - Ok(serialize(&mut NodeIterator::new(NodeCast::from_ref(self), false, false))) + self.serialize(ChildrenOnly) } fn GetOuterHTML(self) -> Fallible<DOMString> { - Ok(serialize(&mut NodeIterator::new(NodeCast::from_ref(self), true, false))) + self.serialize(IncludeNode) } // http://dom.spec.whatwg.org/#dom-parentnode-children diff --git a/components/script/dom/htmlserializer.rs b/components/script/dom/htmlserializer.rs deleted file mode 100644 index 9ad6cd77e3b..00000000000 --- a/components/script/dom/htmlserializer.rs +++ /dev/null @@ -1,168 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ - -use dom::attr::{Attr, AttrHelpers}; -use dom::bindings::codegen::InheritTypes::{ElementCast, TextCast, CommentCast, NodeCast}; -use dom::bindings::codegen::InheritTypes::{DocumentTypeCast, CharacterDataCast}; -use dom::bindings::codegen::InheritTypes::ProcessingInstructionCast; -use dom::bindings::js::JSRef; -use dom::characterdata::CharacterData; -use dom::comment::Comment; -use dom::documenttype::DocumentType; -use dom::element::{Element, ElementHelpers}; -use dom::node::{Node, NodeHelpers, NodeTypeId, NodeIterator}; -use dom::processinginstruction::ProcessingInstruction; -use dom::text::Text; - -use std::borrow::ToOwned; - -#[allow(unrooted_must_root)] -pub fn serialize(iterator: &mut NodeIterator) -> String { - let mut html = String::new(); - let mut open_elements: Vec<String> = vec!(); - while let Some(node) = iterator.next() { - let depth = iterator.depth; - while open_elements.len() > depth { - html.push_str("</"); - html.push_str(open_elements.pop().unwrap().as_slice()); - html.push_str(">"); - } - match node.type_id() { - NodeTypeId::Element(..) 
=> { - let elem: JSRef<Element> = ElementCast::to_ref(node).unwrap(); - serialize_elem(elem, &mut open_elements, &mut html) - } - NodeTypeId::Comment => { - let comment: JSRef<Comment> = CommentCast::to_ref(node).unwrap(); - serialize_comment(comment, &mut html) - } - NodeTypeId::Text => { - let text: JSRef<Text> = TextCast::to_ref(node).unwrap(); - serialize_text(text, &mut html) - } - NodeTypeId::DocumentType => { - let doctype: JSRef<DocumentType> = DocumentTypeCast::to_ref(node).unwrap(); - serialize_doctype(doctype, &mut html) - } - NodeTypeId::ProcessingInstruction => { - let processing_instruction: JSRef<ProcessingInstruction> = - ProcessingInstructionCast::to_ref(node).unwrap(); - serialize_processing_instruction(processing_instruction, &mut html) - } - NodeTypeId::DocumentFragment => {} - NodeTypeId::Document => { - panic!("It shouldn't be possible to serialize a document node") - } - } - } - while open_elements.len() > 0 { - html.push_str("</"); - html.push_str(open_elements.pop().unwrap().as_slice()); - html.push_str(">"); - } - html -} - -fn serialize_comment(comment: JSRef<Comment>, html: &mut String) { - html.push_str("<!--"); - html.push_str(comment.characterdata().data().as_slice()); - html.push_str("-->"); -} - -fn serialize_text(text: JSRef<Text>, html: &mut String) { - let text_node: JSRef<Node> = NodeCast::from_ref(text); - match text_node.parent_node().map(|node| node.root()) { - Some(ref parent) if parent.r().is_element() => { - let elem: JSRef<Element> = ElementCast::to_ref(parent.r()).unwrap(); - match elem.local_name().as_slice() { - "style" | "script" | "xmp" | "iframe" | - "noembed" | "noframes" | "plaintext" | - "noscript" if *elem.namespace() == ns!(HTML) - => html.push_str(text.characterdata().data().as_slice()), - _ => escape(text.characterdata().data().as_slice(), false, html) - } - } - _ => escape(text.characterdata().data().as_slice(), false, html) - } -} - -fn serialize_processing_instruction(processing_instruction: 
JSRef<ProcessingInstruction>, - html: &mut String) { - html.push_str("<?"); - html.push_str(processing_instruction.target().as_slice()); - html.push(' '); - html.push_str(processing_instruction.characterdata().data().as_slice()); - html.push_str("?>"); -} - -fn serialize_doctype(doctype: JSRef<DocumentType>, html: &mut String) { - html.push_str("<!DOCTYPE"); - html.push_str(doctype.name().as_slice()); - html.push('>'); -} - -fn serialize_elem(elem: JSRef<Element>, open_elements: &mut Vec<String>, html: &mut String) { - html.push('<'); - html.push_str(elem.local_name().as_slice()); - for attr in elem.attrs().iter() { - let attr = attr.root(); - serialize_attr(attr.r(), html); - }; - html.push('>'); - - match elem.local_name().as_slice() { - "pre" | "listing" | "textarea" if *elem.namespace() == ns!(HTML) => { - let node: JSRef<Node> = NodeCast::from_ref(elem); - match node.first_child().map(|child| child.root()) { - Some(ref child) if child.r().is_text() => { - let text: JSRef<CharacterData> = CharacterDataCast::to_ref(child.r()).unwrap(); - if text.data().len() > 0 && text.data().as_slice().char_at(0) == '\n' { - html.push('\x0A'); - } - }, - _ => {} - } - }, - _ => {} - } - - if !(elem.is_void()) { - open_elements.push(elem.local_name().as_slice().to_owned()); - } -} - -fn serialize_attr(attr: JSRef<Attr>, html: &mut String) { - html.push(' '); - if *attr.namespace() == ns!(XML) { - html.push_str("xml:"); - html.push_str(attr.local_name().as_slice()); - } else if *attr.namespace() == ns!(XMLNS) && - *attr.local_name() == atom!("xmlns") { - html.push_str("xmlns"); - } else if *attr.namespace() == ns!(XMLNS) { - html.push_str("xmlns:"); - html.push_str(attr.local_name().as_slice()); - } else if *attr.namespace() == ns!(XLink) { - html.push_str("xlink:"); - html.push_str(attr.local_name().as_slice()); - } else { - html.push_str(attr.name().as_slice()); - }; - html.push_str("=\""); - escape(attr.value().as_slice(), true, html); - html.push('"'); -} - -fn 
escape(string: &str, attr_mode: bool, html: &mut String) { - for c in string.chars() { - match c { - '&' => html.push_str("&amp;"), - '\u{A0}' => html.push_str("&nbsp;"), - '"' if attr_mode => html.push_str("&quot;"), - '<' if !attr_mode => html.push_str("&lt;"), - '>' if !attr_mode => html.push_str("&gt;"), - c => html.push(c), - } - } -} diff --git a/components/script/dom/mod.rs b/components/script/dom/mod.rs index 7d115d1b1ed..9f12913e23c 100644 --- a/components/script/dom/mod.rs +++ b/components/script/dom/mod.rs @@ -272,7 +272,6 @@ pub mod htmlprogresselement; pub mod htmlquoteelement; pub mod htmlscriptelement; pub mod htmlselectelement; -pub mod htmlserializer; pub mod htmlspanelement; pub mod htmlsourceelement; pub mod htmlstyleelement; diff --git a/components/script/parse/html.rs b/components/script/parse/html.rs index 836eac6dfba..29a1de686e8 100644 --- a/components/script/parse/html.rs +++ b/components/script/parse/html.rs @@ -7,6 +7,8 @@ use dom::attr::AttrHelpers; use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use dom::bindings::codegen::InheritTypes::{NodeCast, ElementCast, HTMLScriptElementCast}; +use dom::bindings::codegen::InheritTypes::{DocumentTypeCast, TextCast, CommentCast}; +use dom::bindings::codegen::InheritTypes::ProcessingInstructionCast; use dom::bindings::js::{JS, JSRef, Temporary, OptionalRootable, Root}; use dom::comment::Comment; use dom::document::{Document, DocumentHelpers}; @@ -14,7 +16,8 @@ use dom::documenttype::DocumentType; use dom::element::{Element, AttributeHandlers, ElementHelpers, ElementCreator}; use dom::htmlscriptelement::HTMLScriptElement; use dom::htmlscriptelement::HTMLScriptElementHelpers; -use dom::node::{Node, NodeHelpers}; +use dom::node::{Node, NodeHelpers, NodeTypeId}; +use dom::processinginstruction::ProcessingInstruction; use dom::servohtmlparser; use dom::servohtmlparser::ServoHTMLParser; use dom::text::Text; @@ -27,9 +30,13 @@ use net::resource_task::{ProgressMsg, LoadResponse}; use util::task_state; use 
util::task_state::IN_HTML_PARSER; use std::ascii::AsciiExt; +use std::old_io::{Writer, IoResult}; use std::string::CowString; use url::Url; use html5ever::Attribute; +use html5ever::serialize::{Serializable, Serializer, AttrRef}; +use html5ever::serialize::TraversalScope; +use html5ever::serialize::TraversalScope::{IncludeNode, ChildrenOnly}; use html5ever::tree_builder::{TreeSink, QuirksMode, NodeOrText, AppendNode, AppendText}; use string_cache::QualName; @@ -169,6 +176,80 @@ impl<'a> TreeSink for servohtmlparser::Sink { } } +impl<'a> Serializable for JSRef<'a, Node> { + fn serialize<'wr, Wr: Writer>(&self, serializer: &mut Serializer<'wr, Wr>, + traversal_scope: TraversalScope) -> IoResult<()> { + let node = *self; + match (traversal_scope, node.type_id()) { + (_, NodeTypeId::Element(..)) => { + let elem: JSRef<Element> = ElementCast::to_ref(node).unwrap(); + let name = QualName::new(elem.namespace().clone(), + elem.local_name().clone()); + if traversal_scope == IncludeNode { + let attrs = elem.attrs().iter().map(|at| { + let attr = at.root(); + let qname = QualName::new(attr.r().namespace().clone(), + attr.r().local_name().clone()); + let value = attr.r().value().clone(); + (qname, value) + }).collect::<Vec<_>>(); + let attr_refs = attrs.iter().map(|&(ref qname, ref value)| { + let ar: AttrRef = (&qname, value.as_slice()); + ar + }); + try!(serializer.start_elem(name.clone(), attr_refs)); + } + + for handle in node.children() { + try!(handle.serialize(serializer, IncludeNode)); + } + + if traversal_scope == IncludeNode { + try!(serializer.end_elem(name.clone())); + } + Ok(()) + }, + + (ChildrenOnly, NodeTypeId::Document) => { + for handle in node.children() { + try!(handle.serialize(serializer, IncludeNode)); + } + Ok(()) + }, + + (ChildrenOnly, _) => Ok(()), + + (IncludeNode, NodeTypeId::DocumentType) => { + let doctype: JSRef<DocumentType> = DocumentTypeCast::to_ref(node).unwrap(); + serializer.write_doctype(doctype.name().as_slice()) + }, + + (IncludeNode, 
NodeTypeId::Text) => { + let text: JSRef<Text> = TextCast::to_ref(node).unwrap(); + let data = text.characterdata().data(); + serializer.write_text(data.as_slice()) + }, + + (IncludeNode, NodeTypeId::Comment) => { + let comment: JSRef<Comment> = CommentCast::to_ref(node).unwrap(); + let data = comment.characterdata().data(); + serializer.write_comment(data.as_slice()) + }, + + (IncludeNode, NodeTypeId::ProcessingInstruction) => { + let pi: JSRef<ProcessingInstruction> = ProcessingInstructionCast::to_ref(node).unwrap(); + let data = pi.characterdata().data(); + serializer.write_processing_instruction(pi.target().as_slice(), + data.as_slice()) + }, + + (IncludeNode, NodeTypeId::DocumentFragment) => Ok(()), + + (IncludeNode, NodeTypeId::Document) => panic!("Can't serialize Document node itself"), + } + } +} + pub fn parse_html(document: JSRef<Document>, input: HTMLInput, url: &Url) { |