/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ #![allow(crown::unrooted_must_root)] use std::io; use html5ever::buffer_queue::BufferQueue; use html5ever::serialize::TraversalScope::IncludeNode; use html5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; use html5ever::tokenizer::{Tokenizer as HtmlTokenizer, TokenizerOpts, TokenizerResult}; use html5ever::tree_builder::{Tracer as HtmlTracer, TreeBuilder, TreeBuilderOpts}; use html5ever::QualName; use js::jsapi::JSTracer; use servo_url::ServoUrl; use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId}; use crate::dom::bindings::root::{Dom, DomRoot}; use crate::dom::bindings::trace::{CustomTraceable, JSTraceable}; use crate::dom::characterdata::CharacterData; use crate::dom::document::Document; use crate::dom::documentfragment::DocumentFragment; use crate::dom::documenttype::DocumentType; use crate::dom::element::Element; use crate::dom::htmlscriptelement::HTMLScriptElement; use crate::dom::htmltemplateelement::HTMLTemplateElement; use crate::dom::node::Node; use crate::dom::processinginstruction::ProcessingInstruction; use crate::dom::servoparser::{ParsingAlgorithm, Sink}; #[derive(JSTraceable, MallocSizeOf)] #[crown::unrooted_must_root_lint::must_root] pub struct Tokenizer { #[ignore_malloc_size_of = "Defined in html5ever"] inner: HtmlTokenizer, Sink>>, } impl Tokenizer { pub fn new( document: &Document, url: ServoUrl, fragment_context: Option, parsing_algorithm: ParsingAlgorithm, ) -> Self { let sink = Sink { base_url: url, document: Dom::from_ref(document), current_line: 1, script: Default::default(), parsing_algorithm, }; let options = TreeBuilderOpts { ignore_missing_rules: true, ..Default::default() }; let inner = if let Some(fc) = fragment_context { let tb = TreeBuilder::new_for_fragment( sink, Dom::from_ref(fc.context_elem), fc.form_elem.map(Dom::from_ref), options, ); let tok_options = TokenizerOpts { initial_state: Some(tb.tokenizer_state_for_context_elem()), ..Default::default() }; HtmlTokenizer::new(tb, tok_options) } else { HtmlTokenizer::new(TreeBuilder::new(sink, options), Default::default()) }; Tokenizer { inner } } pub fn feed(&mut self, input: &mut BufferQueue) -> TokenizerResult> { match self.inner.feed(input) { TokenizerResult::Done => TokenizerResult::Done, TokenizerResult::Script(script) => { TokenizerResult::Script(DomRoot::from_ref(script.downcast().unwrap())) }, } } pub fn end(&mut self) { self.inner.end(); } pub fn url(&self) -> &ServoUrl { &self.inner.sink.sink.base_url } pub fn set_plaintext_state(&mut self) { self.inner.set_plaintext_state(); } } #[allow(unsafe_code)] unsafe impl CustomTraceable for HtmlTokenizer, Sink>> { unsafe fn trace(&self, trc: *mut JSTracer) { struct Tracer(*mut JSTracer); let tracer = Tracer(trc); impl HtmlTracer for Tracer { type Handle = Dom; #[allow(crown::unrooted_must_root)] fn trace_handle(&self, node: &Dom) { unsafe { node.trace(self.0); } } } let tree_builder = &self.sink; tree_builder.trace_handles(&tracer); tree_builder.sink.trace(trc); } } fn start_element(node: &Element, serializer: &mut S) -> io::Result<()> { let name = QualName::new(None, node.namespace().clone(), node.local_name().clone()); let attrs = node .attrs() .iter() .map(|attr| { let qname = QualName::new(None, attr.namespace().clone(), attr.local_name().clone()); let value = attr.value().clone(); (qname, value) }) .collect::>(); let attr_refs = attrs.iter().map(|(qname, value)| { let ar: AttrRef = (&qname, &**value); ar }); serializer.start_elem(name, attr_refs)?; Ok(()) } fn end_element(node: &Element, serializer: &mut S) -> io::Result<()> { let name = QualName::new(None, node.namespace().clone(), node.local_name().clone()); serializer.end_elem(name) } enum SerializationCommand { OpenElement(DomRoot), CloseElement(DomRoot), SerializeNonelement(DomRoot), } struct SerializationIterator { stack: Vec, } fn rev_children_iter(n: &Node) -> impl Iterator> { if n.downcast::().map_or(false, |e| e.is_void()) { return Node::new_document_node().rev_children(); } match n.downcast::() { Some(t) => t.Content().upcast::().rev_children(), None => n.rev_children(), } } impl SerializationIterator { fn new(node: &Node, skip_first: bool) -> SerializationIterator { let mut ret = SerializationIterator { stack: vec![] }; if skip_first || node.is::() || node.is::() { for c in rev_children_iter(node) { ret.push_node(&c); } } else { ret.push_node(node); } ret } fn push_node(&mut self, n: &Node) { match n.downcast::() { Some(e) => self .stack .push(SerializationCommand::OpenElement(DomRoot::from_ref(e))), None => self.stack.push(SerializationCommand::SerializeNonelement( DomRoot::from_ref(n), )), } } } impl Iterator for SerializationIterator { type Item = SerializationCommand; fn next(&mut self) -> Option { let res = self.stack.pop(); if let Some(SerializationCommand::OpenElement(ref e)) = res { self.stack .push(SerializationCommand::CloseElement(e.clone())); for c in rev_children_iter(e.upcast::()) { self.push_node(&c); } } res } } impl<'a> Serialize for &'a Node { fn serialize( &self, serializer: &mut S, traversal_scope: TraversalScope, ) -> io::Result<()> { let node = *self; let iter = SerializationIterator::new(node, traversal_scope != IncludeNode); for cmd in iter { match cmd { SerializationCommand::OpenElement(n) => { start_element(&n, serializer)?; }, SerializationCommand::CloseElement(n) => { end_element(&n, serializer)?; }, SerializationCommand::SerializeNonelement(n) => match n.type_id() { NodeTypeId::DocumentType => { let doctype = n.downcast::().unwrap(); serializer.write_doctype(doctype.name())?; }, NodeTypeId::CharacterData(CharacterDataTypeId::Text(_)) => { let cdata = n.downcast::().unwrap(); serializer.write_text(&cdata.data())?; }, NodeTypeId::CharacterData(CharacterDataTypeId::Comment) => { let cdata = n.downcast::().unwrap(); serializer.write_comment(&cdata.data())?; }, NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) => { let pi = n.downcast::().unwrap(); let data = pi.upcast::().data(); serializer.write_processing_instruction(pi.target(), &data)?; }, NodeTypeId::DocumentFragment(_) => {}, NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"), NodeTypeId::Element(_) => panic!("Element shouldn't appear here"), NodeTypeId::Attr => panic!("Attr shouldn't appear here"), }, } } Ok(()) } }