diff options
Diffstat (limited to 'components/script/dom/servoparser/mod.rs')
-rw-r--r-- | components/script/dom/servoparser/mod.rs | 1100 |
1 files changed, 920 insertions, 180 deletions
diff --git a/components/script/dom/servoparser/mod.rs b/components/script/dom/servoparser/mod.rs index 3904910d3a3..481b39e25d3 100644 --- a/components/script/dom/servoparser/mod.rs +++ b/components/script/dom/servoparser/mod.rs @@ -1,49 +1,69 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -use document_loader::{DocumentLoader, LoadType}; -use dom::bindings::cell::DOMRefCell; -use dom::bindings::codegen::Bindings::DocumentBinding::{DocumentMethods, DocumentReadyState}; -use dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods; -use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; -use dom::bindings::codegen::Bindings::ServoParserBinding; -use dom::bindings::inheritance::Castable; -use dom::bindings::js::{JS, Root, RootedReference}; -use dom::bindings::refcounted::Trusted; -use dom::bindings::reflector::{Reflector, reflect_dom_object}; -use dom::bindings::str::DOMString; -use dom::characterdata::CharacterData; -use dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument}; -use dom::element::Element; -use dom::globalscope::GlobalScope; -use dom::htmlformelement::HTMLFormElement; -use dom::htmlimageelement::HTMLImageElement; -use dom::htmlscriptelement::{HTMLScriptElement, ScriptResult}; -use dom::node::{Node, NodeSiblingIterator}; -use dom::text::Text; + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use crate::document_loader::{DocumentLoader, LoadType}; +use crate::dom::bindings::cell::DomRefCell; +use crate::dom::bindings::codegen::Bindings::DocumentBinding::{ + DocumentMethods, DocumentReadyState, +}; +use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods; +use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; +use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; +use crate::dom::bindings::inheritance::Castable; +use crate::dom::bindings::refcounted::Trusted; +use crate::dom::bindings::reflector::{reflect_dom_object, DomObject, Reflector}; +use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom}; +use crate::dom::bindings::settings_stack::is_execution_stack_empty; +use crate::dom::bindings::str::{DOMString, USVString}; +use crate::dom::characterdata::CharacterData; +use crate::dom::comment::Comment; +use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument}; +use crate::dom::documenttype::DocumentType; +use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator}; +use crate::dom::globalscope::GlobalScope; +use crate::dom::htmlformelement::{FormControlElementHelpers, HTMLFormElement}; +use crate::dom::htmlimageelement::HTMLImageElement; +use crate::dom::htmlinputelement::HTMLInputElement; +use crate::dom::htmlscriptelement::{HTMLScriptElement, ScriptResult}; +use crate::dom::htmltemplateelement::HTMLTemplateElement; +use crate::dom::node::{Node, ShadowIncluding}; +use crate::dom::performanceentry::PerformanceEntry; +use crate::dom::performancenavigationtiming::PerformanceNavigationTiming; +use crate::dom::processinginstruction::ProcessingInstruction; +use crate::dom::text::Text; +use crate::dom::virtualmethods::vtable_for; +use crate::network_listener::PreInvoke; +use crate::script_thread::ScriptThread; +use content_security_policy::{self as csp, CspList}; use dom_struct::dom_struct; -use encoding::all::UTF_8; -use encoding::types::{DecoderTrap, Encoding}; -use html5ever::tokenizer::buffer_queue::BufferQueue; -use html5ever::tree_builder::NodeOrText; -use hyper::header::ContentType; -use hyper::mime::{Mime, SubLevel, TopLevel}; +use embedder_traits::resources::{self, Resource}; +use encoding_rs::Encoding; +use html5ever::buffer_queue::BufferQueue; +use html5ever::tendril::fmt::UTF8; +use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink}; +use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink}; +use html5ever::{Attribute, ExpandedName, LocalName, QualName}; use hyper_serde::Serde; +use mime::{self, Mime}; use msg::constellation_msg::PipelineId; use net_traits::{FetchMetadata, FetchResponseListener, Metadata, NetworkError}; -use network_listener::PreInvoke; -use profile_traits::time::{TimerMetadata, TimerMetadataFrameType}; -use profile_traits::time::{TimerMetadataReflowType, ProfilerCategory, profile}; -use script_thread::ScriptThread; +use net_traits::{ResourceFetchTiming, ResourceTimingType}; +use profile_traits::time::{ + profile, ProfilerCategory, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType, +}; use script_traits::DocumentActivity; -use servo_config::resource_files::read_resource_file; +use servo_config::pref; use servo_url::ServoUrl; -use std::ascii::AsciiExt; +use std::borrow::Cow; use std::cell::Cell; use std::mem; +use style::context::QuirksMode as ServoQuirksMode; +use tendril::stream::LossyDecoder; +mod async_html; mod html; +mod prefetch; mod xml; #[dom_struct] @@ -62,25 +82,39 @@ mod xml; pub struct ServoParser { reflector: Reflector, /// The document associated with this parser. - document: JS<Document>, + document: Dom<Document>, + /// The BOM sniffing state. + /// + /// `None` means we've found the BOM, we've found there isn't one, or + /// we're not parsing from a byte stream. `Some` contains the BOM bytes + /// found so far. + bom_sniff: DomRefCell<Option<Vec<u8>>>, + /// The decoder used for the network input. + network_decoder: DomRefCell<Option<NetworkDecoder>>, /// Input received from network. - #[ignore_heap_size_of = "Defined in html5ever"] - network_input: DOMRefCell<BufferQueue>, + #[ignore_malloc_size_of = "Defined in html5ever"] + network_input: DomRefCell<BufferQueue>, /// Input received from script. Used only to support document.write(). - #[ignore_heap_size_of = "Defined in html5ever"] - script_input: DOMRefCell<BufferQueue>, + #[ignore_malloc_size_of = "Defined in html5ever"] + script_input: DomRefCell<BufferQueue>, /// The tokenizer of this parser. - tokenizer: DOMRefCell<Tokenizer>, + tokenizer: DomRefCell<Tokenizer>, /// Whether to expect any further input from the associated network request. last_chunk_received: Cell<bool>, /// Whether this parser should avoid passing any further data to the tokenizer. suspended: Cell<bool>, - /// https://html.spec.whatwg.org/multipage/#script-nesting-level + /// <https://html.spec.whatwg.org/multipage/#script-nesting-level> script_nesting_level: Cell<usize>, - /// https://html.spec.whatwg.org/multipage/#abort-a-parser + /// <https://html.spec.whatwg.org/multipage/#abort-a-parser> aborted: Cell<bool>, - /// https://html.spec.whatwg.org/multipage/#script-created-parser + /// <https://html.spec.whatwg.org/multipage/#script-created-parser> script_created_parser: bool, + /// We do a quick-and-dirty parse of the input looking for resources to prefetch. + // TODO: if we had speculative parsing, we could do this when speculatively + // building the DOM. https://github.com/servo/servo/pull/19203 + prefetch_tokenizer: DomRefCell<prefetch::Tokenizer>, + #[ignore_malloc_size_of = "Defined in html5ever"] + prefetch_input: DomRefCell<BufferQueue>, } #[derive(PartialEq)] @@ -89,56 +123,117 @@ enum LastChunkState { NotReceived, } +pub struct ElementAttribute { + name: QualName, + value: DOMString, +} + +#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)] +pub enum ParsingAlgorithm { + Normal, + Fragment, +} + +impl ElementAttribute { + pub fn new(name: QualName, value: DOMString) -> ElementAttribute { + ElementAttribute { + name: name, + value: value, + } + } +} + impl ServoParser { - pub fn parse_html_document(document: &Document, input: DOMString, url: ServoUrl) { - let parser = ServoParser::new(document, - Tokenizer::Html(self::html::Tokenizer::new(document, url, None)), - LastChunkState::NotReceived, - ParserKind::Normal); - parser.parse_chunk(String::from(input)); + pub fn parser_is_not_active(&self) -> bool { + self.can_write() || self.tokenizer.try_borrow_mut().is_ok() + } + + pub fn parse_html_document(document: &Document, input: Option<DOMString>, url: ServoUrl) { + let parser = if pref!(dom.servoparser.async_html_tokenizer.enabled) { + ServoParser::new( + document, + Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)), + LastChunkState::NotReceived, + ParserKind::Normal, + ) + } else { + ServoParser::new( + document, + Tokenizer::Html(self::html::Tokenizer::new( + document, + url, + None, + ParsingAlgorithm::Normal, + )), + LastChunkState::NotReceived, + ParserKind::Normal, + ) + }; + + // Set as the document's current parser and initialize with `input`, if given. + if let Some(input) = input { + parser.parse_string_chunk(String::from(input)); + } else { + parser.document.set_current_parser(Some(&parser)); + } } // https://html.spec.whatwg.org/multipage/#parsing-html-fragments - pub fn parse_html_fragment(context: &Element, input: DOMString) -> FragmentParsingResult { + pub fn parse_html_fragment( + context: &Element, + input: DOMString, + ) -> impl Iterator<Item = DomRoot<Node>> { let context_node = context.upcast::<Node>(); let context_document = context_node.owner_doc(); let window = context_document.window(); let url = context_document.url(); // Step 1. - let loader = DocumentLoader::new_with_threads(context_document.loader().resource_threads().clone(), - Some(url.clone())); - let document = Document::new(window, - HasBrowsingContext::No, - Some(url.clone()), - context_document.origin().clone(), - IsHTMLDocument::HTMLDocument, - None, - None, - DocumentActivity::Inactive, - DocumentSource::FromParser, - loader, - None, - None); + let loader = DocumentLoader::new_with_threads( + context_document.loader().resource_threads().clone(), + Some(url.clone()), + ); + let document = Document::new( + window, + HasBrowsingContext::No, + Some(url.clone()), + context_document.origin().clone(), + IsHTMLDocument::HTMLDocument, + None, + None, + DocumentActivity::Inactive, + DocumentSource::FromParser, + loader, + None, + None, + Default::default(), + ); // Step 2. document.set_quirks_mode(context_document.quirks_mode()); // Step 11. - let form = context_node.inclusive_ancestors() + let form = context_node + .inclusive_ancestors(ShadowIncluding::No) .find(|element| element.is::<HTMLFormElement>()); + let fragment_context = FragmentContext { context_elem: context_node, - form_elem: form.r(), + form_elem: form.as_deref(), }; - let parser = ServoParser::new(&document, - Tokenizer::Html(self::html::Tokenizer::new(&document, - url.clone(), - Some(fragment_context))), - LastChunkState::Received, - ParserKind::Normal); - parser.parse_chunk(String::from(input)); + let parser = ServoParser::new( + &document, + Tokenizer::Html(self::html::Tokenizer::new( + &document, + url, + Some(fragment_context), + ParsingAlgorithm::Fragment, + )), + LastChunkState::Received, + ParserKind::Normal, + ); + parser.parse_string_chunk(String::from(input)); // Step 14. let root_element = document.GetDocumentElement().expect("no document element"); @@ -147,24 +242,36 @@ impl ServoParser { } } - pub fn parse_html_script_input(document: &Document, url: ServoUrl, type_: &str) { - let parser = ServoParser::new(document, - Tokenizer::Html(self::html::Tokenizer::new(document, url, None)), - LastChunkState::NotReceived, - ParserKind::ScriptCreated); + pub fn parse_html_script_input(document: &Document, url: ServoUrl) { + let parser = ServoParser::new( + document, + Tokenizer::Html(self::html::Tokenizer::new( + document, + url, + None, + ParsingAlgorithm::Normal, + )), + LastChunkState::NotReceived, + ParserKind::ScriptCreated, + ); + *parser.bom_sniff.borrow_mut() = None; document.set_current_parser(Some(&parser)); - if !type_.eq_ignore_ascii_case("text/html") { - parser.parse_chunk("<pre>\n".to_owned()); - parser.tokenizer.borrow_mut().set_plaintext_state(); - } } - pub fn parse_xml_document(document: &Document, input: DOMString, url: ServoUrl) { - let parser = ServoParser::new(document, - Tokenizer::Xml(self::xml::Tokenizer::new(document, url)), - LastChunkState::NotReceived, - ParserKind::Normal); - parser.parse_chunk(String::from(input)); + pub fn parse_xml_document(document: &Document, input: Option<DOMString>, url: ServoUrl) { + let parser = ServoParser::new( + document, + Tokenizer::Xml(self::xml::Tokenizer::new(document, url)), + LastChunkState::NotReceived, + ParserKind::Normal, + ); + + // Set as the document's current parser and initialize with `input`, if given. + if let Some(input) = input { + parser.parse_string_chunk(String::from(input)); + } else { + parser.document.set_current_parser(Some(&parser)); + } } pub fn script_nesting_level(&self) -> usize { @@ -177,7 +284,7 @@ impl ServoParser { /// Corresponds to the latter part of the "Otherwise" branch of the 'An end /// tag whose tag name is "script"' of - /// https://html.spec.whatwg.org/multipage/#parsing-main-incdata + /// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata> /// /// This first moves everything from the script input to the beginning of /// the network input, effectively resetting the insertion point to just @@ -189,12 +296,18 @@ impl ServoParser { /// ^ /// insertion point /// ``` - pub fn resume_with_pending_parsing_blocking_script(&self, script: &HTMLScriptElement, result: ScriptResult) { + pub fn resume_with_pending_parsing_blocking_script( + &self, + script: &HTMLScriptElement, + result: ScriptResult, + ) { assert!(self.suspended.get()); self.suspended.set(false); - mem::swap(&mut *self.script_input.borrow_mut(), - &mut *self.network_input.borrow_mut()); + mem::swap( + &mut *self.script_input.borrow_mut(), + &mut *self.network_input.borrow_mut(), + ); while let Some(chunk) = self.script_input.borrow_mut().pop_front() { self.network_input.borrow_mut().push_back(chunk); } @@ -224,7 +337,9 @@ impl ServoParser { // parser is suspended, we just append everything to the // script input and abort these steps. for chunk in text { - self.script_input.borrow_mut().push_back(String::from(chunk).into()); + self.script_input + .borrow_mut() + .push_back(String::from(chunk).into()); } return; } @@ -280,49 +395,133 @@ impl ServoParser { *self.network_input.borrow_mut() = BufferQueue::new(); // Step 2. - self.document.set_ready_state(DocumentReadyState::Interactive); + self.document + .set_ready_state(DocumentReadyState::Interactive); // Step 3. self.tokenizer.borrow_mut().end(); self.document.set_current_parser(None); // Step 4. - self.document.set_ready_state(DocumentReadyState::Interactive); + self.document.set_ready_state(DocumentReadyState::Complete); + } + + // https://html.spec.whatwg.org/multipage/#active-parser + pub fn is_active(&self) -> bool { + self.script_nesting_level() > 0 && !self.aborted.get() } #[allow(unrooted_must_root)] - fn new_inherited(document: &Document, - tokenizer: Tokenizer, - last_chunk_state: LastChunkState, - kind: ParserKind) - -> Self { + fn new_inherited( + document: &Document, + tokenizer: Tokenizer, + last_chunk_state: LastChunkState, + kind: ParserKind, + ) -> Self { ServoParser { reflector: Reflector::new(), - document: JS::from_ref(document), - network_input: DOMRefCell::new(BufferQueue::new()), - script_input: DOMRefCell::new(BufferQueue::new()), - tokenizer: DOMRefCell::new(tokenizer), + document: Dom::from_ref(document), + bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))), + network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))), + network_input: DomRefCell::new(BufferQueue::new()), + script_input: DomRefCell::new(BufferQueue::new()), + tokenizer: DomRefCell::new(tokenizer), last_chunk_received: Cell::new(last_chunk_state == LastChunkState::Received), suspended: Default::default(), script_nesting_level: Default::default(), aborted: Default::default(), script_created_parser: kind == ParserKind::ScriptCreated, + prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)), + prefetch_input: DomRefCell::new(BufferQueue::new()), } } #[allow(unrooted_must_root)] - fn new(document: &Document, - tokenizer: Tokenizer, - last_chunk_state: LastChunkState, - kind: ParserKind) - -> Root<Self> { - reflect_dom_object(box ServoParser::new_inherited(document, tokenizer, last_chunk_state, kind), - document.window(), - ServoParserBinding::Wrap) + fn new( + document: &Document, + tokenizer: Tokenizer, + last_chunk_state: LastChunkState, + kind: ParserKind, + ) -> DomRoot<Self> { + reflect_dom_object( + Box::new(ServoParser::new_inherited( + document, + tokenizer, + last_chunk_state, + kind, + )), + document.window(), + ) + } + + fn push_tendril_input_chunk(&self, chunk: StrTendril) { + if chunk.is_empty() { + return; + } + // Per https://github.com/whatwg/html/issues/1495 + // stylesheets should not be loaded for documents + // without browsing contexts. + // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 + // suggests that no content should be preloaded in such a case. + // We're conservative, and only prefetch for documents + // with browsing contexts. + if self.document.browsing_context().is_some() { + // Push the chunk into the prefetch input stream, + // which is tokenized eagerly, to scan for resources + // to prefetch. If the user script uses `document.write()` + // to overwrite the network input, this prefetching may + // have been wasted, but in most cases it won't. + let mut prefetch_input = self.prefetch_input.borrow_mut(); + prefetch_input.push_back(chunk.clone()); + self.prefetch_tokenizer + .borrow_mut() + .feed(&mut *prefetch_input); + } + // Push the chunk into the network input stream, + // which is tokenized lazily. + self.network_input.borrow_mut().push_back(chunk); + } + + fn push_bytes_input_chunk(&self, chunk: Vec<u8>) { + // BOM sniff. This is needed because NetworkDecoder will switch the + // encoding based on the BOM, but it won't change + // `self.document.encoding` in the process. + { + let mut bom_sniff = self.bom_sniff.borrow_mut(); + if let Some(partial_bom) = bom_sniff.as_mut() { + if partial_bom.len() + chunk.len() >= 3 { + partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied()); + if let Some((encoding, _)) = Encoding::for_bom(&partial_bom) { + self.document.set_encoding(encoding); + } + drop(bom_sniff); + *self.bom_sniff.borrow_mut() = None; + } else { + partial_bom.extend(chunk.iter().copied()); + } + } + } + + // For byte input, we convert it to text using the network decoder. + let chunk = self + .network_decoder + .borrow_mut() + .as_mut() + .unwrap() + .decode(chunk); + self.push_tendril_input_chunk(chunk); } - fn push_input_chunk(&self, chunk: String) { - self.network_input.borrow_mut().push_back(chunk.into()); + fn push_string_input_chunk(&self, chunk: String) { + // If the input is a string, we don't have a BOM. + if self.bom_sniff.borrow().is_some() { + *self.bom_sniff.borrow_mut() = None; + } + + // The input has already been decoded as a string, so doesn't need + // to be decoded by the network decoder again. + let chunk = StrTendril::from(chunk); + self.push_tendril_input_chunk(chunk); } fn parse_sync(&self) { @@ -332,10 +531,16 @@ impl ServoParser { incremental: TimerMetadataReflowType::FirstReflow, }; let profiler_category = self.tokenizer.borrow().profiler_category(); - profile(profiler_category, - Some(metadata), - self.document.window().upcast::<GlobalScope>().time_profiler_chan().clone(), - || self.do_parse_sync()) + profile( + profiler_category, + Some(metadata), + self.document + .window() + .upcast::<GlobalScope>() + .time_profiler_chan() + .clone(), + || self.do_parse_sync(), + ) } fn do_parse_sync(&self) { @@ -344,6 +549,14 @@ impl ServoParser { // This parser will continue to parse while there is either pending input or // the parser remains unsuspended. + if self.last_chunk_received.get() { + if let Some(decoder) = self.network_decoder.borrow_mut().take() { + let chunk = decoder.finish(); + if !chunk.is_empty() { + self.network_input.borrow_mut().push_back(chunk); + } + } + } self.tokenize(|tokenizer| tokenizer.feed(&mut *self.network_input.borrow_mut())); if self.suspended.get() { @@ -357,16 +570,25 @@ impl ServoParser { } } - fn parse_chunk(&self, input: String) { + fn parse_string_chunk(&self, input: String) { self.document.set_current_parser(Some(self)); - self.push_input_chunk(input); + self.push_string_input_chunk(input); + if !self.suspended.get() { + self.parse_sync(); + } + } + + fn parse_bytes_chunk(&self, input: Vec<u8>) { + self.document.set_current_parser(Some(self)); + self.push_bytes_input_chunk(input); if !self.suspended.get() { self.parse_sync(); } } fn tokenize<F>(&self, mut feed: F) - where F: FnMut(&mut Tokenizer) -> Result<(), Root<HTMLScriptElement>>, + where + F: FnMut(&mut Tokenizer) -> Result<(), DomRoot<HTMLScriptElement>>, { loop { assert!(!self.suspended.get()); @@ -378,6 +600,19 @@ impl ServoParser { Err(script) => script, }; + // https://html.spec.whatwg.org/multipage/#parsing-main-incdata + // branch "An end tag whose tag name is "script" + // The spec says to perform the microtask checkpoint before + // setting the insertion mode back from Text, but this is not + // possible with the way servo and html5ever currently + // relate to each other, and hopefully it is not observable. + if is_execution_stack_empty() { + self.document + .window() + .upcast::<GlobalScope>() + .perform_a_microtask_checkpoint(); + } + let script_nesting_level = self.script_nesting_level.get(); self.script_nesting_level.set(script_nesting_level + 1); @@ -388,6 +623,9 @@ impl ServoParser { self.suspended.set(true); return; } + if self.aborted.get() { + return; + } } } @@ -397,9 +635,11 @@ impl ServoParser { assert!(self.last_chunk_received.get()); assert!(self.script_input.borrow().is_empty()); assert!(self.network_input.borrow().is_empty()); + assert!(self.network_decoder.borrow().is_none()); // Step 1. - self.document.set_ready_state(DocumentReadyState::Interactive); + self.document + .set_ready_state(DocumentReadyState::Interactive); // Step 2. self.tokenizer.borrow_mut().end(); @@ -411,40 +651,49 @@ impl ServoParser { } } -pub struct FragmentParsingResult { - inner: NodeSiblingIterator, +struct FragmentParsingResult<I> +where + I: Iterator<Item = DomRoot<Node>>, +{ + inner: I, } -impl Iterator for FragmentParsingResult { - type Item = Root<Node>; +impl<I> Iterator for FragmentParsingResult<I> +where + I: Iterator<Item = DomRoot<Node>>, +{ + type Item = DomRoot<Node>; - fn next(&mut self) -> Option<Root<Node>> { - let next = match self.inner.next() { - Some(next) => next, - None => return None, - }; + fn next(&mut self) -> Option<DomRoot<Node>> { + let next = self.inner.next()?; next.remove_self(); Some(next) } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.inner.size_hint() + } } -#[derive(HeapSizeOf, JSTraceable, PartialEq)] +#[derive(JSTraceable, MallocSizeOf, PartialEq)] enum ParserKind { Normal, ScriptCreated, } -#[derive(HeapSizeOf, JSTraceable)] -#[must_root] +#[derive(JSTraceable, MallocSizeOf)] +#[unrooted_must_root_lint::must_root] enum Tokenizer { Html(self::html::Tokenizer), + AsyncHtml(self::async_html::Tokenizer), Xml(self::xml::Tokenizer), } impl Tokenizer { - fn feed(&mut self, input: &mut BufferQueue) -> Result<(), Root<HTMLScriptElement>> { + fn feed(&mut self, input: &mut BufferQueue) -> Result<(), DomRoot<HTMLScriptElement>> { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.feed(input), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.feed(input), Tokenizer::Xml(ref mut tokenizer) => tokenizer.feed(input), } } @@ -452,6 +701,7 @@ impl Tokenizer { fn end(&mut self) { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.end(), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.end(), Tokenizer::Xml(ref mut tokenizer) => tokenizer.end(), } } @@ -459,6 +709,7 @@ impl Tokenizer { fn url(&self) -> &ServoUrl { match *self { Tokenizer::Html(ref tokenizer) => tokenizer.url(), + Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(), Tokenizer::Xml(ref tokenizer) => tokenizer.url(), } } @@ -466,6 +717,7 @@ impl Tokenizer { fn set_plaintext_state(&mut self) { match *self { Tokenizer::Html(ref mut tokenizer) => tokenizer.set_plaintext_state(), + Tokenizer::AsyncHtml(ref mut tokenizer) => tokenizer.set_plaintext_state(), Tokenizer::Xml(_) => unimplemented!(), } } @@ -473,6 +725,7 @@ impl Tokenizer { fn profiler_category(&self) -> ProfilerCategory { match *self { Tokenizer::Html(_) => ProfilerCategory::ScriptParseHTML, + Tokenizer::AsyncHtml(_) => ProfilerCategory::ScriptParseHTML, Tokenizer::Xml(_) => ProfilerCategory::ScriptParseXML, } } @@ -489,6 +742,10 @@ pub struct ParserContext { id: PipelineId, /// The URL for this document. url: ServoUrl, + /// timing data for this resource + resource_timing: ResourceFetchTiming, + /// pushed entry index + pushed_entry_index: Option<usize>, } impl ParserContext { @@ -498,6 +755,8 @@ impl ParserContext { is_synthesized_document: false, id: id, url: url, + resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation), + pushed_entry_index: None, } } } @@ -511,15 +770,13 @@ impl FetchResponseListener for ParserContext { let mut ssl_error = None; let mut network_error = None; let metadata = match meta_result { - Ok(meta) => { - Some(match meta { - FetchMetadata::Unfiltered(m) => m, - FetchMetadata::Filtered { unsafe_, .. } => unsafe_, - }) - }, - Err(NetworkError::SslValidation(url, reason)) => { - ssl_error = Some(reason); - let mut meta = Metadata::default(url); + Ok(meta) => Some(match meta { + FetchMetadata::Unfiltered(m) => m, + FetchMetadata::Filtered { unsafe_, .. } => unsafe_, + }), + Err(NetworkError::SslValidation(reason, cert_bytes)) => { + ssl_error = Some((reason, cert_bytes)); + let mut meta = Metadata::default(self.url.clone()); let mime: Option<Mime> = "text/html".parse().ok(); meta.set_content_type(mime.as_ref()); Some(meta) @@ -533,7 +790,36 @@ impl FetchResponseListener for ParserContext { }, Err(_) => None, }; - let content_type = metadata.clone().and_then(|meta| meta.content_type).map(Serde::into_inner); + let content_type: Option<Mime> = metadata + .clone() + .and_then(|meta| meta.content_type) + .map(Serde::into_inner) + .map(Into::into); + + // https://www.w3.org/TR/CSP/#initialize-document-csp + // TODO: Implement step 1 (local scheme special case) + let csp_list = metadata.as_ref().and_then(|m| { + let h = m.headers.as_ref()?; + let mut csp = h.get_all("content-security-policy").iter(); + // This silently ignores the CSP if it contains invalid Unicode. + // We should probably report an error somewhere. + let c = csp.next().and_then(|c| c.to_str().ok())?; + let mut csp_list = CspList::parse( + c, + csp::PolicySource::Header, + csp::PolicyDisposition::Enforce, + ); + for c in csp { + let c = c.to_str().ok()?; + csp_list.append(CspList::parse( + c, + csp::PolicySource::Header, + csp::PolicyDisposition::Enforce, + )); + } + Some(csp_list) + }); + let parser = match ScriptThread::page_headers_available(&self.id, metadata) { Some(parser) => parser, None => return, @@ -542,61 +828,72 @@ impl FetchResponseListener for ParserContext { return; } + parser.document.set_csp_list(csp_list); + self.parser = Some(Trusted::new(&*parser)); + self.submit_resource_timing(); + match content_type { - Some(ContentType(Mime(TopLevel::Image, _, _))) => { + Some(ref mime) if mime.type_() == mime::IMAGE => { self.is_synthesized_document = true; let page = "<html><body></body></html>".into(); - parser.push_input_chunk(page); + parser.push_string_input_chunk(page); parser.parse_sync(); let doc = &parser.document; - let doc_body = Root::upcast::<Node>(doc.GetBody().unwrap()); + let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap()); let img = HTMLImageElement::new(local_name!("img"), None, doc); - img.SetSrc(DOMString::from(self.url.to_string())); - doc_body.AppendChild(&Root::upcast::<Node>(img)).expect("Appending failed"); - + img.SetSrc(USVString(self.url.to_string())); + doc_body + .AppendChild(&DomRoot::upcast::<Node>(img)) + .expect("Appending failed"); }, - Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { + Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::PLAIN => { // https://html.spec.whatwg.org/multipage/#read-text let page = "<pre>\n".into(); - parser.push_input_chunk(page); + parser.push_string_input_chunk(page); parser.parse_sync(); parser.tokenizer.borrow_mut().set_plaintext_state(); }, - Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => { + Some(ref mime) if mime.type_() == mime::TEXT && mime.subtype() == mime::HTML => { // Handle text/html - if let Some(reason) = ssl_error { + if let Some((reason, bytes)) = ssl_error { self.is_synthesized_document = true; - let page_bytes = read_resource_file("badcert.html").unwrap(); - let page = String::from_utf8(page_bytes).unwrap(); + let page = resources::read_string(Resource::BadCertHTML); let page = page.replace("${reason}", &reason); - parser.push_input_chunk(page); + let page = + page.replace("${bytes}", std::str::from_utf8(&bytes).unwrap_or_default()); + let page = + page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string()); + parser.push_string_input_chunk(page); parser.parse_sync(); } if let Some(reason) = network_error { self.is_synthesized_document = true; - let page_bytes = read_resource_file("neterror.html").unwrap(); - let page = String::from_utf8(page_bytes).unwrap(); + let page = resources::read_string(Resource::NetErrorHTML); let page = page.replace("${reason}", &reason); - parser.push_input_chunk(page); + parser.push_string_input_chunk(page); parser.parse_sync(); } }, - Some(ContentType(Mime(TopLevel::Text, SubLevel::Xml, _))) => {}, // Handle text/xml - Some(ContentType(Mime(toplevel, sublevel, _))) => { - if toplevel.as_str() == "application" && sublevel.as_str() == "xhtml+xml" { - // Handle xhtml (application/xhtml+xml). - return; - } - + // Handle text/xml, application/xml + Some(ref mime) + if (mime.type_() == mime::TEXT && mime.subtype() == mime::XML) || + (mime.type_() == mime::APPLICATION && mime.subtype() == mime::XML) => {}, + Some(ref mime) + if mime.type_() == mime::APPLICATION && + mime.subtype().as_str() == "xhtml" && + mime.suffix() == Some(mime::XML) => {}, // Handle xhtml (application/xhtml+xml) + Some(ref mime) => { // Show warning page for unknown mime types. - let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>", - toplevel.as_str(), - sublevel.as_str()); + let page = format!( + "<html><body><p>Unknown content type ({}/{}).</p></body></html>", + mime.type_().as_str(), + mime.subtype().as_str() + ); self.is_synthesized_document = true; - parser.push_input_chunk(page); + parser.push_string_input_chunk(page); parser.parse_sync(); }, None => { @@ -610,8 +907,6 @@ impl FetchResponseListener for ParserContext { if self.is_synthesized_document { return; } - // FIXME: use Vec<u8> (html5ever #34) - let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap(); let parser = match self.parser.as_ref() { Some(parser) => parser.root(), None => return, @@ -619,10 +914,13 @@ impl FetchResponseListener for ParserContext { if parser.aborted.get() { return; } - parser.parse_chunk(data); + parser.parse_bytes_chunk(payload); } - fn process_response_eof(&mut self, status: Result<(), NetworkError>) { + // This method is called via script_thread::handle_fetch_eof, so we must call + // submit_resource_timing in this function + // Resource listeners are called via net_traits::Action::process, which handles submission for them + fn process_response_eof(&mut self, status: Result<ResourceFetchTiming, NetworkError>) { let parser = match self.parser.as_ref() { Some(parser) => parser.root(), None => return, @@ -631,15 +929,61 @@ impl FetchResponseListener for ParserContext { return; } - if let Err(err) = status { + match status { + // are we throwing this away or can we use it? + Ok(_) => (), // TODO(Savago): we should send a notification to callers #5463. - debug!("Failed to load page URL {}, error: {:?}", self.url, err); + Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err), } + parser + .document + .set_redirect_count(self.resource_timing.redirect_count); + parser.last_chunk_received.set(true); if !parser.suspended.get() { parser.parse_sync(); } + + //TODO only update if this is the current document resource + if let Some(pushed_index) = self.pushed_entry_index { + let document = &parser.document; + let performance_entry = + PerformanceNavigationTiming::new(&document.global(), 0, 0, &document); + document + .global() + .performance() + .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>()); + } + } + + fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming { + &mut self.resource_timing + } + + fn resource_timing(&self) -> &ResourceFetchTiming { + &self.resource_timing + } + + // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer + fn submit_resource_timing(&mut self) { + let parser = match self.parser.as_ref() { + Some(parser) => parser.root(), + None => return, + }; + if parser.aborted.get() { + return; + } + + let document = &parser.document; + + //TODO nav_start and nav_start_precise + let performance_entry = + PerformanceNavigationTiming::new(&document.global(), 0, 0, &document); + self.pushed_entry_index = document + .global() + .performance() + .queue_entry(performance_entry.upcast::<PerformanceEntry>()); } } @@ -651,16 +995,33 @@ pub struct FragmentContext<'a> { } #[allow(unrooted_must_root)] -fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<Node>>) { +fn insert( + parent: &Node, + reference_child: Option<&Node>, + child: NodeOrText<Dom<Node>>, + parsing_algorithm: ParsingAlgorithm, +) { match child { NodeOrText::AppendNode(n) => { + // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element + // applies if this is an element; if not, it may be + // https://html.spec.whatwg.org/multipage/#insert-a-comment + let element_in_non_fragment = + parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>(); + if element_in_non_fragment { + ScriptThread::push_new_element_queue(); + } parent.InsertBefore(&n, reference_child).unwrap(); + if element_in_non_fragment { + ScriptThread::pop_current_element_queue(); + } }, NodeOrText::AppendText(t) => { + // https://html.spec.whatwg.org/multipage/#insert-a-character let text = reference_child .and_then(Node::GetPreviousSibling) .or_else(|| parent.GetLastChild()) - .and_then(Root::downcast::<Text>); + .and_then(DomRoot::downcast::<Text>); if let Some(text) = text { text.upcast::<CharacterData>().append_data(&t); @@ -671,3 +1032,382 @@ fn insert(parent: &Node, reference_child: Option<&Node>, child: NodeOrText<JS<No }, } } + +#[derive(JSTraceable, MallocSizeOf)] +#[unrooted_must_root_lint::must_root] +pub struct Sink { + base_url: ServoUrl, + document: Dom<Document>, + current_line: u64, + script: MutNullableDom<HTMLScriptElement>, + parsing_algorithm: ParsingAlgorithm, +} + +impl Sink { + fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool { + let x = x.downcast::<Element>().expect("Element node expected"); + let y = y.downcast::<Element>().expect("Element node expected"); + + x.is_in_same_home_subtree(y) + } + + fn has_parent_node(&self, node: &Dom<Node>) -> bool { + node.GetParentNode().is_some() + } +} + +#[allow(unrooted_must_root)] // FIXME: really? +impl TreeSink for Sink { + type Output = Self; + fn finish(self) -> Self { + self + } + + type Handle = Dom<Node>; + + fn get_document(&mut self) -> Dom<Node> { + Dom::from_ref(self.document.upcast()) + } + + fn get_template_contents(&mut self, target: &Dom<Node>) -> Dom<Node> { + let template = target + .downcast::<HTMLTemplateElement>() + .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing"); + Dom::from_ref(template.Content().upcast()) + } + + fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool { + x == y + } + + fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> { + let elem = target + .downcast::<Element>() + .expect("tried to get name of non-Element in HTML parsing"); + ExpandedName { + ns: elem.namespace(), + local: elem.local_name(), + } + } + + fn create_element( + &mut self, + name: QualName, + attrs: Vec<Attribute>, + _flags: ElementFlags, + ) -> Dom<Node> { + let attrs = attrs + .into_iter() + .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value)))) + .collect(); + let element = create_element_for_token( + name, + attrs, + &*self.document, + ElementCreator::ParserCreated(self.current_line), + self.parsing_algorithm, + ); + Dom::from_ref(element.upcast()) + } + + fn create_comment(&mut self, text: StrTendril) -> Dom<Node> { + let comment = Comment::new(DOMString::from(String::from(text)), &*self.document); + Dom::from_ref(comment.upcast()) + } + + fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Dom<Node> { + let doc = &*self.document; + let pi = ProcessingInstruction::new( + DOMString::from(String::from(target)), + DOMString::from(String::from(data)), + doc, + ); + Dom::from_ref(pi.upcast()) + } + + fn associate_with_form( + &mut self, + target: &Dom<Node>, + form: &Dom<Node>, + nodes: (&Dom<Node>, Option<&Dom<Node>>), + ) { + let (element, prev_element) = nodes; + let tree_node = prev_element.map_or(element, |prev| { + if self.has_parent_node(element) { + element + } else { + prev + } + }); + if !self.same_tree(tree_node, form) { + return; + } + + let node = target; + let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form)) + .expect("Owner must be a form element"); + + let elem = node.downcast::<Element>(); + let control = elem.and_then(|e| e.as_maybe_form_control()); + + if let Some(control) = control { + control.set_form_owner_from_parser(&form); + } else { + // TODO remove this code when keygen is implemented. + assert_eq!( + node.NodeName(), + "KEYGEN", + "Unknown form-associatable element" + ); + } + } + + fn append_before_sibling(&mut self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) { + let parent = sibling + .GetParentNode() + .expect("append_before_sibling called on node without parent"); + + insert(&parent, Some(&*sibling), new_node, self.parsing_algorithm); + } + + fn parse_error(&mut self, msg: Cow<'static, str>) { + debug!("Parse error: {}", msg); + } + + fn set_quirks_mode(&mut self, mode: QuirksMode) { + let mode = match mode { + QuirksMode::Quirks => ServoQuirksMode::Quirks, + QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks, + QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks, + }; + self.document.set_quirks_mode(mode); + } + + fn append(&mut self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) { + insert(&parent, None, child, self.parsing_algorithm); + } + + fn append_based_on_parent_node( + &mut self, + elem: &Dom<Node>, + prev_elem: &Dom<Node>, + child: NodeOrText<Dom<Node>>, + ) { + if self.has_parent_node(elem) { + self.append_before_sibling(elem, child); + } else { + self.append(prev_elem, child); + } + } + + fn append_doctype_to_document( + &mut self, + name: StrTendril, + public_id: StrTendril, + system_id: StrTendril, + ) { + let doc = &*self.document; + let doctype = DocumentType::new( + DOMString::from(String::from(name)), + Some(DOMString::from(String::from(public_id))), + Some(DOMString::from(String::from(system_id))), + doc, + ); + doc.upcast::<Node>() + .AppendChild(doctype.upcast()) + .expect("Appending failed"); + } + + fn add_attrs_if_missing(&mut self, target: &Dom<Node>, attrs: Vec<Attribute>) { + let elem = target + .downcast::<Element>() + .expect("tried to set attrs on non-Element in HTML parsing"); + for attr in attrs { + elem.set_attribute_from_parser( + attr.name, + DOMString::from(String::from(attr.value)), + None, + ); + } + } + + fn remove_from_parent(&mut self, target: &Dom<Node>) { + if let Some(ref parent) = target.GetParentNode() { + parent.RemoveChild(&*target).unwrap(); + } + } + + fn mark_script_already_started(&mut self, node: &Dom<Node>) { + let script = node.downcast::<HTMLScriptElement>(); + script.map(|script| script.set_already_started(true)); + } + + fn complete_script(&mut self, node: &Dom<Node>) -> NextParserState { + if let Some(script) = node.downcast() { + self.script.set(Some(script)); + NextParserState::Suspend + } else { + NextParserState::Continue + } + } + + fn reparent_children(&mut self, node: &Dom<Node>, new_parent: &Dom<Node>) { + while let Some(ref child) = node.GetFirstChild() { + new_parent.AppendChild(&child).unwrap(); + } + } + + /// <https://html.spec.whatwg.org/multipage/#html-integration-point> + /// Specifically, the <annotation-xml> cases. + fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool { + let elem = handle.downcast::<Element>().unwrap(); + elem.get_attribute(&ns!(), &local_name!("encoding")) + .map_or(false, |attr| { + attr.value().eq_ignore_ascii_case("text/html") || + attr.value().eq_ignore_ascii_case("application/xhtml+xml") + }) + } + + fn set_current_line(&mut self, line_number: u64) { + self.current_line = line_number; + } + + fn pop(&mut self, node: &Dom<Node>) { + let node = DomRoot::from_ref(&**node); + vtable_for(&node).pop(); + } +} + +/// https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token +fn create_element_for_token( + name: QualName, + attrs: Vec<ElementAttribute>, + document: &Document, + creator: ElementCreator, + parsing_algorithm: ParsingAlgorithm, +) -> DomRoot<Element> { + // Step 3. + let is = attrs + .iter() + .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is")) + .map(|attr| LocalName::from(&*attr.value)); + + // Step 4. + let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref()); + + // Step 5. + let will_execute_script = + definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment; + + // Step 6. + if will_execute_script { + // Step 6.1. + document.increment_throw_on_dynamic_markup_insertion_counter(); + // Step 6.2 + if is_execution_stack_empty() { + document + .window() + .upcast::<GlobalScope>() + .perform_a_microtask_checkpoint(); + } + // Step 6.3 + ScriptThread::push_new_element_queue() + } + + // Step 7. + let creation_mode = if will_execute_script { + CustomElementCreationMode::Synchronous + } else { + CustomElementCreationMode::Asynchronous + }; + + let element = Element::create(name, is, document, creator, creation_mode); + + // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3 + // says to invoke sanitization "when an input element is first created"; + // however, since sanitization requires content attributes to function, + // it can't mean that literally. + // Indeed, to make sanitization work correctly, we need to _not_ sanitize + // until after all content attributes have been added + + let maybe_input = element.downcast::<HTMLInputElement>(); + if let Some(input) = maybe_input { + input.disable_sanitization(); + } + + // Step 8 + for attr in attrs { + element.set_attribute_from_parser(attr.name, attr.value, None); + } + + // _now_ we can sanitize (and we sanitize now even if the "value" + // attribute isn't present!) + if let Some(input) = maybe_input { + input.enable_sanitization(); + } + + // Step 9. + if will_execute_script { + // Steps 9.1 - 9.2. + ScriptThread::pop_current_element_queue(); + // Step 9.3. + document.decrement_throw_on_dynamic_markup_insertion_counter(); + } + + // TODO: Step 10. + // TODO: Step 11. + + // Step 12 is handled in `associate_with_form`. + + // Step 13. + element +} + +#[derive(JSTraceable, MallocSizeOf)] +struct NetworkDecoder { + #[ignore_malloc_size_of = "Defined in tendril"] + decoder: LossyDecoder<NetworkSink>, +} + +impl NetworkDecoder { + fn new(encoding: &'static Encoding) -> Self { + Self { + decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()), + } + } + + fn decode(&mut self, chunk: Vec<u8>) -> StrTendril { + self.decoder.process(ByteTendril::from(&*chunk)); + mem::replace( + &mut self.decoder.inner_sink_mut().output, + Default::default(), + ) + } + + fn finish(self) -> StrTendril { + self.decoder.finish() + } +} + +#[derive(Default, JSTraceable)] +struct NetworkSink { + output: StrTendril, +} + +impl TendrilSink<UTF8> for NetworkSink { + type Output = StrTendril; + + fn process(&mut self, t: StrTendril) { + if self.output.is_empty() { + self.output = t; + } else { + self.output.push_tendril(&t); + } + } + + fn error(&mut self, _desc: Cow<'static, str>) {} + + fn finish(self) -> Self::Output { + self.output + } +} |