/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use document_loader::LoadType; use dom::bindings::cell::DOMRefCell; use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods; use dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods; use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use dom::bindings::codegen::Bindings::ServoParserBinding; use dom::bindings::inheritance::Castable; use dom::bindings::js::{JS, Root}; use dom::bindings::refcounted::Trusted; use dom::bindings::reflector::{Reflector, reflect_dom_object}; use dom::bindings::str::DOMString; use dom::bindings::trace::JSTraceable; use dom::document::Document; use dom::globalscope::GlobalScope; use dom::htmlimageelement::HTMLImageElement; use dom::node::Node; use encoding::all::UTF_8; use encoding::types::{DecoderTrap, Encoding}; use html5ever::tokenizer::Tokenizer as HtmlTokenizer; use html5ever::tree_builder::Tracer as HtmlTracer; use html5ever::tree_builder::TreeBuilder as HtmlTreeBuilder; use hyper::header::ContentType; use hyper::mime::{Mime, SubLevel, TopLevel}; use hyper_serde::Serde; use js::jsapi::JSTracer; use msg::constellation_msg::PipelineId; use net_traits::{AsyncResponseListener, Metadata, NetworkError}; use network_listener::PreInvoke; use profile_traits::time::{TimerMetadata, TimerMetadataFrameType}; use profile_traits::time::{TimerMetadataReflowType, ProfilerCategory, profile}; use script_thread::ScriptThread; use std::cell::Cell; use std::collections::VecDeque; use url::Url; use util::resource_files::read_resource_file; use xml5ever::tokenizer::XmlTokenizer; use xml5ever::tree_builder::{Tracer as XmlTracer, XmlTreeBuilder}; pub mod html; pub mod xml; #[dom_struct] pub struct ServoParser { reflector: Reflector, /// The document associated with this parser. document: JS, /// The pipeline associated with this parse, unavailable if this parse /// does not correspond to a page load. pipeline: Option, /// Input chunks received but not yet passed to the parser. pending_input: DOMRefCell>, /// The tokenizer of this parser. tokenizer: DOMRefCell, /// Whether to expect any further input from the associated network request. last_chunk_received: Cell, /// Whether this parser should avoid passing any further data to the tokenizer. suspended: Cell, } #[derive(PartialEq)] enum LastChunkState { Received, NotReceived, } impl ServoParser { #[allow(unrooted_must_root)] fn new_inherited( document: &Document, pipeline: Option, tokenizer: Tokenizer, last_chunk_state: LastChunkState) -> Self { ServoParser { reflector: Reflector::new(), document: JS::from_ref(document), pipeline: pipeline, pending_input: DOMRefCell::new(VecDeque::new()), tokenizer: DOMRefCell::new(tokenizer), last_chunk_received: Cell::new(last_chunk_state == LastChunkState::Received), suspended: Default::default(), } } #[allow(unrooted_must_root)] fn new( document: &Document, pipeline: Option, tokenizer: Tokenizer, last_chunk_state: LastChunkState) -> Root { reflect_dom_object( box ServoParser::new_inherited(document, pipeline, tokenizer, last_chunk_state), document.window(), ServoParserBinding::Wrap) } pub fn document(&self) -> &Document { &self.document } pub fn pipeline(&self) -> Option { self.pipeline } fn has_pending_input(&self) -> bool { !self.pending_input.borrow().is_empty() } fn push_input_chunk(&self, chunk: String) { self.pending_input.borrow_mut().push_back(chunk); } fn take_next_input_chunk(&self) -> Option { let mut pending_input = self.pending_input.borrow_mut(); if pending_input.is_empty() { None } else { pending_input.pop_front() } } fn last_chunk_received(&self) -> bool { self.last_chunk_received.get() } fn mark_last_chunk_received(&self) { self.last_chunk_received.set(true) } fn set_plaintext_state(&self) { self.tokenizer.borrow_mut().set_plaintext_state() } pub fn end_tokenizer(&self) { self.tokenizer.borrow_mut().end() } pub fn suspend(&self) { assert!(!self.suspended.get()); self.suspended.set(true); } pub fn resume(&self) { assert!(self.suspended.get()); self.suspended.set(false); self.parse_sync(); } pub fn is_suspended(&self) -> bool { self.suspended.get() } fn parse_sync(&self) { let metadata = TimerMetadata { url: self.document().url().as_str().into(), iframe: TimerMetadataFrameType::RootWindow, incremental: TimerMetadataReflowType::FirstReflow, }; let profiler_category = self.tokenizer.borrow().profiler_category(); profile(profiler_category, Some(metadata), self.document().window().upcast::().time_profiler_chan().clone(), || self.do_parse_sync()) } fn do_parse_sync(&self) { // This parser will continue to parse while there is either pending input or // the parser remains unsuspended. loop { self.document().reflow_if_reflow_timer_expired(); if let Some(chunk) = self.take_next_input_chunk() { self.tokenizer.borrow_mut().feed(chunk); } else { self.tokenizer.borrow_mut().run(); } // Document parsing is blocked on an external resource. if self.suspended.get() { return; } if !self.has_pending_input() { break; } } if self.last_chunk_received() { self.finish(); } } fn parse_chunk(&self, input: String) { self.document().set_current_parser(Some(self)); self.push_input_chunk(input); if !self.is_suspended() { self.parse_sync(); } } fn finish(&self) { assert!(!self.suspended.get()); assert!(!self.has_pending_input()); self.tokenizer.borrow_mut().end(); debug!("finished parsing"); self.document().set_current_parser(None); if let Some(pipeline) = self.pipeline() { ScriptThread::parsing_complete(pipeline); } } } #[derive(HeapSizeOf)] #[must_root] enum Tokenizer { HTML( #[ignore_heap_size_of = "Defined in html5ever"] HtmlTokenizer, Sink>> ), XML( #[ignore_heap_size_of = "Defined in xml5ever"] XmlTokenizer, Sink>> ), } #[derive(JSTraceable, HeapSizeOf)] #[must_root] struct Sink { pub base_url: Url, pub document: JS, } impl Tokenizer { fn feed(&mut self, input: String) { match *self { Tokenizer::HTML(ref mut tokenizer) => tokenizer.feed(input.into()), Tokenizer::XML(ref mut tokenizer) => tokenizer.feed(input.into()), } } fn run(&mut self) { match *self { Tokenizer::HTML(ref mut tokenizer) => tokenizer.run(), Tokenizer::XML(ref mut tokenizer) => tokenizer.run(), } } fn end(&mut self) { match *self { Tokenizer::HTML(ref mut tokenizer) => tokenizer.end(), Tokenizer::XML(ref mut tokenizer) => tokenizer.end(), } } fn set_plaintext_state(&mut self) { match *self { Tokenizer::HTML(ref mut tokenizer) => tokenizer.set_plaintext_state(), Tokenizer::XML(_) => { /* todo */ }, } } fn profiler_category(&self) -> ProfilerCategory { match *self { Tokenizer::HTML(_) => ProfilerCategory::ScriptParseHTML, Tokenizer::XML(_) => ProfilerCategory::ScriptParseXML, } } } impl JSTraceable for Tokenizer { fn trace(&self, trc: *mut JSTracer) { struct Tracer(*mut JSTracer); let tracer = Tracer(trc); match *self { Tokenizer::HTML(ref tokenizer) => { impl HtmlTracer for Tracer { type Handle = JS; #[allow(unrooted_must_root)] fn trace_handle(&self, node: &JS) { node.trace(self.0); } } let tree_builder = tokenizer.sink(); tree_builder.trace_handles(&tracer); tree_builder.sink().trace(trc); }, Tokenizer::XML(ref tokenizer) => { impl XmlTracer for Tracer { type Handle = JS; #[allow(unrooted_must_root)] fn trace_handle(&self, node: JS) { node.trace(self.0); } } let tree_builder = tokenizer.sink(); tree_builder.trace_handles(&tracer); tree_builder.sink().trace(trc); } } } } /// The context required for asynchronously fetching a document /// and parsing it progressively. pub struct ParserContext { /// The parser that initiated the request. parser: Option>, /// Is this a synthesized document is_synthesized_document: bool, /// The pipeline associated with this document. id: PipelineId, /// The URL for this document. url: Url, } impl ParserContext { pub fn new(id: PipelineId, url: Url) -> ParserContext { ParserContext { parser: None, is_synthesized_document: false, id: id, url: url, } } } impl AsyncResponseListener for ParserContext { fn headers_available(&mut self, meta_result: Result) { let mut ssl_error = None; let metadata = match meta_result { Ok(meta) => Some(meta), Err(NetworkError::SslValidation(url, reason)) => { ssl_error = Some(reason); let mut meta = Metadata::default(url); let mime: Option = "text/html".parse().ok(); meta.set_content_type(mime.as_ref()); Some(meta) }, Err(_) => None, }; let content_type = metadata.clone().and_then(|meta| meta.content_type).map(Serde::into_inner); let parser = match ScriptThread::page_headers_available(&self.id, metadata) { Some(parser) => parser, None => return, }; self.parser = Some(Trusted::new(&*parser)); match content_type { Some(ContentType(Mime(TopLevel::Image, _, _))) => { self.is_synthesized_document = true; let page = "".into(); parser.push_input_chunk(page); parser.parse_sync(); let doc = parser.document(); let doc_body = Root::upcast::(doc.GetBody().unwrap()); let img = HTMLImageElement::new(atom!("img"), None, doc); img.SetSrc(DOMString::from(self.url.to_string())); doc_body.AppendChild(&Root::upcast::(img)).expect("Appending failed"); }, Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => { // https://html.spec.whatwg.org/multipage/#read-text let page = "
\n".into();
                parser.push_input_chunk(page);
                parser.parse_sync();
                parser.set_plaintext_state();
            },
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => { // Handle text/html
                if let Some(reason) = ssl_error {
                    self.is_synthesized_document = true;
                    let page_bytes = read_resource_file("badcert.html").unwrap();
                    let page = String::from_utf8(page_bytes).unwrap();
                    let page = page.replace("${reason}", &reason);
                    parser.push_input_chunk(page);
                    parser.parse_sync();
                }
            },
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Xml, _))) => {}, // Handle text/xml
            Some(ContentType(Mime(toplevel, sublevel, _))) => {
                if toplevel.as_str() == "application" && sublevel.as_str() == "xhtml+xml" {
                    // Handle xhtml (application/xhtml+xml).
                    return;
                }

                // Show warning page for unknown mime types.
                let page = format!("

Unknown content type ({}/{}).

", toplevel.as_str(), sublevel.as_str()); self.is_synthesized_document = true; parser.push_input_chunk(page); parser.parse_sync(); }, None => { // No content-type header. // Merge with #4212 when fixed. } } } fn data_available(&mut self, payload: Vec) { if !self.is_synthesized_document { // FIXME: use Vec (html5ever #34) let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap(); let parser = match self.parser.as_ref() { Some(parser) => parser.root(), None => return, }; parser.parse_chunk(data); } } fn response_complete(&mut self, status: Result<(), NetworkError>) { let parser = match self.parser.as_ref() { Some(parser) => parser.root(), None => return, }; if let Err(NetworkError::Internal(ref reason)) = status { // Show an error page for network errors, // certificate errors are handled earlier. self.is_synthesized_document = true; let page_bytes = read_resource_file("neterror.html").unwrap(); let page = String::from_utf8(page_bytes).unwrap(); let page = page.replace("${reason}", reason); parser.push_input_chunk(page); parser.parse_sync(); } else if let Err(err) = status { // TODO(Savago): we should send a notification to callers #5463. debug!("Failed to load page URL {}, error: {:?}", self.url, err); } parser.document() .finish_load(LoadType::PageSource(self.url.clone())); parser.mark_last_chunk_received(); if !parser.is_suspended() { parser.parse_sync(); } } } impl PreInvoke for ParserContext {}