diff options
author | Simon Sapin <simon.sapin@exyr.org> | 2017-10-31 19:06:34 +0100 |
---|---|---|
committer | Simon Sapin <simon.sapin@exyr.org> | 2017-11-01 10:16:11 +0100 |
commit | a3971eb686503641a0e9cc64f4844a6abdd5cda1 (patch) | |
tree | f443a6e5182bfb8c2b09afae3bbb95c4df3a7f0e /components/script | |
parent | 3c36a36cc942a6cce52d4575ad6c26bbde6e4bd7 (diff) | |
download | servo-a3971eb686503641a0e9cc64f4844a6abdd5cda1.tar.gz servo-a3971eb686503641a0e9cc64f4844a6abdd5cda1.zip |
Replace rust-encoding with encoding-rs
Diffstat (limited to 'components/script')
-rw-r--r-- | components/script/Cargo.toml | 1 | ||||
-rw-r--r-- | components/script/dom/bindings/trace.rs | 4 | ||||
-rw-r--r-- | components/script/dom/document.rs | 38 | ||||
-rw-r--r-- | components/script/dom/filereader.rs | 14 | ||||
-rwxr-xr-x | components/script/dom/htmlformelement.rs | 29 | ||||
-rw-r--r-- | components/script/dom/htmlscriptelement.rs | 13 | ||||
-rw-r--r-- | components/script/dom/textdecoder.rs | 40 | ||||
-rw-r--r-- | components/script/dom/xmlhttprequest.rs | 47 | ||||
-rw-r--r-- | components/script/lib.rs | 1 |
9 files changed, 77 insertions, 110 deletions
diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml index 1013a3adfe2..299a25f88f6 100644 --- a/components/script/Cargo.toml +++ b/components/script/Cargo.toml @@ -39,7 +39,6 @@ deny_public_fields = {path = "../deny_public_fields"} devtools_traits = {path = "../devtools_traits"} dom_struct = {path = "../dom_struct"} domobject_derive = {path = "../domobject_derive"} -encoding = "0.2" encoding_rs = "0.7" euclid = "0.15" fnv = "1.0" diff --git a/components/script/dom/bindings/trace.rs b/components/script/dom/bindings/trace.rs index f65fcbeca2f..affc70401ed 100644 --- a/components/script/dom/bindings/trace.rs +++ b/components/script/dom/bindings/trace.rs @@ -46,7 +46,7 @@ use dom::bindings::root::{Dom, DomRoot}; use dom::bindings::str::{DOMString, USVString}; use dom::bindings::utils::WindowProxyHandler; use dom::document::PendingRestyle; -use encoding::types::EncodingRef; +use encoding_rs::Encoding; use euclid::{Transform2D, Transform3D, Point2D, Vector2D, Rect, TypedSize2D, ScaleFactor}; use euclid::Length as EuclidLength; use html5ever::{Prefix, LocalName, Namespace, QualName}; @@ -122,7 +122,7 @@ pub unsafe trait JSTraceable { unsafe_no_jsmanaged_fields!(CSSError); -unsafe_no_jsmanaged_fields!(EncodingRef); +unsafe_no_jsmanaged_fields!(&'static Encoding); unsafe_no_jsmanaged_fields!(Reflector); diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index bf7d82bf0f8..0cb0bf2def5 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -90,8 +90,7 @@ use dom::webglcontextevent::WebGLContextEvent; use dom::window::{ReflowReason, Window}; use dom::windowproxy::WindowProxy; use dom_struct::dom_struct; -use encoding::EncodingRef; -use encoding::all::UTF_8; +use encoding_rs::{Encoding, UTF_8}; use euclid::Point2D; use html5ever::{LocalName, Namespace, QualName}; use hyper::header::{Header, SetCookie}; @@ -240,7 +239,7 @@ pub struct Document { implementation: MutNullableDom<DOMImplementation>, content_type: DOMString, last_modified: Option<String>, - encoding: Cell<EncodingRef>, + encoding: Cell<&'static Encoding>, has_browsing_context: bool, is_html_document: bool, activity: Cell<DocumentActivity>, @@ -576,11 +575,11 @@ impl Document { } } - pub fn encoding(&self) -> EncodingRef { + pub fn encoding(&self) -> &'static Encoding { self.encoding.get() } - pub fn set_encoding(&self, encoding: EncodingRef) { + pub fn set_encoding(&self, encoding: &'static Encoding) { self.encoding.set(encoding); } @@ -2828,34 +2827,7 @@ impl DocumentMethods for Document { // https://dom.spec.whatwg.org/#dom-document-characterset fn CharacterSet(&self) -> DOMString { - DOMString::from(match self.encoding.get().name() { - "utf-8" => "UTF-8", - "ibm866" => "IBM866", - "iso-8859-2" => "ISO-8859-2", - "iso-8859-3" => "ISO-8859-3", - "iso-8859-4" => "ISO-8859-4", - "iso-8859-5" => "ISO-8859-5", - "iso-8859-6" => "ISO-8859-6", - "iso-8859-7" => "ISO-8859-7", - "iso-8859-8" => "ISO-8859-8", - "iso-8859-8-i" => "ISO-8859-8-I", - "iso-8859-10" => "ISO-8859-10", - "iso-8859-13" => "ISO-8859-13", - "iso-8859-14" => "ISO-8859-14", - "iso-8859-15" => "ISO-8859-15", - "iso-8859-16" => "ISO-8859-16", - "koi8-r" => "KOI8-R", - "koi8-u" => "KOI8-U", - "gbk" => "GBK", - "big5" => "Big5", - "euc-jp" => "EUC-JP", - "iso-2022-jp" => "ISO-2022-JP", - "shift_jis" => "Shift_JIS", - "euc-kr" => "EUC-KR", - "utf-16be" => "UTF-16BE", - "utf-16le" => "UTF-16LE", - name => name - }) + DOMString::from(self.encoding.get().name()) } // https://dom.spec.whatwg.org/#dom-document-charset diff --git a/components/script/dom/filereader.rs b/components/script/dom/filereader.rs index db5518116ef..7a2674f293a 100644 --- a/components/script/dom/filereader.rs +++ b/components/script/dom/filereader.rs @@ -21,9 +21,7 @@ use dom::eventtarget::EventTarget; use dom::globalscope::GlobalScope; use dom::progressevent::ProgressEvent; use dom_struct::dom_struct; -use encoding::all::UTF_8; -use encoding::label::encoding_from_whatwg_label; -use encoding::types::{DecoderTrap, EncodingRef}; +use encoding_rs::{Encoding, UTF_8}; use hyper::mime::{Attr, Mime}; use js::jsapi::Heap; use js::jsapi::JSAutoCompartment; @@ -223,8 +221,8 @@ impl FileReader { //https://w3c.github.io/FileAPI/#encoding-determination // Steps 1 & 2 & 3 let mut encoding = blob_label.as_ref() - .map(|string| &**string) - .and_then(encoding_from_whatwg_label); + .map(|string| string.as_bytes()) + .and_then(Encoding::for_label); // Step 4 & 5 encoding = encoding.or_else(|| { @@ -232,16 +230,16 @@ impl FileReader { resultmime.and_then(|Mime(_, _, ref parameters)| { parameters.iter() .find(|&&(ref k, _)| &Attr::Charset == k) - .and_then(|&(_, ref v)| encoding_from_whatwg_label(&v.to_string())) + .and_then(|&(_, ref v)| Encoding::for_label(v.as_str().as_bytes())) }) }); // Step 6 - let enc = encoding.unwrap_or(UTF_8 as EncodingRef); + let enc = encoding.unwrap_or(UTF_8); let convert = blob_bytes; // Step 7 - let output = enc.decode(convert, DecoderTrap::Replace).unwrap(); + let (output, _, _) = enc.decode(convert); *result.borrow_mut() = Some(FileReaderResult::String(DOMString::from(output))); } diff --git a/components/script/dom/htmlformelement.rs b/components/script/dom/htmlformelement.rs index 18c8a9bf348..e4ce6dfef43 100755 --- a/components/script/dom/htmlformelement.rs +++ b/components/script/dom/htmlformelement.rs @@ -42,9 +42,7 @@ use dom::node::{document_from_node, window_from_node}; use dom::validitystate::ValidationFlags; use dom::virtualmethods::VirtualMethods; use dom_struct::dom_struct; -use encoding::{EncodingRef, EncoderTrap}; -use encoding::all::UTF_8; -use encoding::label::encoding_from_whatwg_label; +use encoding_rs::{Encoding, UTF_8}; use html5ever::{LocalName, Prefix}; use hyper::header::{Charset, ContentDisposition, ContentType, DispositionParam, DispositionType}; use hyper::method::Method; @@ -252,14 +250,15 @@ pub enum ResetFrom { impl HTMLFormElement { // https://html.spec.whatwg.org/multipage/#picking-an-encoding-for-the-form - fn pick_encoding(&self) -> EncodingRef { + fn pick_encoding(&self) -> &'static Encoding { // Step 2 if self.upcast::<Element>().has_attribute(&local_name!("accept-charset")) { // Substep 1 let input = self.upcast::<Element>().get_string_attribute(&local_name!("accept-charset")); // Substep 2, 3, 4 - let mut candidate_encodings = split_html_space_chars(&*input).filter_map(encoding_from_whatwg_label); + let mut candidate_encodings = split_html_space_chars(&*input) + .filter_map(|c| Encoding::for_label(c.as_bytes())); // Substep 5, 6 return candidate_encodings.next().unwrap_or(UTF_8); @@ -278,7 +277,7 @@ impl HTMLFormElement { let encoding = self.pick_encoding(); // Step 3 - let charset = &*encoding.whatwg_name().unwrap(); + let charset = encoding.name(); for entry in form_data.iter_mut() { // Step 4, 5 @@ -377,8 +376,8 @@ impl HTMLFormElement { } // https://html.spec.whatwg.org/multipage/#submit-mutate-action - fn mutate_action_url(&self, form_data: &mut Vec<FormDatum>, mut load_data: LoadData, encoding: EncodingRef) { - let charset = &*encoding.whatwg_name().unwrap(); + fn mutate_action_url(&self, form_data: &mut Vec<FormDatum>, mut load_data: LoadData, encoding: &'static Encoding) { + let charset = encoding.name(); self.set_encoding_override(load_data.url.as_mut_url().query_pairs_mut()) .clear() @@ -390,11 +389,11 @@ impl HTMLFormElement { // https://html.spec.whatwg.org/multipage/#submit-body fn submit_entity_body(&self, form_data: &mut Vec<FormDatum>, mut load_data: LoadData, - enctype: FormEncType, encoding: EncodingRef) { + enctype: FormEncType, encoding: &'static Encoding) { let boundary = generate_boundary(); let bytes = match enctype { FormEncType::UrlEncoded => { - let charset = &*encoding.whatwg_name().unwrap(); + let charset = encoding.name(); load_data.headers.set(ContentType::form_url_encoded()); self.set_encoding_override(load_data.url.as_mut_url().query_pairs_mut()) @@ -422,11 +421,7 @@ impl HTMLFormElement { fn set_encoding_override<'a>(&self, mut serializer: Serializer<UrlQuery<'a>>) -> Serializer<UrlQuery<'a>> { let encoding = self.pick_encoding(); - if encoding.name() != "utf-8" { - serializer.custom_encoding_override(move |s| { - encoding.encode(s, EncoderTrap::NcrEscape).unwrap().into() - }); - } + serializer.custom_encoding_override(move |s| encoding.encode(s).0); serializer } @@ -1118,12 +1113,12 @@ impl FormControlElementHelpers for Element { // https://html.spec.whatwg.org/multipage/#multipart/form-data-encoding-algorithm pub fn encode_multipart_form_data(form_data: &mut Vec<FormDatum>, - boundary: String, encoding: EncodingRef) -> Vec<u8> { + boundary: String, encoding: &'static Encoding) -> Vec<u8> { // Step 1 let mut result = vec![]; // Step 2 - let charset = &*encoding.whatwg_name().unwrap_or("UTF-8"); + let charset = encoding.name(); // Step 3 for entry in form_data.iter_mut() { diff --git a/components/script/dom/htmlscriptelement.rs b/components/script/dom/htmlscriptelement.rs index 8ff45372c88..b3c1fe76464 100644 --- a/components/script/dom/htmlscriptelement.rs +++ b/components/script/dom/htmlscriptelement.rs @@ -23,8 +23,7 @@ use dom::node::{ChildrenMutation, CloneChildrenFlag, Node}; use dom::node::{document_from_node, window_from_node}; use dom::virtualmethods::VirtualMethods; use dom_struct::dom_struct; -use encoding::label::encoding_from_whatwg_label; -use encoding::types::{DecoderTrap, EncodingRef}; +use encoding_rs::Encoding; use html5ever::{LocalName, Prefix}; use ipc_channel::ipc; use ipc_channel::router::ROUTER; @@ -147,7 +146,7 @@ struct ScriptContext { kind: ExternalScriptKind, /// The (fallback) character encoding argument to the "fetch a classic /// script" algorithm. - character_encoding: EncodingRef, + character_encoding: &'static Encoding, /// The response body received to date. data: Vec<u8>, /// The response metadata received to date. @@ -199,11 +198,11 @@ impl FetchResponseListener for ScriptContext { // Step 6. let encoding = metadata.charset - .and_then(|encoding| encoding_from_whatwg_label(&encoding)) + .and_then(|encoding| Encoding::for_label(encoding.as_bytes())) .unwrap_or(self.character_encoding); // Step 7. - let source_text = encoding.decode(&self.data, DecoderTrap::Replace).unwrap(); + let (source_text, _, _) = encoding.decode(&self.data); ClassicScript::external(DOMString::from(source_text), metadata.final_url) }); @@ -232,7 +231,7 @@ fn fetch_a_classic_script(script: &HTMLScriptElement, url: ServoUrl, cors_setting: Option<CorsSettings>, integrity_metadata: String, - character_encoding: EncodingRef) { + character_encoding: &'static Encoding) { let doc = document_from_node(script); // Step 1, 2. @@ -366,7 +365,7 @@ impl HTMLScriptElement { // Step 14. let encoding = element.get_attribute(&ns!(), &local_name!("charset")) - .and_then(|charset| encoding_from_whatwg_label(&charset.value())) + .and_then(|charset| Encoding::for_label(charset.value().as_bytes())) .unwrap_or_else(|| doc.encoding()); // Step 15. diff --git a/components/script/dom/textdecoder.rs b/components/script/dom/textdecoder.rs index 2bfe574c380..5ae9c572511 100644 --- a/components/script/dom/textdecoder.rs +++ b/components/script/dom/textdecoder.rs @@ -10,21 +10,20 @@ use dom::bindings::root::DomRoot; use dom::bindings::str::{DOMString, USVString}; use dom::globalscope::GlobalScope; use dom_struct::dom_struct; -use encoding::label::encoding_from_whatwg_label; -use encoding::types::{DecoderTrap, EncodingRef}; +use encoding_rs::Encoding; use js::jsapi::{JSContext, JSObject}; +use std::ascii::AsciiExt; use std::borrow::ToOwned; #[dom_struct] pub struct TextDecoder { reflector_: Reflector, - #[ignore_malloc_size_of = "Defined in rust-encoding"] - encoding: EncodingRef, + encoding: &'static Encoding, fatal: bool, } impl TextDecoder { - fn new_inherited(encoding: EncodingRef, fatal: bool) -> TextDecoder { + fn new_inherited(encoding: &'static Encoding, fatal: bool) -> TextDecoder { TextDecoder { reflector_: Reflector::new(), encoding: encoding, @@ -36,7 +35,7 @@ impl TextDecoder { Err(Error::Range("The given encoding is not supported.".to_owned())) } - pub fn new(global: &GlobalScope, encoding: EncodingRef, fatal: bool) -> DomRoot<TextDecoder> { + pub fn new(global: &GlobalScope, encoding: &'static Encoding, fatal: bool) -> DomRoot<TextDecoder> { reflect_dom_object(Box::new(TextDecoder::new_inherited(encoding, fatal)), global, TextDecoderBinding::Wrap) @@ -47,19 +46,10 @@ impl TextDecoder { label: DOMString, options: &TextDecoderBinding::TextDecoderOptions) -> Fallible<DomRoot<TextDecoder>> { - let encoding = match encoding_from_whatwg_label(&label) { + let encoding = match Encoding::for_label_no_replacement(label.as_bytes()) { None => return TextDecoder::make_range_error(), Some(enc) => enc }; - // The rust-encoding crate has WHATWG compatibility, so we are - // guaranteed to have a whatwg_name because we successfully got - // the encoding from encoding_from_whatwg_label. - // Use match + panic! instead of unwrap for better error message - match encoding.whatwg_name() { - None => panic!("Label {} fits valid encoding without valid name", label), - Some("replacement") => return TextDecoder::make_range_error(), - _ => () - }; Ok(TextDecoder::new(global, encoding, options.fatal)) } } @@ -68,7 +58,7 @@ impl TextDecoder { impl TextDecoderMethods for TextDecoder { // https://encoding.spec.whatwg.org/#dom-textdecoder-encoding fn Encoding(&self) -> DOMString { - DOMString::from(self.encoding.whatwg_name().unwrap()) + DOMString::from(self.encoding.name().to_ascii_lowercase()) } // https://encoding.spec.whatwg.org/#dom-textdecoder-fatal @@ -93,15 +83,15 @@ impl TextDecoderMethods for TextDecoder { } }; - let trap = if self.fatal { - DecoderTrap::Strict + let s = if self.fatal { + match self.encoding.decode_without_bom_handling_and_without_replacement(data.as_slice()) { + Some(s) => s, + None => return Err(Error::Type("Decoding failed".to_owned())), + } } else { - DecoderTrap::Replace + let (s, _has_errors) = self.encoding.decode_without_bom_handling(data.as_slice()); + s }; - - match self.encoding.decode(data.as_slice(), trap) { - Ok(s) => Ok(USVString(s)), - Err(_) => Err(Error::Type("Decoding failed".to_owned())), - } + Ok(USVString(s.into_owned())) } } diff --git a/components/script/dom/xmlhttprequest.rs b/components/script/dom/xmlhttprequest.rs index c7745546717..a20d8c90fb7 100644 --- a/components/script/dom/xmlhttprequest.rs +++ b/components/script/dom/xmlhttprequest.rs @@ -36,9 +36,7 @@ use dom::workerglobalscope::WorkerGlobalScope; use dom::xmlhttprequesteventtarget::XMLHttpRequestEventTarget; use dom::xmlhttprequestupload::XMLHttpRequestUpload; use dom_struct::dom_struct; -use encoding::all::UTF_8; -use encoding::label::encoding_from_whatwg_label; -use encoding::types::{DecoderTrap, EncoderTrap, Encoding, EncodingRef}; +use encoding_rs::{Encoding, UTF_8}; use euclid::Length; use html5ever::serialize; use html5ever::serialize::SerializeOpts; @@ -66,6 +64,7 @@ use std::ascii::AsciiExt; use std::borrow::ToOwned; use std::cell::Cell; use std::default::Default; +use std::slice; use std::str; use std::sync::{Arc, Mutex}; use task_source::networking::NetworkingTaskSource; @@ -137,8 +136,7 @@ pub struct XMLHttpRequest { response_headers: DomRefCell<Headers>, #[ignore_malloc_size_of = "Defined in hyper"] override_mime_type: DomRefCell<Option<Mime>>, - #[ignore_malloc_size_of = "Defined in rust-encoding"] - override_charset: DomRefCell<Option<EncodingRef>>, + override_charset: DomRefCell<Option<&'static Encoding>>, // Associated concepts #[ignore_malloc_size_of = "Defined in hyper"] @@ -726,7 +724,7 @@ impl XMLHttpRequestMethods for XMLHttpRequest { // Step 4 let value = override_mime.get_param(mime::Attr::Charset); *self.override_charset.borrow_mut() = value.and_then(|value| { - encoding_from_whatwg_label(value) + Encoding::for_label(value.as_bytes()) }); Ok(()) } @@ -1085,7 +1083,9 @@ impl XMLHttpRequest { // According to Simon, decode() should never return an error, so unwrap()ing // the result should be fine. XXXManishearth have a closer look at this later // Step 1, 2, 6 - charset.decode(&self.response.borrow(), DecoderTrap::Replace).unwrap() + let response = self.response.borrow(); + let (text, _, _) = charset.decode(&response); + text.into_owned() } // https://xhr.spec.whatwg.org/#blob-response @@ -1164,8 +1164,22 @@ impl XMLHttpRequest { return NullValue(); } // Step 4 - let json_text = UTF_8.decode(&bytes, DecoderTrap::Replace).unwrap(); - let json_text: Vec<u16> = json_text.encode_utf16().collect(); + fn decode_to_utf16(bytes: &[u8], encoding: &'static Encoding) -> Vec<u16> { + let mut decoder = encoding.new_decoder(); + let capacity = decoder.max_utf16_buffer_length(bytes.len()).expect("Overflow"); + let mut utf16 = Vec::with_capacity(capacity); + let extra = unsafe { + slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) + }; + let last = true; + let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, last); + assert!(read == bytes.len()); + unsafe { + utf16.set_len(written) + } + utf16 + } + let json_text = decode_to_utf16(&bytes, UTF_8); // Step 5 rooted!(in(cx) let mut rval = UndefinedValue()); unsafe { @@ -1185,7 +1199,8 @@ impl XMLHttpRequest { fn document_text_html(&self) -> DomRoot<Document> { let charset = self.final_charset().unwrap_or(UTF_8); let wr = self.global(); - let decoded = charset.decode(&self.response.borrow(), DecoderTrap::Replace).unwrap(); + let response = self.response.borrow(); + let (decoded, _, _) = charset.decode(&response); let document = self.new_doc(IsHTMLDocument::HTMLDocument); // TODO: Disable scripting while parsing ServoParser::parse_html_document( @@ -1198,7 +1213,8 @@ impl XMLHttpRequest { fn handle_xml(&self) -> DomRoot<Document> { let charset = self.final_charset().unwrap_or(UTF_8); let wr = self.global(); - let decoded = charset.decode(&self.response.borrow(), DecoderTrap::Replace).unwrap(); + let response = self.response.borrow(); + let (decoded, _, _) = charset.decode(&response); let document = self.new_doc(IsHTMLDocument::NonHTMLDocument); // TODO: Disable scripting while parsing ServoParser::parse_xml_document( @@ -1307,7 +1323,7 @@ impl XMLHttpRequest { Ok(()) } - fn final_charset(&self) -> Option<EncodingRef> { + fn final_charset(&self) -> Option<&'static Encoding> { if self.override_charset.borrow().is_some() { self.override_charset.borrow().clone() } else { @@ -1315,7 +1331,7 @@ impl XMLHttpRequest { Some(&ContentType(ref mime)) => { let value = mime.get_param(mime::Attr::Charset); value.and_then(|value|{ - encoding_from_whatwg_label(value) + Encoding::for_label(value.as_bytes()) }) } None => { None } @@ -1370,7 +1386,7 @@ impl Extractable for Blob { impl Extractable for DOMString { fn extract(&self) -> (Vec<u8>, Option<DOMString>) { - (UTF_8.encode(self, EncoderTrap::Replace).unwrap(), + (self.as_bytes().to_owned(), Some(DOMString::from("text/plain;charset=UTF-8"))) } } @@ -1378,8 +1394,7 @@ impl Extractable for DOMString { impl Extractable for FormData { fn extract(&self) -> (Vec<u8>, Option<DOMString>) { let boundary = generate_boundary(); - let bytes = encode_multipart_form_data(&mut self.datums(), boundary.clone(), - UTF_8 as EncodingRef); + let bytes = encode_multipart_form_data(&mut self.datums(), boundary.clone(), UTF_8); (bytes, Some(DOMString::from(format!("multipart/form-data;boundary={}", boundary)))) } } diff --git a/components/script/lib.rs b/components/script/lib.rs index 74276422207..f56e2c3dcaa 100644 --- a/components/script/lib.rs +++ b/components/script/lib.rs @@ -34,7 +34,6 @@ extern crate devtools_traits; extern crate dom_struct; #[macro_use] extern crate domobject_derive; -extern crate encoding; extern crate encoding_rs; extern crate euclid; extern crate fnv; |