diff options
author | bors-servo <metajack+bors@gmail.com> | 2015-09-18 07:22:59 -0600 |
---|---|---|
committer | bors-servo <metajack+bors@gmail.com> | 2015-09-18 07:22:59 -0600 |
commit | 8a8204ffc8fa287dde2321c40d12b191b51960da (patch) | |
tree | f471d6cf869b72818540ff587276c7fd8b3c14bb | |
parent | eae3eaf97474febb2c50a7a4d392594cbc8d2da2 (diff) | |
parent | cc44448b0922f184b22cdb2ecc29af37496c52b0 (diff) | |
download | servo-8a8204ffc8fa287dde2321c40d12b191b51960da.tar.gz servo-8a8204ffc8fa287dde2321c40d12b191b51960da.zip |
Auto merge of #7447 - ddrmanxbxfr:master, r=jdm
Issue #7382 Use descriptive enums instead of booleans for MIMEClassifier::classifer
Hi guys i've done a small pass of refactor in the MIMEClassifier implementation. (See issue #7382 )
- Moved the predicates to separate functions
- Added a mimetype enum so we can compare them easily after calling MIMEClassifier::get_media_type
I hope it follows rust good pratices (care it's my first time doing rust).
Improvements and tips are welcome :).
Thanks for looking at it.
<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/7447)
<!-- Reviewable:end -->
-rw-r--r-- | components/net/mime_classifier.rs | 127 | ||||
-rw-r--r-- | components/net/resource_task.rs | 33 | ||||
-rw-r--r-- | tests/unit/net/mime_classifier.rs | 99 | ||||
-rw-r--r-- | tests/unit/net/parsable_mime/unknown/binary_file | bin | 0 -> 1024 bytes |
4 files changed, 206 insertions, 53 deletions
diff --git a/components/net/mime_classifier.rs b/components/net/mime_classifier.rs index de1b445abf4..a9eac63c8cf 100644 --- a/components/net/mime_classifier.rs +++ b/components/net/mime_classifier.rs @@ -6,45 +6,58 @@ use std::borrow::ToOwned; pub struct MIMEClassifier { image_classifier: GroupedClassifier, - audio_video_classifer: GroupedClassifier, + audio_video_classifier: GroupedClassifier, scriptable_classifier: GroupedClassifier, plaintext_classifier: GroupedClassifier, - archive_classifer: GroupedClassifier, + archive_classifier: GroupedClassifier, binary_or_plaintext: BinaryOrPlaintextClassifier, feeds_classifier: FeedsClassifier } +pub enum MediaType { + Xml, + Html, + AudioVideo, + Image, +} + +pub enum ApacheBugFlag { + ON, + OFF +} + +#[derive(PartialEq)] +pub enum NoSniffFlag { + ON, + OFF +} + impl MIMEClassifier { //Performs MIME Type Sniffing Algorithm (section 7) pub fn classify(&self, - no_sniff: bool, - check_for_apache_bug: bool, + no_sniff_flag: NoSniffFlag, + apache_bug_flag: ApacheBugFlag, supplied_type: &Option<(String, String)>, data: &[u8]) -> Option<(String, String)> { - match *supplied_type { - None => self.sniff_unknown_type(!no_sniff, data), + None => self.sniff_unknown_type(no_sniff_flag, data), Some((ref media_type, ref media_subtype)) => { - match (&**media_type, &**media_subtype) { - ("unknown", "unknown") | - ("application", "unknown") | - ("*", "*") => self.sniff_unknown_type(!no_sniff, data), - _ => { - if no_sniff { - supplied_type.clone() - } else if check_for_apache_bug { - self.sniff_text_or_data(data) - } else if MIMEClassifier::is_xml(media_type, media_subtype) { - supplied_type.clone() - } else if MIMEClassifier::is_html(media_type, media_subtype) { - //Implied in section 7.3, but flow is not clear - self.feeds_classifier.classify(data).or(supplied_type.clone()) - } else { - match (&**media_type, &**media_subtype) { - ("image", _) => self.image_classifier.classify(data), - ("audio", _) | ("video", _) | ("application", "ogg") => - self.audio_video_classifer.classify(data), - _ => None + if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) { + self.sniff_unknown_type(no_sniff_flag, data) + } else { + match no_sniff_flag { + NoSniffFlag::ON => supplied_type.clone(), + NoSniffFlag::OFF => match apache_bug_flag { + ApacheBugFlag::ON => self.sniff_text_or_data(data), + ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type, + media_subtype) { + Some(MediaType::Xml) => supplied_type.clone(), + Some(MediaType::Html) => + //Implied in section 7.3, but flow is not clear + self.feeds_classifier.classify(data).or(supplied_type.clone()), + Some(MediaType::Image) => self.image_classifier.classify(data), + Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data), + None => None }.or(supplied_type.clone()) } } @@ -56,26 +69,30 @@ impl MIMEClassifier { pub fn new() -> MIMEClassifier { MIMEClassifier { image_classifier: GroupedClassifier::image_classifer(), - audio_video_classifer: GroupedClassifier::audio_video_classifer(), + audio_video_classifier: GroupedClassifier::audio_video_classifier(), scriptable_classifier: GroupedClassifier::scriptable_classifier(), plaintext_classifier: GroupedClassifier::plaintext_classifier(), - archive_classifer: GroupedClassifier::archive_classifier(), + archive_classifier: GroupedClassifier::archive_classifier(), binary_or_plaintext: BinaryOrPlaintextClassifier, feeds_classifier: FeedsClassifier } } + //some sort of iterator over the classifiers might be better? - fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) -> + fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) -> Option<(String, String)> { - if sniff_scriptable { + let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF; + let sniffed = if should_sniff_scriptable { self.scriptable_classifier.classify(data) } else { None - }.or_else(|| self.plaintext_classifier.classify(data)) - .or_else(|| self.image_classifier.classify(data)) - .or_else(|| self.audio_video_classifer.classify(data)) - .or_else(|| self.archive_classifer.classify(data)) - .or_else(|| self.binary_or_plaintext.classify(data)) + }; + + sniffed.or_else(|| self.plaintext_classifier.classify(data)) + .or_else(|| self.image_classifier.classify(data)) + .or_else(|| self.audio_video_classifier.classify(data)) + .or_else(|| self.archive_classifier.classify(data)) + .or_else(|| self.binary_or_plaintext.classify(data)) } fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> { @@ -93,6 +110,40 @@ impl MIMEClassifier { fn is_html(tp: &str, sub_tp: &str) -> bool { tp == "text" && sub_tp == "html" } + + fn is_image(tp: &str) -> bool { + tp == "image" + } + + fn is_audio_video(tp: &str, sub_tp: &str) -> bool { + tp == "audio" || + tp == "video" || + (tp == "application" && sub_tp == "ogg") + } + + fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool { + match(tp, sub_tp) { + ("unknown", "unknown") | + ("application", "unknown") | + ("*", "*") => true, + _ => false + } + } + + fn get_media_type(media_type: &String, + media_subtype: &String) -> Option<MediaType> { + if MIMEClassifier::is_xml(media_type, media_subtype) { + Some(MediaType::Xml) + } else if MIMEClassifier::is_html(media_type, media_subtype) { + Some(MediaType::Html) + } else if MIMEClassifier::is_image(media_type) { + Some(MediaType::Image) + } else if MIMEClassifier::is_audio_video(media_type, media_subtype) { + Some(MediaType::AudioVideo) + } else { + None + } + } } pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> { @@ -227,8 +278,8 @@ struct BinaryOrPlaintextClassifier; impl BinaryOrPlaintextClassifier { fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) { - if data == &[0xFFu8, 0xFEu8] || - data == &[0xFEu8, 0xFFu8] || + if data.starts_with(&[0xFFu8, 0xFEu8]) || + data.starts_with(&[0xFEu8, 0xFFu8]) || data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8]) { ("text", "plain") @@ -265,7 +316,7 @@ impl GroupedClassifier { ] } } - fn audio_video_classifer() -> GroupedClassifier { + fn audio_video_classifier() -> GroupedClassifier { GroupedClassifier { byte_matchers: vec![ box ByteMatcher::video_webm(), diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs index bf3d99c9f8b..1da1cd49a7b 100644 --- a/components/net/resource_task.rs +++ b/components/net/resource_task.rs @@ -10,8 +10,7 @@ use cookie_storage::CookieStorage; use data_loader; use file_loader; use http_loader::{self, create_http_connector, Connector}; -use mime_classifier::MIMEClassifier; - +use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag}; use net_traits::ProgressMsg::Done; use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource}; use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction}; @@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender}; use std::borrow::ToOwned; use std::boxed::FnBox; -use std::sync::Arc; + +use std::sync::{Arc}; + use std::sync::mpsc::{channel, Sender}; pub enum ProgressSender { @@ -72,21 +73,20 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat -> Result<ProgressSender, ()> { if opts::get().sniff_mime_types { // TODO: should be calculated in the resource loader, from pull requeset #4094 - let mut nosniff = false; - let mut check_for_apache_bug = false; + let mut no_sniff = NoSniffFlag::OFF; + let mut check_for_apache_bug = ApacheBugFlag::OFF; if let Some(ref headers) = metadata.headers { if let Some(ref raw_content_type) = headers.get_raw("content-type") { if raw_content_type.len() > 0 { let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1]; - check_for_apache_bug = last_raw_content_type == b"text/plain" - || last_raw_content_type == b"text/plain; charset=ISO-8859-1" - || last_raw_content_type == b"text/plain; charset=iso-8859-1" - || last_raw_content_type == b"text/plain; charset=UTF-8"; + check_for_apache_bug = apache_bug_predicate(last_raw_content_type) } } if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") { - nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff"); + if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") { + no_sniff = NoSniffFlag::ON + } } } @@ -94,7 +94,7 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| { (format!("{}", toplevel), format!("{}", sublevel)) }); - metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type, + metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type, &partial_body).map(|(toplevel, sublevel)| { let mime_tp: TopLevel = toplevel.parse().unwrap(); let mime_sb: SubLevel = sublevel.parse().unwrap(); @@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat start_sending_opt(start_chan, metadata) } +fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag { + if last_raw_content_type == b"text/plain" + || last_raw_content_type == b"text/plain; charset=ISO-8859-1" + || last_raw_content_type == b"text/plain; charset=iso-8859-1" + || last_raw_content_type == b"text/plain; charset=UTF-8" { + ApacheBugFlag::ON + } else { + ApacheBugFlag::OFF + } +} + /// For use by loaders in responding to a Load message. pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> { match start_chan { diff --git a/tests/unit/net/mime_classifier.rs b/tests/unit/net/mime_classifier.rs index 2c62878872e..b8538dc9ab8 100644 --- a/tests/unit/net/mime_classifier.rs +++ b/tests/unit/net/mime_classifier.rs @@ -3,7 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ use net::mime_classifier::as_string_option; -use net::mime_classifier::{Mp4Matcher, MIMEClassifier}; +use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag}; use std::env; use std::fs::File; use std::io::{self, Read}; @@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() { } #[cfg(test)] -fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str, - supplied_type: Option<(&'static str, &'static str)>) { +fn test_sniff_with_flags(filename_orig: &path::Path, + type_string: &str, + subtype_string: &str, + supplied_type: Option<(&'static str, &'static str)>, + no_sniff_flag: NoSniffFlag, + apache_bug_flag: ApacheBugFlag) { let current_working_directory = env::current_dir().unwrap(); println!("The current directory is {}", current_working_directory.display()); @@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string match read_result { Ok(data) => { - match classifier.classify(false, false, &as_string_option(supplied_type), &data) { + match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) { Some((parsed_type, parsed_subtp)) => { if (&parsed_type[..] != type_string) || (&parsed_subtp[..] != subtype_string) { @@ -70,6 +74,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string } #[cfg(test)] +fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str, + supplied_type: Option<(&'static str, &'static str)>) { + test_sniff_with_flags(filename_orig, + type_string, + subtype_string, + supplied_type, + NoSniffFlag::OFF, + ApacheBugFlag::OFF) +} + +#[cfg(test)] fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str, supplied_type: Option<(&'static str, &'static str)>) { let mut x = PathBuf::from("./"); @@ -448,3 +463,79 @@ fn test_sniff_rss_feed() { fn test_sniff_atom_feed() { test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html"))); } + +#[test] +fn test_sniff_binary_file() { + test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None); +} + +#[test] +fn test_sniff_atom_feed_with_no_sniff_flag_on() { + test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"), + "text", + "html", + Some(("text", "html")), + NoSniffFlag::ON, + ApacheBugFlag::OFF); +} + +#[test] +fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"), + "text", + "html", + Some(("text", "html")), + NoSniffFlag::ON, + ApacheBugFlag::ON); +} + +#[test] +fn test_sniff_utf_8_bom_with_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"), + "text", + "plain", + Some(("dummy", "text")), + NoSniffFlag::OFF, + ApacheBugFlag::ON); +} + +#[test] +fn test_sniff_utf_16be_bom_with_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"), + "text", + "plain", + Some(("dummy", "text")), + NoSniffFlag::OFF, + ApacheBugFlag::ON); +} + +#[test] +fn test_sniff_utf_16le_bom_with_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"), + "text", + "plain", + Some(("dummy", "text")), + NoSniffFlag::OFF, + ApacheBugFlag::ON); +} + +#[test] +fn test_sniff_octet_stream_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("unknown/binary_file"), + "application", + "octet-stream", + Some(("dummy", "binary")), + NoSniffFlag::OFF, + ApacheBugFlag::ON); +} + +#[test] +fn test_sniff_mp4_video_apache_flag_on() { + test_sniff_with_flags(&PathBuf::from("video/mp4/test.mp4"), + "application", + "octet-stream", + Some(("video", "mp4")), + NoSniffFlag::OFF, + ApacheBugFlag::ON); +} + diff --git a/tests/unit/net/parsable_mime/unknown/binary_file b/tests/unit/net/parsable_mime/unknown/binary_file Binary files differnew file mode 100644 index 00000000000..ecf3bbcdf3e --- /dev/null +++ b/tests/unit/net/parsable_mime/unknown/binary_file |