Auto merge of #7447 - ddrmanxbxfr:master, r=jdm

Issue #7382 Use descriptive enums instead of booleans for MIMEClassifier::classifer Hi guys i've done a small pass of refactor in the MIMEClassifier implementation. (See issue #7382 ) - Moved the predicates to separate functions - Added a mimetype enum so we can compare them easily after calling MIMEClassifier::get_media_type I hope it follows rust good pratices (care it's my first time doing rust). Improvements and tips are welcome :). Thanks for looking at it.  [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/7447)
author: bors-servo <metajack+bors@gmail.com> 2015-09-18 07:22:59 -0600
committer: bors-servo <metajack+bors@gmail.com> 2015-09-18 07:22:59 -0600
commit: 8a8204ffc8fa287dde2321c40d12b191b51960da (patch)
tree: f471d6cf869b72818540ff587276c7fd8b3c14bb
parent: eae3eaf97474febb2c50a7a4d392594cbc8d2da2 (diff)
parent: cc44448b0922f184b22cdb2ecc29af37496c52b0 (diff)
download: servo-8a8204ffc8fa287dde2321c40d12b191b51960da.tar.gz
servo-8a8204ffc8fa287dde2321c40d12b191b51960da.zip
4 files changed, 206 insertions, 53 deletions
diff --git a/components/net/mime_classifier.rs b/components/net/mime_classifier.rs
index de1b445abf4..a9eac63c8cf 100644
--- a/components/net/mime_classifier.rs
+++ b/components/net/mime_classifier.rs
@@ -6,45 +6,58 @@ use std::borrow::ToOwned;
 
 pub struct MIMEClassifier {
     image_classifier: GroupedClassifier,
-    audio_video_classifer: GroupedClassifier,
+    audio_video_classifier: GroupedClassifier,
     scriptable_classifier: GroupedClassifier,
     plaintext_classifier: GroupedClassifier,
-    archive_classifer: GroupedClassifier,
+    archive_classifier: GroupedClassifier,
     binary_or_plaintext: BinaryOrPlaintextClassifier,
     feeds_classifier: FeedsClassifier
 }
 
+pub enum MediaType {
+    Xml,
+    Html,
+    AudioVideo,
+    Image,
+}
+
+pub enum ApacheBugFlag {
+    ON,
+    OFF
+}
+
+#[derive(PartialEq)]
+pub enum NoSniffFlag {
+    ON,
+    OFF
+}
+
 impl MIMEClassifier {
     //Performs MIME Type Sniffing Algorithm (section 7)
     pub fn classify(&self,
-                    no_sniff: bool,
-                    check_for_apache_bug: bool,
+                    no_sniff_flag: NoSniffFlag,
+                    apache_bug_flag: ApacheBugFlag,
                     supplied_type: &Option<(String, String)>,
                     data: &[u8]) -> Option<(String, String)> {
-
         match *supplied_type {
-            None => self.sniff_unknown_type(!no_sniff, data),
+            None => self.sniff_unknown_type(no_sniff_flag, data),
             Some((ref media_type, ref media_subtype)) => {
-                match (&**media_type, &**media_subtype) {
-                    ("unknown", "unknown") |
-                    ("application", "unknown") |
-                    ("*", "*") => self.sniff_unknown_type(!no_sniff, data),
-                    _ => {
-                        if no_sniff {
-                            supplied_type.clone()
-                        } else if check_for_apache_bug {
-                            self.sniff_text_or_data(data)
-                        } else if MIMEClassifier::is_xml(media_type, media_subtype) {
-                            supplied_type.clone()
-                        } else if MIMEClassifier::is_html(media_type, media_subtype) {
-                            //Implied in section 7.3, but flow is not clear
-                            self.feeds_classifier.classify(data).or(supplied_type.clone())
-                        } else {
-                            match (&**media_type, &**media_subtype) {
-                                ("image", _) => self.image_classifier.classify(data),
-                                ("audio", _) | ("video", _) | ("application", "ogg") =>
-                                    self.audio_video_classifer.classify(data),
-                                _ => None
+                if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
+                    self.sniff_unknown_type(no_sniff_flag, data)
+                } else {
+                    match no_sniff_flag {
+                        NoSniffFlag::ON => supplied_type.clone(),
+                        NoSniffFlag::OFF => match apache_bug_flag {
+                            ApacheBugFlag::ON => self.sniff_text_or_data(data),
+                            ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
+                                                                                       media_subtype) {
+                                Some(MediaType::Xml) => supplied_type.clone(),
+                                Some(MediaType::Html) =>
+                                    //Implied in section 7.3, but flow is not clear
+                                    self.feeds_classifier.classify(data).or(supplied_type.clone()),
+                                Some(MediaType::Image) => self.image_classifier.classify(data),
+                                Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
+                                None => None
                             }.or(supplied_type.clone())
                         }
                     }
@@ -56,26 +69,30 @@ impl MIMEClassifier {
     pub fn new() -> MIMEClassifier {
          MIMEClassifier {
              image_classifier: GroupedClassifier::image_classifer(),
-             audio_video_classifer: GroupedClassifier::audio_video_classifer(),
+             audio_video_classifier: GroupedClassifier::audio_video_classifier(),
              scriptable_classifier: GroupedClassifier::scriptable_classifier(),
              plaintext_classifier: GroupedClassifier::plaintext_classifier(),
-             archive_classifer: GroupedClassifier::archive_classifier(),
+             archive_classifier: GroupedClassifier::archive_classifier(),
              binary_or_plaintext: BinaryOrPlaintextClassifier,
              feeds_classifier: FeedsClassifier
          }
     }
+
     //some sort of iterator over the classifiers might be better?
-    fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) ->
+    fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
       Option<(String, String)> {
-        if sniff_scriptable {
+        let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF;
+        let sniffed = if should_sniff_scriptable {
             self.scriptable_classifier.classify(data)
         } else {
             None
-        }.or_else(|| self.plaintext_classifier.classify(data))
-         .or_else(|| self.image_classifier.classify(data))
-         .or_else(|| self.audio_video_classifer.classify(data))
-         .or_else(|| self.archive_classifer.classify(data))
-         .or_else(|| self.binary_or_plaintext.classify(data))
+        };
+
+        sniffed.or_else(|| self.plaintext_classifier.classify(data))
+            .or_else(|| self.image_classifier.classify(data))
+            .or_else(|| self.audio_video_classifier.classify(data))
+            .or_else(|| self.archive_classifier.classify(data))
+            .or_else(|| self.binary_or_plaintext.classify(data))
     }
 
     fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> {
@@ -93,6 +110,40 @@ impl MIMEClassifier {
     fn is_html(tp: &str, sub_tp: &str) -> bool {
         tp == "text" && sub_tp == "html"
     }
+
+    fn is_image(tp: &str) -> bool {
+        tp == "image"
+    }
+
+    fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
+        tp == "audio" ||
+        tp == "video" ||
+        (tp == "application" && sub_tp == "ogg")
+    }
+
+    fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
+        match(tp, sub_tp) {
+            ("unknown", "unknown") |
+            ("application", "unknown") |
+            ("*", "*") => true,
+            _ => false
+        }
+    }
+
+    fn get_media_type(media_type: &String,
+                          media_subtype: &String) -> Option<MediaType> {
+        if MIMEClassifier::is_xml(media_type, media_subtype) {
+            Some(MediaType::Xml)
+        } else if MIMEClassifier::is_html(media_type, media_subtype) {
+           Some(MediaType::Html)
+        } else if MIMEClassifier::is_image(media_type) {
+            Some(MediaType::Image)
+        } else if MIMEClassifier::is_audio_video(media_type, media_subtype) {
+            Some(MediaType::AudioVideo)
+        } else {
+            None
+        }
+    }
 }
 
 pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
@@ -227,8 +278,8 @@ struct BinaryOrPlaintextClassifier;
 
 impl BinaryOrPlaintextClassifier {
     fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) {
-        if data == &[0xFFu8, 0xFEu8] ||
-           data == &[0xFEu8, 0xFFu8] ||
+        if data.starts_with(&[0xFFu8, 0xFEu8]) ||
+           data.starts_with(&[0xFEu8, 0xFFu8]) ||
            data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
         {
             ("text", "plain")
@@ -265,7 +316,7 @@ impl GroupedClassifier {
             ]
         }
     }
-    fn audio_video_classifer() -> GroupedClassifier {
+    fn audio_video_classifier() -> GroupedClassifier {
         GroupedClassifier {
             byte_matchers: vec![
                 box ByteMatcher::video_webm(),
diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs
index bf3d99c9f8b..1da1cd49a7b 100644
--- a/components/net/resource_task.rs
+++ b/components/net/resource_task.rs
@@ -10,8 +10,7 @@ use cookie_storage::CookieStorage;
 use data_loader;
 use file_loader;
 use http_loader::{self, create_http_connector, Connector};
-use mime_classifier::MIMEClassifier;
-
+use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
 use net_traits::ProgressMsg::Done;
 use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource};
 use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
@@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
 
 use std::borrow::ToOwned;
 use std::boxed::FnBox;
-use std::sync::Arc;
+
+use std::sync::{Arc};
+
 use std::sync::mpsc::{channel, Sender};
 
 pub enum ProgressSender {
@@ -72,21 +73,20 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
                                  -> Result<ProgressSender, ()> {
     if opts::get().sniff_mime_types {
         // TODO: should be calculated in the resource loader, from pull requeset #4094
-        let mut nosniff = false;
-        let mut check_for_apache_bug = false;
+        let mut no_sniff = NoSniffFlag::OFF;
+        let mut check_for_apache_bug = ApacheBugFlag::OFF;
 
         if let Some(ref headers) = metadata.headers {
             if let Some(ref raw_content_type) = headers.get_raw("content-type") {
                 if raw_content_type.len() > 0 {
                     let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1];
-                    check_for_apache_bug = last_raw_content_type == b"text/plain"
-                                        || last_raw_content_type == b"text/plain; charset=ISO-8859-1"
-                                        || last_raw_content_type == b"text/plain; charset=iso-8859-1"
-                                        || last_raw_content_type == b"text/plain; charset=UTF-8";
+                    check_for_apache_bug = apache_bug_predicate(last_raw_content_type)
                 }
             }
             if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") {
-                nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff");
+                if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") {
+                    no_sniff = NoSniffFlag::ON
+                }
             }
         }
 
@@ -94,7 +94,7 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
             metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
             (format!("{}", toplevel), format!("{}", sublevel))
         });
-        metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type,
+        metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
                                                     &partial_body).map(|(toplevel, sublevel)| {
             let mime_tp: TopLevel = toplevel.parse().unwrap();
             let mime_sb: SubLevel = sublevel.parse().unwrap();
@@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
     start_sending_opt(start_chan, metadata)
 }
 
+fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag {
+    if last_raw_content_type == b"text/plain"
+           || last_raw_content_type == b"text/plain; charset=ISO-8859-1"
+           || last_raw_content_type == b"text/plain; charset=iso-8859-1"
+           || last_raw_content_type == b"text/plain; charset=UTF-8" {
+        ApacheBugFlag::ON
+    } else {
+        ApacheBugFlag::OFF
+    }
+}
+
 /// For use by loaders in responding to a Load message.
 pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> {
     match start_chan {
diff --git a/tests/unit/net/mime_classifier.rs b/tests/unit/net/mime_classifier.rs
index 2c62878872e..b8538dc9ab8 100644
--- a/tests/unit/net/mime_classifier.rs
+++ b/tests/unit/net/mime_classifier.rs
@@ -3,7 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use net::mime_classifier::as_string_option;
-use net::mime_classifier::{Mp4Matcher, MIMEClassifier};
+use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag};
 use std::env;
 use std::fs::File;
 use std::io::{self, Read};
@@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() {
 }
 
 #[cfg(test)]
-fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
-                   supplied_type: Option<(&'static str, &'static str)>) {
+fn test_sniff_with_flags(filename_orig: &path::Path,
+                                 type_string: &str,
+                                 subtype_string: &str,
+                                 supplied_type: Option<(&'static str, &'static str)>,
+                                 no_sniff_flag: NoSniffFlag,
+                                 apache_bug_flag: ApacheBugFlag) {
     let current_working_directory = env::current_dir().unwrap();
     println!("The current directory is {}", current_working_directory.display());
 
@@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
 
     match read_result {
         Ok(data) => {
-            match classifier.classify(false, false, &as_string_option(supplied_type), &data) {
+            match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
                 Some((parsed_type, parsed_subtp)) => {
                     if (&parsed_type[..] != type_string) ||
                         (&parsed_subtp[..] != subtype_string) {
@@ -70,6 +74,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
 }
 
 #[cfg(test)]
+fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
+                   supplied_type: Option<(&'static str, &'static str)>) {
+    test_sniff_with_flags(filename_orig,
+                          type_string,
+                          subtype_string,
+                          supplied_type,
+                          NoSniffFlag::OFF,
+                          ApacheBugFlag::OFF)
+}
+
+#[cfg(test)]
 fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str,
                              supplied_type: Option<(&'static str, &'static str)>) {
     let mut x = PathBuf::from("./");
@@ -448,3 +463,79 @@ fn test_sniff_rss_feed() {
 fn test_sniff_atom_feed() {
     test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html")));
 }
+
+#[test]
+fn test_sniff_binary_file() {
+    test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None);
+}
+
+#[test]
+fn test_sniff_atom_feed_with_no_sniff_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
+                                  "text",
+                                  "html",
+                                  Some(("text", "html")),
+                                  NoSniffFlag::ON,
+                                  ApacheBugFlag::OFF);
+}
+
+#[test]
+fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
+                                  "text",
+                                  "html",
+                                  Some(("text", "html")),
+                                  NoSniffFlag::ON,
+                                  ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_8_bom_with_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"),
+                                  "text",
+                                  "plain",
+                                  Some(("dummy", "text")),
+                                  NoSniffFlag::OFF,
+                                  ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_16be_bom_with_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"),
+                                  "text",
+                                  "plain",
+                                  Some(("dummy", "text")),
+                                  NoSniffFlag::OFF,
+                                  ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_16le_bom_with_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"),
+                                  "text",
+                                  "plain",
+                                  Some(("dummy", "text")),
+                                  NoSniffFlag::OFF,
+                                  ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_octet_stream_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("unknown/binary_file"),
+                                  "application",
+                                  "octet-stream",
+                                  Some(("dummy", "binary")),
+                                  NoSniffFlag::OFF,
+                                  ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_mp4_video_apache_flag_on() {
+    test_sniff_with_flags(&PathBuf::from("video/mp4/test.mp4"),
+                          "application",
+                          "octet-stream",
+                          Some(("video", "mp4")),
+                          NoSniffFlag::OFF,
+                          ApacheBugFlag::ON);
+}
+
diff --git a/tests/unit/net/parsable_mime/unknown/binary_file b/tests/unit/net/parsable_mime/unknown/binary_file
new file mode 100644
index 00000000000..ecf3bbcdf3e
--- /dev/null
+++ b/tests/unit/net/parsable_mime/unknown/binary_file
author	bors-servo <metajack+bors@gmail.com>	2015-09-18 07:22:59 -0600
committer	bors-servo <metajack+bors@gmail.com>	2015-09-18 07:22:59 -0600
commit	8a8204ffc8fa287dde2321c40d12b191b51960da (patch)
tree	f471d6cf869b72818540ff587276c7fd8b3c14bb
parent	eae3eaf97474febb2c50a7a4d392594cbc8d2da2 (diff)
parent	cc44448b0922f184b22cdb2ecc29af37496c52b0 (diff)
download	servo-8a8204ffc8fa287dde2321c40d12b191b51960da.tar.gz servo-8a8204ffc8fa287dde2321c40d12b191b51960da.zip