aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbors-servo <metajack+bors@gmail.com>2015-09-18 07:22:59 -0600
committerbors-servo <metajack+bors@gmail.com>2015-09-18 07:22:59 -0600
commit8a8204ffc8fa287dde2321c40d12b191b51960da (patch)
treef471d6cf869b72818540ff587276c7fd8b3c14bb
parenteae3eaf97474febb2c50a7a4d392594cbc8d2da2 (diff)
parentcc44448b0922f184b22cdb2ecc29af37496c52b0 (diff)
downloadservo-8a8204ffc8fa287dde2321c40d12b191b51960da.tar.gz
servo-8a8204ffc8fa287dde2321c40d12b191b51960da.zip
Auto merge of #7447 - ddrmanxbxfr:master, r=jdm
Issue #7382 Use descriptive enums instead of booleans for MIMEClassifier::classifer Hi guys i've done a small pass of refactor in the MIMEClassifier implementation. (See issue #7382 ) - Moved the predicates to separate functions - Added a mimetype enum so we can compare them easily after calling MIMEClassifier::get_media_type I hope it follows rust good pratices (care it's my first time doing rust). Improvements and tips are welcome :). Thanks for looking at it. <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/7447) <!-- Reviewable:end -->
-rw-r--r--components/net/mime_classifier.rs127
-rw-r--r--components/net/resource_task.rs33
-rw-r--r--tests/unit/net/mime_classifier.rs99
-rw-r--r--tests/unit/net/parsable_mime/unknown/binary_filebin0 -> 1024 bytes
4 files changed, 206 insertions, 53 deletions
diff --git a/components/net/mime_classifier.rs b/components/net/mime_classifier.rs
index de1b445abf4..a9eac63c8cf 100644
--- a/components/net/mime_classifier.rs
+++ b/components/net/mime_classifier.rs
@@ -6,45 +6,58 @@ use std::borrow::ToOwned;
pub struct MIMEClassifier {
image_classifier: GroupedClassifier,
- audio_video_classifer: GroupedClassifier,
+ audio_video_classifier: GroupedClassifier,
scriptable_classifier: GroupedClassifier,
plaintext_classifier: GroupedClassifier,
- archive_classifer: GroupedClassifier,
+ archive_classifier: GroupedClassifier,
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}
+pub enum MediaType {
+ Xml,
+ Html,
+ AudioVideo,
+ Image,
+}
+
+pub enum ApacheBugFlag {
+ ON,
+ OFF
+}
+
+#[derive(PartialEq)]
+pub enum NoSniffFlag {
+ ON,
+ OFF
+}
+
impl MIMEClassifier {
//Performs MIME Type Sniffing Algorithm (section 7)
pub fn classify(&self,
- no_sniff: bool,
- check_for_apache_bug: bool,
+ no_sniff_flag: NoSniffFlag,
+ apache_bug_flag: ApacheBugFlag,
supplied_type: &Option<(String, String)>,
data: &[u8]) -> Option<(String, String)> {
-
match *supplied_type {
- None => self.sniff_unknown_type(!no_sniff, data),
+ None => self.sniff_unknown_type(no_sniff_flag, data),
Some((ref media_type, ref media_subtype)) => {
- match (&**media_type, &**media_subtype) {
- ("unknown", "unknown") |
- ("application", "unknown") |
- ("*", "*") => self.sniff_unknown_type(!no_sniff, data),
- _ => {
- if no_sniff {
- supplied_type.clone()
- } else if check_for_apache_bug {
- self.sniff_text_or_data(data)
- } else if MIMEClassifier::is_xml(media_type, media_subtype) {
- supplied_type.clone()
- } else if MIMEClassifier::is_html(media_type, media_subtype) {
- //Implied in section 7.3, but flow is not clear
- self.feeds_classifier.classify(data).or(supplied_type.clone())
- } else {
- match (&**media_type, &**media_subtype) {
- ("image", _) => self.image_classifier.classify(data),
- ("audio", _) | ("video", _) | ("application", "ogg") =>
- self.audio_video_classifer.classify(data),
- _ => None
+ if MIMEClassifier::is_explicit_unknown(media_type, media_subtype) {
+ self.sniff_unknown_type(no_sniff_flag, data)
+ } else {
+ match no_sniff_flag {
+ NoSniffFlag::ON => supplied_type.clone(),
+ NoSniffFlag::OFF => match apache_bug_flag {
+ ApacheBugFlag::ON => self.sniff_text_or_data(data),
+ ApacheBugFlag::OFF => match MIMEClassifier::get_media_type(media_type,
+ media_subtype) {
+ Some(MediaType::Xml) => supplied_type.clone(),
+ Some(MediaType::Html) =>
+ //Implied in section 7.3, but flow is not clear
+ self.feeds_classifier.classify(data).or(supplied_type.clone()),
+ Some(MediaType::Image) => self.image_classifier.classify(data),
+ Some(MediaType::AudioVideo) => self.audio_video_classifier.classify(data),
+ None => None
}.or(supplied_type.clone())
}
}
@@ -56,26 +69,30 @@ impl MIMEClassifier {
pub fn new() -> MIMEClassifier {
MIMEClassifier {
image_classifier: GroupedClassifier::image_classifer(),
- audio_video_classifer: GroupedClassifier::audio_video_classifer(),
+ audio_video_classifier: GroupedClassifier::audio_video_classifier(),
scriptable_classifier: GroupedClassifier::scriptable_classifier(),
plaintext_classifier: GroupedClassifier::plaintext_classifier(),
- archive_classifer: GroupedClassifier::archive_classifier(),
+ archive_classifier: GroupedClassifier::archive_classifier(),
binary_or_plaintext: BinaryOrPlaintextClassifier,
feeds_classifier: FeedsClassifier
}
}
+
//some sort of iterator over the classifiers might be better?
- fn sniff_unknown_type(&self, sniff_scriptable: bool, data: &[u8]) ->
+ fn sniff_unknown_type(&self, no_sniff_flag: NoSniffFlag, data: &[u8]) ->
Option<(String, String)> {
- if sniff_scriptable {
+ let should_sniff_scriptable = no_sniff_flag == NoSniffFlag::OFF;
+ let sniffed = if should_sniff_scriptable {
self.scriptable_classifier.classify(data)
} else {
None
- }.or_else(|| self.plaintext_classifier.classify(data))
- .or_else(|| self.image_classifier.classify(data))
- .or_else(|| self.audio_video_classifer.classify(data))
- .or_else(|| self.archive_classifer.classify(data))
- .or_else(|| self.binary_or_plaintext.classify(data))
+ };
+
+ sniffed.or_else(|| self.plaintext_classifier.classify(data))
+ .or_else(|| self.image_classifier.classify(data))
+ .or_else(|| self.audio_video_classifier.classify(data))
+ .or_else(|| self.archive_classifier.classify(data))
+ .or_else(|| self.binary_or_plaintext.classify(data))
}
fn sniff_text_or_data(&self, data: &[u8]) -> Option<(String, String)> {
@@ -93,6 +110,40 @@ impl MIMEClassifier {
fn is_html(tp: &str, sub_tp: &str) -> bool {
tp == "text" && sub_tp == "html"
}
+
+ fn is_image(tp: &str) -> bool {
+ tp == "image"
+ }
+
+ fn is_audio_video(tp: &str, sub_tp: &str) -> bool {
+ tp == "audio" ||
+ tp == "video" ||
+ (tp == "application" && sub_tp == "ogg")
+ }
+
+ fn is_explicit_unknown(tp: &str, sub_tp: &str) -> bool {
+ match(tp, sub_tp) {
+ ("unknown", "unknown") |
+ ("application", "unknown") |
+ ("*", "*") => true,
+ _ => false
+ }
+ }
+
+ fn get_media_type(media_type: &String,
+ media_subtype: &String) -> Option<MediaType> {
+ if MIMEClassifier::is_xml(media_type, media_subtype) {
+ Some(MediaType::Xml)
+ } else if MIMEClassifier::is_html(media_type, media_subtype) {
+ Some(MediaType::Html)
+ } else if MIMEClassifier::is_image(media_type) {
+ Some(MediaType::Image)
+ } else if MIMEClassifier::is_audio_video(media_type, media_subtype) {
+ Some(MediaType::AudioVideo)
+ } else {
+ None
+ }
+ }
}
pub fn as_string_option(tup: Option<(&'static str, &'static str)>) -> Option<(String, String)> {
@@ -227,8 +278,8 @@ struct BinaryOrPlaintextClassifier;
impl BinaryOrPlaintextClassifier {
fn classify_impl(&self, data: &[u8]) -> (&'static str, &'static str) {
- if data == &[0xFFu8, 0xFEu8] ||
- data == &[0xFEu8, 0xFFu8] ||
+ if data.starts_with(&[0xFFu8, 0xFEu8]) ||
+ data.starts_with(&[0xFEu8, 0xFFu8]) ||
data.starts_with(&[0xEFu8, 0xBBu8, 0xBFu8])
{
("text", "plain")
@@ -265,7 +316,7 @@ impl GroupedClassifier {
]
}
}
- fn audio_video_classifer() -> GroupedClassifier {
+ fn audio_video_classifier() -> GroupedClassifier {
GroupedClassifier {
byte_matchers: vec![
box ByteMatcher::video_webm(),
diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs
index bf3d99c9f8b..1da1cd49a7b 100644
--- a/components/net/resource_task.rs
+++ b/components/net/resource_task.rs
@@ -10,8 +10,7 @@ use cookie_storage::CookieStorage;
use data_loader;
use file_loader;
use http_loader::{self, create_http_connector, Connector};
-use mime_classifier::MIMEClassifier;
-
+use mime_classifier::{ApacheBugFlag, MIMEClassifier, NoSniffFlag};
use net_traits::ProgressMsg::Done;
use net_traits::{ControlMsg, LoadData, LoadResponse, LoadConsumer, CookieSource};
use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, ResponseAction};
@@ -29,7 +28,9 @@ use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
use std::borrow::ToOwned;
use std::boxed::FnBox;
-use std::sync::Arc;
+
+use std::sync::{Arc};
+
use std::sync::mpsc::{channel, Sender};
pub enum ProgressSender {
@@ -72,21 +73,20 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
-> Result<ProgressSender, ()> {
if opts::get().sniff_mime_types {
// TODO: should be calculated in the resource loader, from pull requeset #4094
- let mut nosniff = false;
- let mut check_for_apache_bug = false;
+ let mut no_sniff = NoSniffFlag::OFF;
+ let mut check_for_apache_bug = ApacheBugFlag::OFF;
if let Some(ref headers) = metadata.headers {
if let Some(ref raw_content_type) = headers.get_raw("content-type") {
if raw_content_type.len() > 0 {
let ref last_raw_content_type = raw_content_type[raw_content_type.len() - 1];
- check_for_apache_bug = last_raw_content_type == b"text/plain"
- || last_raw_content_type == b"text/plain; charset=ISO-8859-1"
- || last_raw_content_type == b"text/plain; charset=iso-8859-1"
- || last_raw_content_type == b"text/plain; charset=UTF-8";
+ check_for_apache_bug = apache_bug_predicate(last_raw_content_type)
}
}
if let Some(ref raw_content_type_options) = headers.get_raw("X-content-type-options") {
- nosniff = raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff");
+ if raw_content_type_options.iter().any(|ref opt| *opt == b"nosniff") {
+ no_sniff = NoSniffFlag::ON
+ }
}
}
@@ -94,7 +94,7 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
metadata.content_type.map(|ContentType(Mime(toplevel, sublevel, _))| {
(format!("{}", toplevel), format!("{}", sublevel))
});
- metadata.content_type = classifier.classify(nosniff, check_for_apache_bug, &supplied_type,
+ metadata.content_type = classifier.classify(no_sniff, check_for_apache_bug, &supplied_type,
&partial_body).map(|(toplevel, sublevel)| {
let mime_tp: TopLevel = toplevel.parse().unwrap();
let mime_sb: SubLevel = sublevel.parse().unwrap();
@@ -106,6 +106,17 @@ pub fn start_sending_sniffed_opt(start_chan: LoadConsumer, mut metadata: Metadat
start_sending_opt(start_chan, metadata)
}
+fn apache_bug_predicate(last_raw_content_type: &[u8]) -> ApacheBugFlag {
+ if last_raw_content_type == b"text/plain"
+ || last_raw_content_type == b"text/plain; charset=ISO-8859-1"
+ || last_raw_content_type == b"text/plain; charset=iso-8859-1"
+ || last_raw_content_type == b"text/plain; charset=UTF-8" {
+ ApacheBugFlag::ON
+ } else {
+ ApacheBugFlag::OFF
+ }
+}
+
/// For use by loaders in responding to a Load message.
pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result<ProgressSender, ()> {
match start_chan {
diff --git a/tests/unit/net/mime_classifier.rs b/tests/unit/net/mime_classifier.rs
index 2c62878872e..b8538dc9ab8 100644
--- a/tests/unit/net/mime_classifier.rs
+++ b/tests/unit/net/mime_classifier.rs
@@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use net::mime_classifier::as_string_option;
-use net::mime_classifier::{Mp4Matcher, MIMEClassifier};
+use net::mime_classifier::{Mp4Matcher, MIMEClassifier, ApacheBugFlag, NoSniffFlag};
use std::env;
use std::fs::File;
use std::io::{self, Read};
@@ -37,8 +37,12 @@ fn test_sniff_mp4_matcher() {
}
#[cfg(test)]
-fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
- supplied_type: Option<(&'static str, &'static str)>) {
+fn test_sniff_with_flags(filename_orig: &path::Path,
+ type_string: &str,
+ subtype_string: &str,
+ supplied_type: Option<(&'static str, &'static str)>,
+ no_sniff_flag: NoSniffFlag,
+ apache_bug_flag: ApacheBugFlag) {
let current_working_directory = env::current_dir().unwrap();
println!("The current directory is {}", current_working_directory.display());
@@ -51,7 +55,7 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
match read_result {
Ok(data) => {
- match classifier.classify(false, false, &as_string_option(supplied_type), &data) {
+ match classifier.classify(no_sniff_flag, apache_bug_flag, &as_string_option(supplied_type), &data) {
Some((parsed_type, parsed_subtp)) => {
if (&parsed_type[..] != type_string) ||
(&parsed_subtp[..] != subtype_string) {
@@ -70,6 +74,17 @@ fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string
}
#[cfg(test)]
+fn test_sniff_full(filename_orig: &path::Path, type_string: &str, subtype_string: &str,
+ supplied_type: Option<(&'static str, &'static str)>) {
+ test_sniff_with_flags(filename_orig,
+ type_string,
+ subtype_string,
+ supplied_type,
+ NoSniffFlag::OFF,
+ ApacheBugFlag::OFF)
+}
+
+#[cfg(test)]
fn test_sniff_classification(file: &str, type_string: &str, subtype_string: &str,
supplied_type: Option<(&'static str, &'static str)>) {
let mut x = PathBuf::from("./");
@@ -448,3 +463,79 @@ fn test_sniff_rss_feed() {
fn test_sniff_atom_feed() {
test_sniff_full(&PathBuf::from("text/xml/feed.atom"), "application", "atom+xml", Some(("text", "html")));
}
+
+#[test]
+fn test_sniff_binary_file() {
+ test_sniff_full(&PathBuf::from("unknown/binary_file"), "application", "octet-stream", None);
+}
+
+#[test]
+fn test_sniff_atom_feed_with_no_sniff_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
+ "text",
+ "html",
+ Some(("text", "html")),
+ NoSniffFlag::ON,
+ ApacheBugFlag::OFF);
+}
+
+#[test]
+fn test_sniff_with_no_sniff_flag_on_and_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("text/xml/feed.atom"),
+ "text",
+ "html",
+ Some(("text", "html")),
+ NoSniffFlag::ON,
+ ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_8_bom_with_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("text/plain/utf8bom.txt"),
+ "text",
+ "plain",
+ Some(("dummy", "text")),
+ NoSniffFlag::OFF,
+ ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_16be_bom_with_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("text/plain/utf16bebom.txt"),
+ "text",
+ "plain",
+ Some(("dummy", "text")),
+ NoSniffFlag::OFF,
+ ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_utf_16le_bom_with_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("text/plain/utf16lebom.txt"),
+ "text",
+ "plain",
+ Some(("dummy", "text")),
+ NoSniffFlag::OFF,
+ ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_octet_stream_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("unknown/binary_file"),
+ "application",
+ "octet-stream",
+ Some(("dummy", "binary")),
+ NoSniffFlag::OFF,
+ ApacheBugFlag::ON);
+}
+
+#[test]
+fn test_sniff_mp4_video_apache_flag_on() {
+ test_sniff_with_flags(&PathBuf::from("video/mp4/test.mp4"),
+ "application",
+ "octet-stream",
+ Some(("video", "mp4")),
+ NoSniffFlag::OFF,
+ ApacheBugFlag::ON);
+}
+
diff --git a/tests/unit/net/parsable_mime/unknown/binary_file b/tests/unit/net/parsable_mime/unknown/binary_file
new file mode 100644
index 00000000000..ecf3bbcdf3e
--- /dev/null
+++ b/tests/unit/net/parsable_mime/unknown/binary_file
Binary files differ