diff options
author | Simon Sapin <simon.sapin@exyr.org> | 2017-05-18 00:27:49 +0200 |
---|---|---|
committer | Simon Sapin <simon.sapin@exyr.org> | 2017-05-18 17:13:15 +0200 |
commit | 94b4a32c1894e6e95776c350b4cbc3645837af64 (patch) | |
tree | abdba2be3491f9303741f2d061f646a5d0b3c7e1 | |
parent | c5e37f3d2cdcf8b53d4b5f5876d2091e5676efed (diff) | |
download | servo-94b4a32c1894e6e95776c350b4cbc3645837af64.tar.gz servo-94b4a32c1894e6e95776c350b4cbc3645837af64.zip |
Make some attr values case-insensitive in selectors
https://bugzilla.mozilla.org/show_bug.cgi?id=1363531
-rw-r--r-- | Cargo.lock | 2 | ||||
-rw-r--r-- | components/selectors/Cargo.toml | 5 | ||||
-rw-r--r-- | components/selectors/attr.rs | 21 | ||||
-rw-r--r-- | components/selectors/build.rs | 75 | ||||
-rw-r--r-- | components/selectors/lib.rs | 1 | ||||
-rw-r--r-- | components/selectors/matching.rs | 25 | ||||
-rw-r--r-- | components/selectors/parser.rs | 50 | ||||
-rw-r--r-- | components/style/gecko/snapshot.rs | 4 | ||||
-rw-r--r-- | components/style/gecko/wrapper.rs | 4 | ||||
-rw-r--r-- | python/tidy/servo_tidy/tidy.py | 10 |
10 files changed, 167 insertions, 30 deletions
diff --git a/Cargo.lock b/Cargo.lock index b21ff19f356..f9acb8824b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2457,6 +2457,8 @@ dependencies = [ "cssparser 0.13.5 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "matches 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "phf 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)", + "phf_codegen 0.7.21 (registry+https://github.com/rust-lang/crates.io-index)", "precomputed-hash 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "size_of_test 0.0.1", "smallvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/components/selectors/Cargo.toml b/components/selectors/Cargo.toml index ceef176eae1..4ca9a240c67 100644 --- a/components/selectors/Cargo.toml +++ b/components/selectors/Cargo.toml @@ -10,6 +10,7 @@ repository = "https://github.com/servo/servo" readme = "README.md" keywords = ["css", "selectors"] license = "MPL-2.0" +build = "build.rs" [lib] name = "selectors" @@ -25,8 +26,12 @@ bitflags = "0.7" matches = "0.1" cssparser = "0.13.3" fnv = "1.0" +phf = "0.7.18" precomputed-hash = "0.1" smallvec = "0.3" [dev-dependencies] size_of_test = {path = "../size_of_test"} + +[build-dependencies] +phf_codegen = "0.7.18" diff --git a/components/selectors/attr.rs b/components/selectors/attr.rs index f18219b2204..e8ab10ce02f 100644 --- a/components/selectors/attr.rs +++ b/components/selectors/attr.rs @@ -119,13 +119,29 @@ pub static SELECTOR_WHITESPACE: &'static [char] = &[' ', '\t', '\n', '\r', '\x0C pub enum CaseSensitivity { CaseSensitive, // Selectors spec says language-defined, but HTML says sensitive. AsciiCaseInsensitive, + AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument, } impl CaseSensitivity { + pub fn to_definite(self, is_html_element_in_html_document: bool) -> Self { + if let CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument = self { + if is_html_element_in_html_document { + CaseSensitivity::AsciiCaseInsensitive + } else { + CaseSensitivity::CaseSensitive + } + } else { + self + } + } + pub fn eq(self, a: &[u8], b: &[u8]) -> bool { match self { CaseSensitivity::CaseSensitive => a == b, - CaseSensitivity::AsciiCaseInsensitive => a.eq_ignore_ascii_case(b) + CaseSensitivity::AsciiCaseInsensitive => a.eq_ignore_ascii_case(b), + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => { + unreachable!("matching.rs should have called case_sensitivity.to_definite()"); + } } } @@ -152,6 +168,9 @@ impl CaseSensitivity { true } } + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => { + unreachable!("matching.rs should have called case_sensitivity.to_definite()"); + } } } } diff --git a/components/selectors/build.rs b/components/selectors/build.rs new file mode 100644 index 00000000000..0d0a40256f3 --- /dev/null +++ b/components/selectors/build.rs @@ -0,0 +1,75 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +extern crate phf_codegen; + +use std::env; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let path = Path::new(&env::var("OUT_DIR").unwrap()) + .join("ascii_case_insensitive_html_attributes.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + + write!(&mut file, "{{ static SET: ::phf::Set<&'static str> = ", + ).unwrap(); + let mut set = phf_codegen::Set::new(); + for name in ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES.split_whitespace() { + set.entry(name); + } + set.build(&mut file).unwrap(); + write!(&mut file, "; &SET }}").unwrap(); +} + +/// https://html.spec.whatwg.org/multipage/#selectors +static ASCII_CASE_INSENSITIVE_HTML_ATTRIBUTES: &'static str = r#" + accept + accept-charset + align + alink + axis + bgcolor + charset + checked + clear + codetype + color + compact + declare + defer + dir + direction + disabled + enctype + face + frame + hreflang + http-equiv + lang + language + link + media + method + multiple + nohref + noresize + noshade + nowrap + readonly + rel + rev + rules + scope + scrolling + selected + shape + target + text + type + valign + valuetype + vlink +"#; diff --git a/components/selectors/lib.rs b/components/selectors/lib.rs index 5e7b4c99ce8..3e413c0c08b 100644 --- a/components/selectors/lib.rs +++ b/components/selectors/lib.rs @@ -6,6 +6,7 @@ #[macro_use] extern crate cssparser; #[macro_use] extern crate matches; extern crate fnv; +extern crate phf; extern crate precomputed_hash; #[cfg(test)] #[macro_use] extern crate size_of_test; extern crate smallvec; diff --git a/components/selectors/matching.rs b/components/selectors/matching.rs index a11708818b3..2ed6a436687 100644 --- a/components/selectors/matching.rs +++ b/components/selectors/matching.rs @@ -5,7 +5,7 @@ use attr::{AttrSelectorOperation, NamespaceConstraint}; use bloom::BloomFilter; use parser::{Combinator, ComplexSelector, Component, LocalName}; -use parser::{Selector, SelectorInner, SelectorIter, SelectorImpl}; +use parser::{Selector, SelectorInner, SelectorIter}; use std::borrow::Borrow; use tree::Element; @@ -387,7 +387,8 @@ fn matches_simple_selector<E, F>( element.match_pseudo_element(pseudo, context) } Component::LocalName(LocalName { ref name, ref lower_name }) => { - element.get_local_name() == select_name(element, name, lower_name).borrow() + let is_html = element.is_html_element_in_html_document(); + element.get_local_name() == select_name(is_html, name, lower_name).borrow() } Component::ExplicitUniversalType | Component::ExplicitAnyNamespace => { @@ -410,9 +411,10 @@ fn matches_simple_selector<E, F>( element.has_class(class) } Component::AttributeInNoNamespaceExists { ref local_name, ref local_name_lower } => { + let is_html = element.is_html_element_in_html_document(); element.attr_matches( &NamespaceConstraint::Specific(&::parser::namespace_empty_string::<E::Impl>()), - select_name(element, local_name, local_name_lower), + select_name(is_html, local_name, local_name_lower), &AttrSelectorOperation::Exists ) } @@ -427,12 +429,13 @@ fn matches_simple_selector<E, F>( if never_matches { false } else { + let is_html = element.is_html_element_in_html_document(); element.attr_matches( &NamespaceConstraint::Specific(&::parser::namespace_empty_string::<E::Impl>()), - select_name(element, local_name, local_name_lower), + select_name(is_html, local_name, local_name_lower), &AttrSelectorOperation::WithValue { operator: operator, - case_sensitivity: case_sensitivity, + case_sensitivity: case_sensitivity.to_definite(is_html), expected_value: value, } ) @@ -442,9 +445,10 @@ fn matches_simple_selector<E, F>( if attr_sel.never_matches { return false } else { + let is_html = element.is_html_element_in_html_document(); element.attr_matches( &attr_sel.namespace(), - select_name(element, &attr_sel.local_name, &attr_sel.local_name_lower), + select_name(is_html, &attr_sel.local_name, &attr_sel.local_name_lower), &match attr_sel.operation { AttrSelectorOperation::Exists => AttrSelectorOperation::Exists, AttrSelectorOperation::WithValue { @@ -454,7 +458,7 @@ fn matches_simple_selector<E, F>( } => { AttrSelectorOperation::WithValue { operator: operator, - case_sensitivity: case_sensitivity, + case_sensitivity: case_sensitivity.to_definite(is_html), expected_value: expected_value, } } @@ -512,11 +516,8 @@ fn matches_simple_selector<E, F>( } } -fn select_name<'a, E>(element: &E, local_name: &'a <E::Impl as SelectorImpl>::LocalName, - local_name_lower: &'a <E::Impl as SelectorImpl>::LocalName) - -> &'a <E::Impl as SelectorImpl>::LocalName -where E: Element { - if element.is_html_element_in_html_document() { +fn select_name<'a, T>(is_html: bool, local_name: &'a T, local_name_lower: &'a T) -> &'a T { + if is_html { local_name_lower } else { local_name diff --git a/components/selectors/parser.rs b/components/selectors/parser.rs index befbfd084cc..915357887fb 100644 --- a/components/selectors/parser.rs +++ b/components/selectors/parser.rs @@ -33,6 +33,16 @@ pub trait PseudoElement : Sized + ToCss { } } +fn to_ascii_lowercase(s: &str) -> Cow<str> { + if let Some(first_uppercase) = s.bytes().position(|byte| byte >= b'A' && byte <= b'Z') { + let mut string = s.to_owned(); + string[first_uppercase..].make_ascii_lowercase(); + string.into() + } else { + s.into() + } +} + macro_rules! with_all_bounds { ( [ $( $InSelector: tt )* ] @@ -46,16 +56,6 @@ macro_rules! with_all_bounds { } } - fn from_ascii_lowercase<T>(s: &str) -> T where T: $($FromStr)* { - if let Some(first_uppercase) = s.bytes().position(|byte| byte >= b'A' && byte <= b'Z') { - let mut string = s.to_owned(); - string[first_uppercase..].make_ascii_lowercase(); - T::from(string) - } else { - T::from(s) - } - } - /// This trait allows to define the parser implementation in regards /// of pseudo-classes/elements /// @@ -735,7 +735,8 @@ impl<Impl: SelectorImpl> ToCss for Component<Impl> { write!(CssStringWriter::new(dest), "{}", value)?; dest.write_char('"')?; match case_sensitivity { - CaseSensitivity::CaseSensitive => {}, + CaseSensitivity::CaseSensitive | + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {}, CaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?, } dest.write_char(']') @@ -790,7 +791,8 @@ impl<Impl: SelectorImpl> ToCss for AttrSelectorWithNamespace<Impl> { write!(CssStringWriter::new(dest), "{}", expected_value)?; dest.write_char('"')?; match case_sensitivity { - CaseSensitivity::CaseSensitive => {}, + CaseSensitivity::CaseSensitive | + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => {}, CaseSensitivity::AsciiCaseInsensitive => dest.write_str(" i")?, } }, @@ -1078,7 +1080,7 @@ fn parse_type_selector<P, Impl>(parser: &P, input: &mut CssParser, sequence: &mu match local_name { Some(name) => { sequence.push(Component::LocalName(LocalName { - lower_name: from_ascii_lowercase(&name), + lower_name: from_cow_str(to_ascii_lowercase(&name)), name: from_cow_str(name), })) } @@ -1190,13 +1192,11 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser) { let namespace; let local_name; - let local_name_lower; match parse_qualified_name(parser, input, /* in_attr_selector = */ true)? { None => return Err(()), Some((_, None)) => unreachable!(), Some((ns, Some(ln))) => { - local_name_lower = from_ascii_lowercase(&ln); - local_name = from_cow_str(ln); + local_name = ln; namespace = match ns { QNamePrefix::ImplicitNoNamespace | QNamePrefix::ExplicitNoNamespace => { @@ -1222,6 +1222,8 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser) match input.next() { // [foo] Err(()) => { + let local_name_lower = from_cow_str(to_ascii_lowercase(&local_name)); + let local_name = from_cow_str(local_name); if let Some(namespace) = namespace { return Ok(Component::AttributeOther(Box::new(AttrSelectorWithNamespace { namespace: namespace, @@ -1277,8 +1279,22 @@ fn parse_attribute_selector<P, Impl>(parser: &P, input: &mut CssParser) _ => return Err(()) } - let case_sensitivity = parse_attribute_flags(input)?; + let mut case_sensitivity = parse_attribute_flags(input)?; + let value = from_cow_str(value); + let local_name_lower; + { + let local_name_lower_cow = to_ascii_lowercase(&local_name); + if let CaseSensitivity::CaseSensitive = case_sensitivity { + if include!(concat!(env!("OUT_DIR"), "/ascii_case_insensitive_html_attributes.rs")) + .contains(&*local_name_lower_cow) + { + case_sensitivity = CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument + } + } + local_name_lower = from_cow_str(local_name_lower_cow); + } + let local_name = from_cow_str(local_name); if let Some(namespace) = namespace { Ok(Component::AttributeOther(Box::new(AttrSelectorWithNamespace { namespace: namespace, diff --git a/components/style/gecko/snapshot.rs b/components/style/gecko/snapshot.rs index 3bc1a946c5e..3814eff5810 100644 --- a/components/style/gecko/snapshot.rs +++ b/components/style/gecko/snapshot.rs @@ -72,6 +72,10 @@ impl GeckoElementSnapshot { let ignore_case = match case_sensitivity { CaseSensitivity::CaseSensitive => false, CaseSensitivity::AsciiCaseInsensitive => true, + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => { + unreachable!("selectors/matching.rs should have \ + called case_sensitivity.to_definite()"); + } }; // FIXME: case sensitivity for operators other than Equal match operator { diff --git a/components/style/gecko/wrapper.rs b/components/style/gecko/wrapper.rs index 3d8ab270658..3df3381d804 100644 --- a/components/style/gecko/wrapper.rs +++ b/components/style/gecko/wrapper.rs @@ -1162,6 +1162,10 @@ impl<'le> ::selectors::Element for GeckoElement<'le> { let ignore_case = match case_sensitivity { CaseSensitivity::CaseSensitive => false, CaseSensitivity::AsciiCaseInsensitive => true, + CaseSensitivity::AsciiCaseInsensitiveIfInHtmlElementInHtmlDocument => { + unreachable!("selectors/matching.rs should have \ + called case_sensitivity.to_definite()"); + } }; // FIXME: case sensitivity for operators other than Equal match operator { diff --git a/python/tidy/servo_tidy/tidy.py b/python/tidy/servo_tidy/tidy.py index b8229e67d96..51e7e69f8d4 100644 --- a/python/tidy/servo_tidy/tidy.py +++ b/python/tidy/servo_tidy/tidy.py @@ -447,6 +447,7 @@ def check_rust(file_name, lines): prev_use = None prev_open_brace = False + multi_line_string = False current_indent = 0 prev_crate = {} prev_mod = {} @@ -464,6 +465,15 @@ def check_rust(file_name, lines): prev_indent = indent indent = len(original_line) - len(line) + # Hack for components/selectors/build.rs + if multi_line_string: + if line.startswith('"#'): + multi_line_string = False + else: + continue + if line.endswith('r#"'): + multi_line_string = True + is_attribute = re.search(r"#\[.*\]", line) is_comment = re.search(r"^//|^/\*|^\*", line) |