diff options
Diffstat (limited to 'components/script/dom/bindings/str.rs')
-rw-r--r-- | components/script/dom/bindings/str.rs | 638 |
1 files changed, 536 insertions, 102 deletions
diff --git a/components/script/dom/bindings/str.rs b/components/script/dom/bindings/str.rs index e75958d974a..0905c157143 100644 --- a/components/script/dom/bindings/str.rs +++ b/components/script/dom/bindings/str.rs @@ -1,22 +1,26 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ //! The `ByteString` struct. - -use html5ever_atoms::{LocalName, Namespace}; +use chrono::prelude::{Utc, Weekday}; +use chrono::{Datelike, TimeZone}; +use cssparser::CowRcStr; +use html5ever::{LocalName, Namespace}; +use regex::Regex; use servo_atoms::Atom; -use std::ascii::AsciiExt; use std::borrow::{Borrow, Cow, ToOwned}; +use std::default::Default; use std::fmt; use std::hash::{Hash, Hasher}; +use std::marker::PhantomData; use std::ops; use std::ops::{Deref, DerefMut}; use std::str; -use std::str::{Bytes, FromStr}; +use std::str::FromStr; /// Encapsulates the IDL `ByteString` type. -#[derive(JSTraceable, Clone, Eq, PartialEq, HeapSizeOf, Debug)] +#[derive(Clone, Debug, Default, Eq, JSTraceable, MallocSizeOf, PartialEq)] pub struct ByteString(Vec<u8>); impl ByteString { @@ -36,11 +40,6 @@ impl ByteString { self.0.len() } - /// Compare `self` to `other`, matching A–Z and a–z as equal. - pub fn eq_ignore_case(&self, other: &ByteString) -> bool { - self.0.eq_ignore_ascii_case(&other.0) - } - /// Returns `self` with A–Z replaced by a–z. pub fn to_lower(&self) -> ByteString { ByteString::new(self.0.to_ascii_lowercase()) @@ -75,9 +74,62 @@ impl ops::Deref for ByteString { /// A string that is constructed from a UCS-2 buffer by replacing invalid code /// points with the replacement character. -#[derive(Clone, HeapSizeOf)] +#[derive(Clone, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] pub struct USVString(pub String); +impl Borrow<str> for USVString { + #[inline] + fn borrow(&self) -> &str { + &self.0 + } +} + +impl Deref for USVString { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + &self.0 + } +} + +impl DerefMut for USVString { + #[inline] + fn deref_mut(&mut self) -> &mut str { + &mut self.0 + } +} + +impl AsRef<str> for USVString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl fmt::Display for USVString { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +impl PartialEq<str> for USVString { + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +impl<'a> PartialEq<&'a str> for USVString { + fn eq(&self, other: &&'a str) -> bool { + &**self == *other + } +} + +impl From<String> for USVString { + fn from(contents: String) -> USVString { + USVString(contents) + } +} /// Returns whether `s` is a `token`, as defined by /// [RFC 2616](http://tools.ietf.org/html/rfc2616#page-17). @@ -88,96 +140,22 @@ pub fn is_token(s: &[u8]) -> bool { s.iter().all(|&x| { // http://tools.ietf.org/html/rfc2616#section-2.2 match x { - 0...31 | 127 => false, // CTLs - 40 | - 41 | - 60 | - 62 | - 64 | - 44 | - 59 | - 58 | - 92 | - 34 | - 47 | - 91 | - 93 | - 63 | - 61 | - 123 | - 125 | - 32 => false, // separators + 0..=31 | 127 => false, // CTLs + 40 | 41 | 60 | 62 | 64 | 44 | 59 | 58 | 92 | 34 | 47 | 91 | 93 | 63 | 61 | 123 | + 125 | 32 => false, // separators x if x > 127 => false, // non-CHARs _ => true, } }) } -/// Returns whether the language is matched, as defined by -/// [RFC 4647](https://tools.ietf.org/html/rfc4647#section-3.3.2). -pub fn extended_filtering(tag: &str, range: &str) -> bool { - let lang_ranges: Vec<&str> = range.split(',').collect(); - - lang_ranges.iter().any(|&lang_range| { - // step 1 - let range_subtags: Vec<&str> = lang_range.split('\x2d').collect(); - let tag_subtags: Vec<&str> = tag.split('\x2d').collect(); - - let mut range_iter = range_subtags.iter(); - let mut tag_iter = tag_subtags.iter(); - - // step 2 - // Note: [Level-4 spec](https://drafts.csswg.org/selectors/#lang-pseudo) check for wild card - if let (Some(range_subtag), Some(tag_subtag)) = (range_iter.next(), tag_iter.next()) { - if !(range_subtag.eq_ignore_ascii_case(tag_subtag) || range_subtag.eq_ignore_ascii_case("*")) { - return false; - } - } - - let mut current_tag_subtag = tag_iter.next(); - - // step 3 - for range_subtag in range_iter { - // step 3a - if range_subtag.eq_ignore_ascii_case("*") { - continue; - } - match current_tag_subtag.clone() { - Some(tag_subtag) => { - // step 3c - if range_subtag.eq_ignore_ascii_case(tag_subtag) { - current_tag_subtag = tag_iter.next(); - continue; - } else { - // step 3d - if tag_subtag.len() == 1 { - return false; - } else { - // else step 3e - continue with loop - current_tag_subtag = tag_iter.next(); - if current_tag_subtag.is_none() { - return false; - } - } - } - }, - // step 3b - None => { return false; } - } - } - // step 4 - true - }) -} - - /// A DOMString. /// /// This type corresponds to the [`DOMString`](idl) type in WebIDL. /// /// [idl]: https://heycam.github.io/webidl/#idl-DOMString /// -/// Cenceptually, a DOMString has the same value space as a JavaScript String, +/// Conceptually, a DOMString has the same value space as a JavaScript String, /// i.e., an array of 16-bit *code units* representing UTF-16, potentially with /// unpaired surrogates present (also sometimes called WTF-16). /// @@ -207,20 +185,18 @@ pub fn extended_filtering(tag: &str, range: &str) -> bool { /// /// This type is currently `!Send`, in order to help with an independent /// experiment to store `JSString`s rather than Rust `String`s. -#[derive(Clone, Debug, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd)] -pub struct DOMString(String); - -impl !Send for DOMString {} +#[derive(Clone, Debug, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)] +pub struct DOMString(String, PhantomData<*const ()>); impl DOMString { /// Creates a new `DOMString`. pub fn new() -> DOMString { - DOMString(String::new()) + DOMString(String::new(), PhantomData) } /// Creates a new `DOMString` from a `String`. pub fn from_string(s: String) -> DOMString { - DOMString(s) + DOMString(s, PhantomData) } /// Appends a given string slice onto the end of this String. @@ -238,9 +214,332 @@ impl DOMString { self.0.truncate(new_len); } - /// An iterator over the bytes of this `DOMString`. - pub fn bytes(&self) -> Bytes { - self.0.bytes() + /// Removes newline characters according to <https://infra.spec.whatwg.org/#strip-newlines>. + pub fn strip_newlines(&mut self) { + self.0.retain(|c| c != '\r' && c != '\n'); + } + + /// Removes leading and trailing ASCII whitespaces according to + /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>. + pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) { + if self.0.len() == 0 { + return; + } + + let trailing_whitespace_len = self + .0 + .trim_end_matches(|ref c| char::is_ascii_whitespace(c)) + .len(); + self.0.truncate(trailing_whitespace_len); + if self.0.is_empty() { + return; + } + + let first_non_whitespace = self.0.find(|ref c| !char::is_ascii_whitespace(c)).unwrap(); + let _ = self.0.replace_range(0..first_non_whitespace, ""); + } + + /// Validates this `DOMString` is a time string according to + /// <https://html.spec.whatwg.org/multipage/#valid-time-string>. + pub fn is_valid_time_string(&self) -> bool { + enum State { + HourHigh, + HourLow09, + HourLow03, + MinuteColon, + MinuteHigh, + MinuteLow, + SecondColon, + SecondHigh, + SecondLow, + MilliStop, + MilliHigh, + MilliMiddle, + MilliLow, + Done, + Error, + } + let next_state = |valid: bool, next: State| -> State { + if valid { + next + } else { + State::Error + } + }; + + let state = self.chars().fold(State::HourHigh, |state, c| { + match state { + // Step 1 "HH" + State::HourHigh => match c { + '0' | '1' => State::HourLow09, + '2' => State::HourLow03, + _ => State::Error, + }, + State::HourLow09 => next_state(c.is_digit(10), State::MinuteColon), + State::HourLow03 => next_state(c.is_digit(4), State::MinuteColon), + + // Step 2 ":" + State::MinuteColon => next_state(c == ':', State::MinuteHigh), + + // Step 3 "mm" + State::MinuteHigh => next_state(c.is_digit(6), State::MinuteLow), + State::MinuteLow => next_state(c.is_digit(10), State::SecondColon), + + // Step 4.1 ":" + State::SecondColon => next_state(c == ':', State::SecondHigh), + // Step 4.2 "ss" + State::SecondHigh => next_state(c.is_digit(6), State::SecondLow), + State::SecondLow => next_state(c.is_digit(10), State::MilliStop), + + // Step 4.3.1 "." + State::MilliStop => next_state(c == '.', State::MilliHigh), + // Step 4.3.2 "SSS" + State::MilliHigh => next_state(c.is_digit(10), State::MilliMiddle), + State::MilliMiddle => next_state(c.is_digit(10), State::MilliLow), + State::MilliLow => next_state(c.is_digit(10), State::Done), + + _ => State::Error, + } + }); + + match state { + State::Done | + // Step 4 (optional) + State::SecondColon | + // Step 4.3 (optional) + State::MilliStop | + // Step 4.3.2 (only 1 digit required) + State::MilliMiddle | State::MilliLow => true, + _ => false + } + } + + /// A valid date string should be "YYYY-MM-DD" + /// YYYY must be four or more digits, MM and DD both must be two digits + /// https://html.spec.whatwg.org/multipage/#valid-date-string + pub fn is_valid_date_string(&self) -> bool { + self.parse_date_string().is_ok() + } + + /// https://html.spec.whatwg.org/multipage/#parse-a-date-string + pub fn parse_date_string(&self) -> Result<(i32, u32, u32), ()> { + let value = &self.0; + // Step 1, 2, 3 + let (year_int, month_int, day_int) = parse_date_component(value)?; + + // Step 4 + if value.split('-').nth(3).is_some() { + return Err(()); + } + + // Step 5, 6 + Ok((year_int, month_int, day_int)) + } + + /// https://html.spec.whatwg.org/multipage/#parse-a-time-string + pub fn parse_time_string(&self) -> Result<(u32, u32, f64), ()> { + let value = &self.0; + // Step 1, 2, 3 + let (hour_int, minute_int, second_float) = parse_time_component(value)?; + + // Step 4 + if value.split(':').nth(3).is_some() { + return Err(()); + } + + // Step 5, 6 + Ok((hour_int, minute_int, second_float)) + } + + /// A valid month string should be "YYYY-MM" + /// YYYY must be four or more digits, MM both must be two digits + /// https://html.spec.whatwg.org/multipage/#valid-month-string + pub fn is_valid_month_string(&self) -> bool { + self.parse_month_string().is_ok() + } + + /// https://html.spec.whatwg.org/multipage/#parse-a-month-string + pub fn parse_month_string(&self) -> Result<(i32, u32), ()> { + let value = &self; + // Step 1, 2, 3 + let (year_int, month_int) = parse_month_component(value)?; + + // Step 4 + if value.split("-").nth(2).is_some() { + return Err(()); + } + // Step 5 + Ok((year_int, month_int)) + } + + /// A valid week string should be like {YYYY}-W{WW}, such as "2017-W52" + /// YYYY must be four or more digits, WW both must be two digits + /// https://html.spec.whatwg.org/multipage/#valid-week-string + pub fn is_valid_week_string(&self) -> bool { + self.parse_week_string().is_ok() + } + + /// https://html.spec.whatwg.org/multipage/#parse-a-week-string + pub fn parse_week_string(&self) -> Result<(i32, u32), ()> { + let value = &self.0; + // Step 1, 2, 3 + let mut iterator = value.split('-'); + let year = iterator.next().ok_or(())?; + + // Step 4 + let year_int = year.parse::<i32>().map_err(|_| ())?; + if year.len() < 4 || year_int == 0 { + return Err(()); + } + + // Step 5, 6 + let week = iterator.next().ok_or(())?; + let (week_first, week_last) = week.split_at(1); + if week_first != "W" { + return Err(()); + } + + // Step 7 + let week_int = week_last.parse::<u32>().map_err(|_| ())?; + if week_last.len() != 2 { + return Err(()); + } + + // Step 8 + let max_week = max_week_in_year(year_int); + + // Step 9 + if week_int < 1 || week_int > max_week { + return Err(()); + } + + // Step 10 + if iterator.next().is_some() { + return Err(()); + } + + // Step 11 + Ok((year_int, week_int)) + } + + /// https://html.spec.whatwg.org/multipage/#valid-floating-point-number + pub fn is_valid_floating_point_number_string(&self) -> bool { + lazy_static! { + static ref RE: Regex = + Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap(); + } + RE.is_match(&self.0) && self.parse_floating_point_number().is_ok() + } + + /// https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values + pub fn parse_floating_point_number(&self) -> Result<f64, ()> { + // Steps 15-16 are telling us things about IEEE rounding modes + // for floating-point significands; this code assumes the Rust + // compiler already matches them in any cases where + // that actually matters. They are not + // related to f64::round(), which is for rounding to integers. + let input = &self.0; + match input.trim().parse::<f64>() { + Ok(val) + if !( + // A valid number is the same as what rust considers to be valid, + // except for +1., NaN, and Infinity. + val.is_infinite() || + val.is_nan() || + input.ends_with(".") || + input.starts_with("+") + ) => + { + Ok(val) + }, + _ => Err(()), + } + } + + /// https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number + pub fn set_best_representation_of_the_floating_point_number(&mut self) { + if let Ok(val) = self.parse_floating_point_number() { + self.0 = val.to_string(); + } + } + + /// A valid normalized local date and time string should be "{date}T{time}" + /// where date and time are both valid, and the time string must be as short as possible + /// https://html.spec.whatwg.org/multipage/#valid-normalised-local-date-and-time-string + pub fn convert_valid_normalized_local_date_and_time_string(&mut self) -> Result<(), ()> { + let ((year, month, day), (hour, minute, second)) = + self.parse_local_date_and_time_string()?; + if second == 0.0 { + self.0 = format!( + "{:04}-{:02}-{:02}T{:02}:{:02}", + year, month, day, hour, minute + ); + } else if second < 10.0 { + // we need exactly one leading zero on the seconds, + // whatever their total string length might be + self.0 = format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:0{}", + year, month, day, hour, minute, second + ); + } else { + // we need no leading zeroes on the seconds + self.0 = format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{}", + year, month, day, hour, minute, second + ); + } + Ok(()) + } + + /// https://html.spec.whatwg.org/multipage/#parse-a-local-date-and-time-string + pub fn parse_local_date_and_time_string( + &self, + ) -> Result<((i32, u32, u32), (u32, u32, f64)), ()> { + let value = &self; + // Step 1, 2, 4 + let mut iterator = if value.contains('T') { + value.split('T') + } else { + value.split(' ') + }; + + // Step 3 + let date = iterator.next().ok_or(())?; + let date_tuple = parse_date_component(date)?; + + // Step 5 + let time = iterator.next().ok_or(())?; + let time_tuple = parse_time_component(time)?; + + // Step 6 + if iterator.next().is_some() { + return Err(()); + } + + // Step 7, 8, 9 + Ok((date_tuple, time_tuple)) + } + + /// https://html.spec.whatwg.org/multipage/#valid-e-mail-address + pub fn is_valid_email_address_string(&self) -> bool { + lazy_static! { + static ref RE: Regex = Regex::new(concat!( + r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?", + r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$" + )) + .unwrap(); + } + RE.is_match(&self.0) + } + + /// https://html.spec.whatwg.org/multipage/#valid-simple-colour + pub fn is_valid_simple_color_string(&self) -> bool { + let mut chars = self.0.chars(); + if self.0.len() == 7 && chars.next() == Some('#') { + chars.all(|c| c.is_digit(16)) + } else { + false + } } } @@ -253,7 +552,7 @@ impl Borrow<str> for DOMString { impl Default for DOMString { fn default() -> Self { - DOMString(String::new()) + DOMString(String::new(), PhantomData) } } @@ -300,7 +599,7 @@ impl<'a> PartialEq<&'a str> for DOMString { impl From<String> for DOMString { fn from(contents: String) -> DOMString { - DOMString(contents) + DOMString(contents, PhantomData) } } @@ -355,8 +654,143 @@ impl<'a> Into<Cow<'a, str>> for DOMString { } } +impl<'a> Into<CowRcStr<'a>> for DOMString { + fn into(self) -> CowRcStr<'a> { + self.0.into() + } +} + impl Extend<char> for DOMString { - fn extend<I>(&mut self, iterable: I) where I: IntoIterator<Item=char> { + fn extend<I>(&mut self, iterable: I) + where + I: IntoIterator<Item = char>, + { self.0.extend(iterable) } } + +/// https://html.spec.whatwg.org/multipage/#parse-a-month-component +fn parse_month_component(value: &str) -> Result<(i32, u32), ()> { + // Step 3 + let mut iterator = value.split('-'); + let year = iterator.next().ok_or(())?; + let month = iterator.next().ok_or(())?; + + // Step 1, 2 + let year_int = year.parse::<i32>().map_err(|_| ())?; + if year.len() < 4 || year_int == 0 { + return Err(()); + } + + // Step 4, 5 + let month_int = month.parse::<u32>().map_err(|_| ())?; + if month.len() != 2 || month_int > 12 || month_int < 1 { + return Err(()); + } + + // Step 6 + Ok((year_int, month_int)) +} + +/// https://html.spec.whatwg.org/multipage/#parse-a-date-component +fn parse_date_component(value: &str) -> Result<(i32, u32, u32), ()> { + // Step 1 + let (year_int, month_int) = parse_month_component(value)?; + + // Step 3, 4 + let day = value.split('-').nth(2).ok_or(())?; + let day_int = day.parse::<u32>().map_err(|_| ())?; + if day.len() != 2 { + return Err(()); + } + + // Step 2, 5 + let max_day = max_day_in_month(year_int, month_int)?; + if day_int == 0 || day_int > max_day { + return Err(()); + } + + // Step 6 + Ok((year_int, month_int, day_int)) +} + +/// https://html.spec.whatwg.org/multipage/#parse-a-time-component +fn parse_time_component(value: &str) -> Result<(u32, u32, f64), ()> { + // Step 1 + let mut iterator = value.split(':'); + let hour = iterator.next().ok_or(())?; + if hour.len() != 2 { + return Err(()); + } + let hour_int = hour.parse::<u32>().map_err(|_| ())?; + + // Step 2 + if hour_int > 23 { + return Err(()); + } + + // Step 3, 4 + let minute = iterator.next().ok_or(())?; + if minute.len() != 2 { + return Err(()); + } + let minute_int = minute.parse::<u32>().map_err(|_| ())?; + + // Step 5 + if minute_int > 59 { + return Err(()); + } + + // Step 6, 7 + let second_float = match iterator.next() { + Some(second) => { + let mut second_iterator = second.split('.'); + if second_iterator.next().ok_or(())?.len() != 2 { + return Err(()); + } + match second_iterator.next() { + Some(second_last) => { + if second_last.len() > 3 { + return Err(()); + } + }, + None => {}, + } + + second.parse::<f64>().map_err(|_| ())? + }, + None => 0.0, + }; + + // Step 8 + Ok((hour_int, minute_int, second_float)) +} + +fn max_day_in_month(year_num: i32, month_num: u32) -> Result<u32, ()> { + match month_num { + 1 | 3 | 5 | 7 | 8 | 10 | 12 => Ok(31), + 4 | 6 | 9 | 11 => Ok(30), + 2 => { + if is_leap_year(year_num) { + Ok(29) + } else { + Ok(28) + } + }, + _ => Err(()), + } +} + +/// https://html.spec.whatwg.org/multipage/#week-number-of-the-last-day +fn max_week_in_year(year: i32) -> u32 { + match Utc.ymd(year as i32, 1, 1).weekday() { + Weekday::Thu => 53, + Weekday::Wed if is_leap_year(year) => 53, + _ => 52, + } +} + +#[inline] +fn is_leap_year(year: i32) -> bool { + year % 400 == 0 || (year % 4 == 0 && year % 100 != 0) +} |