aboutsummaryrefslogtreecommitdiffstats
path: root/components/script/dom/bindings/str.rs
diff options
context:
space:
mode:
Diffstat (limited to 'components/script/dom/bindings/str.rs')
-rw-r--r--components/script/dom/bindings/str.rs638
1 files changed, 536 insertions, 102 deletions
diff --git a/components/script/dom/bindings/str.rs b/components/script/dom/bindings/str.rs
index e75958d974a..0905c157143 100644
--- a/components/script/dom/bindings/str.rs
+++ b/components/script/dom/bindings/str.rs
@@ -1,22 +1,26 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
//! The `ByteString` struct.
-
-use html5ever_atoms::{LocalName, Namespace};
+use chrono::prelude::{Utc, Weekday};
+use chrono::{Datelike, TimeZone};
+use cssparser::CowRcStr;
+use html5ever::{LocalName, Namespace};
+use regex::Regex;
use servo_atoms::Atom;
-use std::ascii::AsciiExt;
use std::borrow::{Borrow, Cow, ToOwned};
+use std::default::Default;
use std::fmt;
use std::hash::{Hash, Hasher};
+use std::marker::PhantomData;
use std::ops;
use std::ops::{Deref, DerefMut};
use std::str;
-use std::str::{Bytes, FromStr};
+use std::str::FromStr;
/// Encapsulates the IDL `ByteString` type.
-#[derive(JSTraceable, Clone, Eq, PartialEq, HeapSizeOf, Debug)]
+#[derive(Clone, Debug, Default, Eq, JSTraceable, MallocSizeOf, PartialEq)]
pub struct ByteString(Vec<u8>);
impl ByteString {
@@ -36,11 +40,6 @@ impl ByteString {
self.0.len()
}
- /// Compare `self` to `other`, matching A–Z and a–z as equal.
- pub fn eq_ignore_case(&self, other: &ByteString) -> bool {
- self.0.eq_ignore_ascii_case(&other.0)
- }
-
/// Returns `self` with A–Z replaced by a–z.
pub fn to_lower(&self) -> ByteString {
ByteString::new(self.0.to_ascii_lowercase())
@@ -75,9 +74,62 @@ impl ops::Deref for ByteString {
/// A string that is constructed from a UCS-2 buffer by replacing invalid code
/// points with the replacement character.
-#[derive(Clone, HeapSizeOf)]
+#[derive(Clone, Default, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
pub struct USVString(pub String);
+/// Borrowing a `USVString` yields the wrapped `String` viewed as a `str`.
+impl Borrow<str> for USVString {
+    #[inline]
+    fn borrow(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+/// Dereferences to the contents of the wrapped `String`, so `str` methods
+/// can be called directly on a `USVString`.
+impl Deref for USVString {
+    type Target = str;
+
+    #[inline]
+    fn deref(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+/// Mutable counterpart of the `Deref` impl: exposes the wrapped `String`
+/// as a mutable `str`.
+impl DerefMut for USVString {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut str {
+        self.0.as_mut_str()
+    }
+}
+
+/// Cheap reference conversion to the wrapped string's contents.
+impl AsRef<str> for USVString {
+    fn as_ref(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
+/// Formats a `USVString` exactly as its underlying `str` would be formatted.
+impl fmt::Display for USVString {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Display::fmt(self.0.as_str(), f)
+    }
+}
+
+/// Compares the wrapped string's contents against a `str`.
+impl PartialEq<str> for USVString {
+    fn eq(&self, other: &str) -> bool {
+        self.0.as_str() == other
+    }
+}
+
+/// Compares the wrapped string's contents against a `&str`, which lets a
+/// `USVString` be compared directly with a string literal.
+impl<'a> PartialEq<&'a str> for USVString {
+    fn eq(&self, other: &&'a str) -> bool {
+        self.0.as_str() == *other
+    }
+}
+
+/// Wraps an owned `String` in a `USVString` without inspecting or copying
+/// its contents.
+impl From<String> for USVString {
+    fn from(contents: String) -> Self {
+        USVString(contents)
+    }
+}
/// Returns whether `s` is a `token`, as defined by
/// [RFC 2616](http://tools.ietf.org/html/rfc2616#page-17).
@@ -88,96 +140,22 @@ pub fn is_token(s: &[u8]) -> bool {
s.iter().all(|&x| {
// http://tools.ietf.org/html/rfc2616#section-2.2
match x {
- 0...31 | 127 => false, // CTLs
- 40 |
- 41 |
- 60 |
- 62 |
- 64 |
- 44 |
- 59 |
- 58 |
- 92 |
- 34 |
- 47 |
- 91 |
- 93 |
- 63 |
- 61 |
- 123 |
- 125 |
- 32 => false, // separators
+ 0..=31 | 127 => false, // CTLs
+ 40 | 41 | 60 | 62 | 64 | 44 | 59 | 58 | 92 | 34 | 47 | 91 | 93 | 63 | 61 | 123 |
+ 125 | 32 => false, // separators
x if x > 127 => false, // non-CHARs
_ => true,
}
})
}
-/// Returns whether the language is matched, as defined by
-/// [RFC 4647](https://tools.ietf.org/html/rfc4647#section-3.3.2).
-pub fn extended_filtering(tag: &str, range: &str) -> bool {
- let lang_ranges: Vec<&str> = range.split(',').collect();
-
- lang_ranges.iter().any(|&lang_range| {
- // step 1
- let range_subtags: Vec<&str> = lang_range.split('\x2d').collect();
- let tag_subtags: Vec<&str> = tag.split('\x2d').collect();
-
- let mut range_iter = range_subtags.iter();
- let mut tag_iter = tag_subtags.iter();
-
- // step 2
- // Note: [Level-4 spec](https://drafts.csswg.org/selectors/#lang-pseudo) check for wild card
- if let (Some(range_subtag), Some(tag_subtag)) = (range_iter.next(), tag_iter.next()) {
- if !(range_subtag.eq_ignore_ascii_case(tag_subtag) || range_subtag.eq_ignore_ascii_case("*")) {
- return false;
- }
- }
-
- let mut current_tag_subtag = tag_iter.next();
-
- // step 3
- for range_subtag in range_iter {
- // step 3a
- if range_subtag.eq_ignore_ascii_case("*") {
- continue;
- }
- match current_tag_subtag.clone() {
- Some(tag_subtag) => {
- // step 3c
- if range_subtag.eq_ignore_ascii_case(tag_subtag) {
- current_tag_subtag = tag_iter.next();
- continue;
- } else {
- // step 3d
- if tag_subtag.len() == 1 {
- return false;
- } else {
- // else step 3e - continue with loop
- current_tag_subtag = tag_iter.next();
- if current_tag_subtag.is_none() {
- return false;
- }
- }
- }
- },
- // step 3b
- None => { return false; }
- }
- }
- // step 4
- true
- })
-}
-
-
/// A DOMString.
///
/// This type corresponds to the [`DOMString`](idl) type in WebIDL.
///
/// [idl]: https://heycam.github.io/webidl/#idl-DOMString
///
-/// Cenceptually, a DOMString has the same value space as a JavaScript String,
+/// Conceptually, a DOMString has the same value space as a JavaScript String,
/// i.e., an array of 16-bit *code units* representing UTF-16, potentially with
/// unpaired surrogates present (also sometimes called WTF-16).
///
@@ -207,20 +185,18 @@ pub fn extended_filtering(tag: &str, range: &str) -> bool {
///
/// This type is currently `!Send`, in order to help with an independent
/// experiment to store `JSString`s rather than Rust `String`s.
-#[derive(Clone, Debug, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd)]
-pub struct DOMString(String);
-
-impl !Send for DOMString {}
+#[derive(Clone, Debug, Eq, Hash, MallocSizeOf, Ord, PartialEq, PartialOrd)]
+pub struct DOMString(String, PhantomData<*const ()>);
impl DOMString {
/// Creates a new `DOMString`.
pub fn new() -> DOMString {
- DOMString(String::new())
+ DOMString(String::new(), PhantomData)
}
/// Creates a new `DOMString` from a `String`.
pub fn from_string(s: String) -> DOMString {
- DOMString(s)
+ DOMString(s, PhantomData)
}
/// Appends a given string slice onto the end of this String.
@@ -238,9 +214,332 @@ impl DOMString {
self.0.truncate(new_len);
}
- /// An iterator over the bytes of this `DOMString`.
- pub fn bytes(&self) -> Bytes {
- self.0.bytes()
+ /// Strips newlines in place, following
+ /// <https://infra.spec.whatwg.org/#strip-newlines>: every U+000D CR and
+ /// U+000A LF is dropped, all other characters are kept in order.
+ pub fn strip_newlines(&mut self) {
+     self.0.retain(|c| match c {
+         '\r' | '\n' => false,
+         _ => true,
+     });
+ }
+
+ /// Strips ASCII whitespace from both ends of the string in place, per
+ /// <https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace>.
+ ///
+ /// Whitespace in the interior of the string is left alone.
+ pub fn strip_leading_and_trailing_ascii_whitespace(&mut self) {
+     // Cut the tail first so the leading scan below only sees what is kept.
+     let kept_len = self
+         .0
+         .trim_end_matches(|c: char| c.is_ascii_whitespace())
+         .len();
+     self.0.truncate(kept_len);
+
+     // The number of leading whitespace bytes is whatever `trim_start_matches`
+     // would drop; for an empty (or now-empty) string that is zero, which
+     // makes the removal below a no-op.
+     let leading_len = self.0.len() -
+         self.0
+             .trim_start_matches(|c: char| c.is_ascii_whitespace())
+             .len();
+     self.0.replace_range(..leading_len, "");
+ }
+
+ /// Validates this `DOMString` is a time string according to
+ /// <https://html.spec.whatwg.org/multipage/#valid-time-string>.
+ ///
+ /// The check is a small state machine folded over the characters of the
+ /// string: each state names the character expected next, and any
+ /// unexpected character moves the machine to `State::Error`, from which
+ /// it never recovers. The string is valid when the final state is one of
+ /// the accepting states listed at the bottom of the function.
+ pub fn is_valid_time_string(&self) -> bool {
+ // One state per position in "HH:mm:ss.SSS"; the state name describes
+ // the character that is expected next.
+ enum State {
+ HourHigh,
+ HourLow09,
+ HourLow03,
+ MinuteColon,
+ MinuteHigh,
+ MinuteLow,
+ SecondColon,
+ SecondHigh,
+ SecondLow,
+ MilliStop,
+ MilliHigh,
+ MilliMiddle,
+ MilliLow,
+ Done,
+ Error,
+ }
+ // Advances to `next` when the current character was acceptable,
+ // otherwise drops into the error state.
+ let next_state = |valid: bool, next: State| -> State {
+ if valid {
+ next
+ } else {
+ State::Error
+ }
+ };
+
+ let state = self.chars().fold(State::HourHigh, |state, c| {
+ match state {
+ // Step 1 "HH"
+ // A leading '0' or '1' allows any second digit; a leading '2'
+ // only allows '0'..='3' (radix-4 digits), capping hours at 23.
+ State::HourHigh => match c {
+ '0' | '1' => State::HourLow09,
+ '2' => State::HourLow03,
+ _ => State::Error,
+ },
+ State::HourLow09 => next_state(c.is_digit(10), State::MinuteColon),
+ State::HourLow03 => next_state(c.is_digit(4), State::MinuteColon),
+
+ // Step 2 ":"
+ State::MinuteColon => next_state(c == ':', State::MinuteHigh),
+
+ // Step 3 "mm"
+ // Radix-6 digit ('0'..='5') for the tens place caps minutes at 59.
+ State::MinuteHigh => next_state(c.is_digit(6), State::MinuteLow),
+ State::MinuteLow => next_state(c.is_digit(10), State::SecondColon),
+
+ // Step 4.1 ":"
+ State::SecondColon => next_state(c == ':', State::SecondHigh),
+ // Step 4.2 "ss"
+ State::SecondHigh => next_state(c.is_digit(6), State::SecondLow),
+ State::SecondLow => next_state(c.is_digit(10), State::MilliStop),
+
+ // Step 4.3.1 "."
+ State::MilliStop => next_state(c == '.', State::MilliHigh),
+ // Step 4.3.2 "SSS"
+ State::MilliHigh => next_state(c.is_digit(10), State::MilliMiddle),
+ State::MilliMiddle => next_state(c.is_digit(10), State::MilliLow),
+ State::MilliLow => next_state(c.is_digit(10), State::Done),
+
+ // Any character after `Done`, or after an error, is rejected.
+ _ => State::Error,
+ }
+ });
+
+ // Accepting states: the input may stop after "HH:mm", after
+ // "HH:mm:ss", or after one to three fractional digits.
+ match state {
+ State::Done |
+ // Step 4 (optional)
+ State::SecondColon |
+ // Step 4.3 (optional)
+ State::MilliStop |
+ // Step 4.3.2 (only 1 digit required)
+ State::MilliMiddle | State::MilliLow => true,
+ _ => false
+ }
+ }
+
+ /// Reports whether this string is a valid date string of the form
+ /// "YYYY-MM-DD": YYYY is four or more digits, MM and DD are two digits each.
+ /// <https://html.spec.whatwg.org/multipage/#valid-date-string>
+ ///
+ /// Validity is defined as `parse_date_string` succeeding.
+ pub fn is_valid_date_string(&self) -> bool {
+     match self.parse_date_string() {
+         Ok(_) => true,
+         Err(()) => false,
+     }
+ }
+
+ /// Parses the whole string as a date, returning `(year, month, day)`.
+ /// <https://html.spec.whatwg.org/multipage/#parse-a-date-string>
+ ///
+ /// Fails when the date component does not parse or when a fourth
+ /// '-'-separated field follows the day.
+ pub fn parse_date_string(&self) -> Result<(i32, u32, u32), ()> {
+     // Step 1, 2, 3
+     let date = parse_date_component(&self.0)?;
+
+     // Step 4: nothing may follow the day field.
+     // Step 5, 6: otherwise hand back the parsed date.
+     match self.0.split('-').nth(3) {
+         Some(_) => Err(()),
+         None => Ok(date),
+     }
+ }
+
+ /// Parses the whole string as a time, returning `(hour, minute, second)`.
+ /// <https://html.spec.whatwg.org/multipage/#parse-a-time-string>
+ ///
+ /// Fails when the time component does not parse or when a fourth
+ /// ':'-separated field follows the seconds.
+ pub fn parse_time_string(&self) -> Result<(u32, u32, f64), ()> {
+     // Step 1, 2, 3
+     let time = parse_time_component(&self.0)?;
+
+     // Step 4: nothing may follow the seconds field.
+     // Step 5, 6: otherwise hand back the parsed time.
+     match self.0.split(':').nth(3) {
+         Some(_) => Err(()),
+         None => Ok(time),
+     }
+ }
+
+ /// Reports whether this string is a valid month string of the form
+ /// "YYYY-MM": YYYY is four or more digits and MM is exactly two digits.
+ /// <https://html.spec.whatwg.org/multipage/#valid-month-string>
+ ///
+ /// Validity is defined as `parse_month_string` succeeding.
+ pub fn is_valid_month_string(&self) -> bool {
+     match self.parse_month_string() {
+         Ok(_) => true,
+         Err(()) => false,
+     }
+ }
+
+ /// Parses the whole string as a month, returning `(year, month)`.
+ /// <https://html.spec.whatwg.org/multipage/#parse-a-month-string>
+ ///
+ /// Fails when the month component does not parse or when a third
+ /// '-'-separated field follows the month.
+ pub fn parse_month_string(&self) -> Result<(i32, u32), ()> {
+     // Step 1, 2, 3
+     let month = parse_month_component(&self.0)?;
+
+     // Step 4: nothing may follow the month field.
+     // Step 5: otherwise hand back the parsed month.
+     match self.0.split("-").nth(2) {
+         Some(_) => Err(()),
+         None => Ok(month),
+     }
+ }
+
+ /// Reports whether this string is a valid week string of the form
+ /// {YYYY}-W{WW}, such as "2017-W52": YYYY is four or more digits and WW is
+ /// exactly two digits.
+ /// <https://html.spec.whatwg.org/multipage/#valid-week-string>
+ ///
+ /// Validity is defined as `parse_week_string` succeeding.
+ pub fn is_valid_week_string(&self) -> bool {
+     match self.parse_week_string() {
+         Ok(_) => true,
+         Err(()) => false,
+     }
+ }
+
+ /// Parses the whole string as a week, returning `(week_year, week)`.
+ /// <https://html.spec.whatwg.org/multipage/#parse-a-week-string>
+ ///
+ /// The accepted shape is `YYYY-Www`: a year of four or more ASCII digits
+ /// (not zero), a '-', a literal 'W', and exactly two ASCII digits naming a
+ /// week between 1 and the last week of that year. Anything else, including
+ /// signs, an empty week field or trailing fields, yields `Err(())`.
+ pub fn parse_week_string(&self) -> Result<(i32, u32), ()> {
+     let value = &self.0;
+     // Step 1, 2, 3
+     let mut iterator = value.split('-');
+     let year = iterator.next().ok_or(())?;
+
+     // Step 4
+     // Check the digits explicitly: integer `parse` would also accept a
+     // leading '+', which the spec does not allow.
+     if year.len() < 4 || !year.bytes().all(|b| b.is_ascii_digit()) {
+         return Err(());
+     }
+     let year_int = year.parse::<i32>().map_err(|_| ())?;
+     if year_int == 0 {
+         return Err(());
+     }
+
+     // Step 5, 6
+     let week = iterator.next().ok_or(())?;
+     // `starts_with` is safe on an empty field and on a field that begins
+     // with a multi-byte character, where `split_at(1)` would panic.
+     if !week.starts_with('W') {
+         return Err(());
+     }
+     // 'W' is one byte long, so index 1 is a character boundary.
+     let week_digits = &week[1..];
+
+     // Step 7
+     if week_digits.len() != 2 || !week_digits.bytes().all(|b| b.is_ascii_digit()) {
+         return Err(());
+     }
+     let week_int = week_digits.parse::<u32>().map_err(|_| ())?;
+
+     // Step 8
+     let max_week = max_week_in_year(year_int);
+
+     // Step 9
+     if week_int < 1 || week_int > max_week {
+         return Err(());
+     }
+
+     // Step 10
+     if iterator.next().is_some() {
+         return Err(());
+     }
+
+     // Step 11
+     Ok((year_int, week_int))
+ }
+
+ /// Reports whether this string is a valid floating-point number.
+ /// <https://html.spec.whatwg.org/multipage/#valid-floating-point-number>
+ ///
+ /// The string must match the syntactic pattern below and must also be
+ /// accepted by `parse_floating_point_number`.
+ pub fn is_valid_floating_point_number_string(&self) -> bool {
+     lazy_static! {
+         static ref RE: Regex =
+             Regex::new(r"^-?(?:\d+\.\d+|\d+|\.\d+)(?:(e|E)(\+|\-)?\d+)?$").unwrap();
+     }
+     // Cheap syntactic check first; only then attempt the numeric parse.
+     if !RE.is_match(&self.0) {
+         return false;
+     }
+     self.parse_floating_point_number().is_ok()
+ }
+
+ /// Parses this string as a floating-point number.
+ /// <https://html.spec.whatwg.org/multipage/#rules-for-parsing-floating-point-number-values>
+ ///
+ /// Surrounding whitespace is trimmed before parsing. A number is accepted
+ /// when Rust's `f64` parser accepts it, except that a leading '+', a
+ /// trailing '.', NaN and infinities are rejected with `Err(())`.
+ pub fn parse_floating_point_number(&self) -> Result<f64, ()> {
+     // Steps 15-16 are telling us things about IEEE rounding modes
+     // for floating-point significands; this code assumes the Rust
+     // compiler already matches them in any cases where
+     // that actually matters. They are not
+     // related to f64::round(), which is for rounding to integers.
+     let trimmed = self.0.trim();
+
+     // Apply the sign and trailing-dot guards to the same text that is
+     // parsed, so that surrounding whitespace cannot be used to slip
+     // "+1" or "1." past them.
+     if trimmed.starts_with('+') || trimmed.ends_with('.') {
+         return Err(());
+     }
+
+     match trimmed.parse::<f64>() {
+         // `is_finite` excludes both NaN and the infinities.
+         Ok(val) if val.is_finite() => Ok(val),
+         _ => Err(()),
+     }
+ }
+
+ /// Rewrites this string as the textual form of the number it parses to.
+ /// <https://html.spec.whatwg.org/multipage/#best-representation-of-the-number-as-a-floating-point-number>
+ ///
+ /// When the current contents do not parse as a floating-point number the
+ /// string is left unchanged.
+ pub fn set_best_representation_of_the_floating_point_number(&mut self) {
+     match self.parse_floating_point_number() {
+         Ok(val) => self.0 = val.to_string(),
+         Err(()) => {},
+     }
+ }
+
+ /// Rewrites this string as a normalized local date and time string,
+ /// "{date}T{time}", where the time is written as briefly as possible.
+ /// <https://html.spec.whatwg.org/multipage/#valid-normalised-local-date-and-time-string>
+ ///
+ /// Returns `Err(())`, leaving the string unchanged, when the current
+ /// contents do not parse as a local date and time.
+ pub fn convert_valid_normalized_local_date_and_time_string(&mut self) -> Result<(), ()> {
+     let ((year, month, day), (hour, minute, second)) =
+         self.parse_local_date_and_time_string()?;
+     self.0 = if second == 0.0 {
+         // zero seconds are omitted entirely
+         format!(
+             "{:04}-{:02}-{:02}T{:02}:{:02}",
+             year, month, day, hour, minute
+         )
+     } else if second < 10.0 {
+         // we need exactly one leading zero on the seconds,
+         // whatever their total string length might be
+         format!(
+             "{:04}-{:02}-{:02}T{:02}:{:02}:0{}",
+             year, month, day, hour, minute, second
+         )
+     } else {
+         // we need no leading zeroes on the seconds
+         format!(
+             "{:04}-{:02}-{:02}T{:02}:{:02}:{}",
+             year, month, day, hour, minute, second
+         )
+     };
+     Ok(())
+ }
+
+ /// Parses the whole string as a local date and time, returning
+ /// `((year, month, day), (hour, minute, second))`.
+ /// <https://html.spec.whatwg.org/multipage/#parse-a-local-date-and-time-string>
+ ///
+ /// The date and time halves are separated by 'T' when the string contains
+ /// one, otherwise by a space. Fails with `Err(())` when either half does
+ /// not parse, when either half carries extra trailing fields, or when
+ /// more than one separator is present.
+ pub fn parse_local_date_and_time_string(
+     &self,
+ ) -> Result<((i32, u32, u32), (u32, u32, f64)), ()> {
+     let value: &str = &self.0;
+     // Step 1, 2, 4
+     let mut iterator = if value.contains('T') {
+         value.split('T')
+     } else {
+         value.split(' ')
+     };
+
+     // Step 3
+     let date = iterator.next().ok_or(())?;
+     let date_tuple = parse_date_component(date)?;
+     // The component parser only reads the first three '-' fields; the
+     // separator must follow the day directly, so any further field
+     // (e.g. "2017-01-01-05T12:30") is an error.
+     if date.split('-').nth(3).is_some() {
+         return Err(());
+     }
+
+     // Step 5
+     let time = iterator.next().ok_or(())?;
+     let time_tuple = parse_time_component(time)?;
+     // Likewise the time parser only reads the first three ':' fields;
+     // nothing may follow the seconds (e.g. "...T12:30:15:99").
+     if time.split(':').nth(3).is_some() {
+         return Err(());
+     }
+
+     // Step 6
+     if iterator.next().is_some() {
+         return Err(());
+     }
+
+     // Step 7, 8, 9
+     Ok((date_tuple, time_tuple))
+ }
+
+ /// Reports whether this string matches the e-mail address pattern below.
+ /// https://html.spec.whatwg.org/multipage/#valid-e-mail-address
+ ///
+ /// The pattern requires a non-empty local part drawn from ASCII letters,
+ /// digits and ``.!#$%&'*+/=?^_`{|}~-``, then '@', then one or more
+ /// '.'-separated labels. Each label starts and ends with an ASCII letter
+ /// or digit, may contain '-' in between, and is at most 63 characters long.
+ pub fn is_valid_email_address_string(&self) -> bool {
+ // The pattern lives in a `lazy_static!` static so it is not rebuilt on
+ // every call; `concat!` only splits the literal across two lines.
+ lazy_static! {
+ static ref RE: Regex = Regex::new(concat!(
+ r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?",
+ r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+ ))
+ .unwrap();
+ }
+ RE.is_match(&self.0)
+ }
+
+ /// Reports whether this string is a valid simple colour: exactly seven
+ /// bytes long, a leading '#', and hexadecimal digits after it.
+ /// <https://html.spec.whatwg.org/multipage/#valid-simple-colour>
+ pub fn is_valid_simple_color_string(&self) -> bool {
+     let text = self.0.as_str();
+     if text.len() != 7 || !text.starts_with('#') {
+         return false;
+     }
+     // '#' is one byte long, so index 1 is a character boundary.
+     text[1..].chars().all(|c| c.is_digit(16))
}
}
@@ -253,7 +552,7 @@ impl Borrow<str> for DOMString {
impl Default for DOMString {
fn default() -> Self {
- DOMString(String::new())
+ DOMString(String::new(), PhantomData)
}
}
@@ -300,7 +599,7 @@ impl<'a> PartialEq<&'a str> for DOMString {
impl From<String> for DOMString {
fn from(contents: String) -> DOMString {
- DOMString(contents)
+ DOMString(contents, PhantomData)
}
}
@@ -355,8 +654,143 @@ impl<'a> Into<Cow<'a, str>> for DOMString {
}
}
+/// Converts a `DOMString` into a `CowRcStr` by handing over the wrapped
+/// `String`.
+impl<'a> Into<CowRcStr<'a>> for DOMString {
+    fn into(self) -> CowRcStr<'a> {
+        CowRcStr::from(self.0)
+    }
+}
+
impl Extend<char> for DOMString {
- fn extend<I>(&mut self, iterable: I) where I: IntoIterator<Item=char> {
+ fn extend<I>(&mut self, iterable: I)
+ where
+ I: IntoIterator<Item = char>,
+ {
self.0.extend(iterable)
}
}
+
+/// Parses the leading "YYYY-MM" of `value`, returning `(year, month)`.
+/// <https://html.spec.whatwg.org/multipage/#parse-a-month-component>
+///
+/// The year must be four or more ASCII digits and not zero; the month must
+/// be exactly two ASCII digits in the range 1..=12. Fields after the month
+/// are ignored here; callers reject them where the spec requires it.
+/// Any violation yields `Err(())`.
+fn parse_month_component(value: &str) -> Result<(i32, u32), ()> {
+    // Step 3
+    let mut iterator = value.split('-');
+    let year = iterator.next().ok_or(())?;
+    let month = iterator.next().ok_or(())?;
+
+    // Step 1, 2
+    // Check the digits explicitly: integer `parse` would also accept a
+    // leading '+', which the spec does not allow.
+    if year.len() < 4 || !year.bytes().all(|b| b.is_ascii_digit()) {
+        return Err(());
+    }
+    let year_int = year.parse::<i32>().map_err(|_| ())?;
+    if year_int == 0 {
+        return Err(());
+    }
+
+    // Step 4, 5
+    if month.len() != 2 || !month.bytes().all(|b| b.is_ascii_digit()) {
+        return Err(());
+    }
+    let month_int = month.parse::<u32>().map_err(|_| ())?;
+    if month_int < 1 || month_int > 12 {
+        return Err(());
+    }
+
+    // Step 6
+    Ok((year_int, month_int))
+}
+
+/// Parses the leading "YYYY-MM-DD" of `value`, returning `(year, month, day)`.
+/// <https://html.spec.whatwg.org/multipage/#parse-a-date-component>
+///
+/// The year and month are parsed by `parse_month_component`; the day must be
+/// exactly two ASCII digits naming a day that exists in that month. Fields
+/// after the day are ignored here; callers reject them where the spec
+/// requires it. Any violation yields `Err(())`.
+fn parse_date_component(value: &str) -> Result<(i32, u32, u32), ()> {
+    // Step 1
+    let (year_int, month_int) = parse_month_component(value)?;
+
+    // Step 3, 4
+    let day = value.split('-').nth(2).ok_or(())?;
+    // Check the digits explicitly: integer `parse` would also accept a
+    // leading '+' (e.g. "+1"), which the spec does not allow.
+    if day.len() != 2 || !day.bytes().all(|b| b.is_ascii_digit()) {
+        return Err(());
+    }
+    let day_int = day.parse::<u32>().map_err(|_| ())?;
+
+    // Step 2, 5
+    let max_day = max_day_in_month(year_int, month_int)?;
+    if day_int == 0 || day_int > max_day {
+        return Err(());
+    }
+
+    // Step 6
+    Ok((year_int, month_int, day_int))
+}
+
+/// Parses the leading "HH:mm[:ss[.SSS]]" of `value`, returning
+/// `(hour, minute, second)`.
+/// <https://html.spec.whatwg.org/multipage/#parse-a-time-component>
+///
+/// Hours and minutes must each be exactly two ASCII digits, at most 23 and
+/// 59 respectively. Seconds are optional and default to `0.0`; when present
+/// they must be two ASCII digits, optionally followed by '.' and one to
+/// three ASCII digits, and must be below 60. Fields after the seconds are
+/// ignored here; callers reject them where the spec requires it.
+/// Any violation yields `Err(())`.
+fn parse_time_component(value: &str) -> Result<(u32, u32, f64), ()> {
+    // Number parsing alone is too lenient (it accepts signs, and for floats
+    // forms such as "15."), so every field's digits are checked first.
+    fn is_two_digits(field: &str) -> bool {
+        field.len() == 2 && field.bytes().all(|b| b.is_ascii_digit())
+    }
+
+    // Step 1
+    let mut iterator = value.split(':');
+    let hour = iterator.next().ok_or(())?;
+    if !is_two_digits(hour) {
+        return Err(());
+    }
+    let hour_int = hour.parse::<u32>().map_err(|_| ())?;
+
+    // Step 2
+    if hour_int > 23 {
+        return Err(());
+    }
+
+    // Step 3, 4
+    let minute = iterator.next().ok_or(())?;
+    if !is_two_digits(minute) {
+        return Err(());
+    }
+    let minute_int = minute.parse::<u32>().map_err(|_| ())?;
+
+    // Step 5
+    if minute_int > 59 {
+        return Err(());
+    }
+
+    // Step 6, 7
+    let second_float = match iterator.next() {
+        Some(second) => {
+            let mut second_iterator = second.split('.');
+            if !is_two_digits(second_iterator.next().ok_or(())?) {
+                return Err(());
+            }
+            if let Some(fraction) = second_iterator.next() {
+                // A '.' must be followed by one to three digits.
+                if fraction.is_empty() ||
+                    fraction.len() > 3 ||
+                    !fraction.bytes().all(|b| b.is_ascii_digit())
+                {
+                    return Err(());
+                }
+            }
+            // More than one '.' is never a number.
+            if second_iterator.next().is_some() {
+                return Err(());
+            }
+
+            let seconds = second.parse::<f64>().map_err(|_| ())?;
+            // The digit checks above rule out negatives; cap at 60.
+            if seconds >= 60.0 {
+                return Err(());
+            }
+            seconds
+        },
+        None => 0.0,
+    };
+
+    // Step 8
+    Ok((hour_int, minute_int, second_float))
+}
+
+/// Returns the number of days in month `month_num` (1 = January) of year
+/// `year_num`, or `Err(())` when `month_num` is not in 1..=12.
+///
+/// February has 29 days when `is_leap_year(year_num)` holds, 28 otherwise.
+fn max_day_in_month(year_num: i32, month_num: u32) -> Result<u32, ()> {
+    let days = match month_num {
+        2 if is_leap_year(year_num) => 29,
+        2 => 28,
+        4 | 6 | 9 | 11 => 30,
+        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
+        _ => return Err(()),
+    };
+    Ok(days)
+}
+
+/// Returns the week number of the last day of `year`: 53 when January 1st
+/// is a Thursday, or a Wednesday in a leap year, and 52 otherwise.
+/// <https://html.spec.whatwg.org/multipage/#week-number-of-the-last-day>
+fn max_week_in_year(year: i32) -> u32 {
+    // `Utc.ymd` panics for years outside chrono's supported range, and the
+    // year comes straight from page content. The Gregorian calendar repeats
+    // exactly every 400 years (146097 days, a whole number of weeks), so the
+    // weekday of January 1st can be read from the equivalent year in
+    // 2000..=2399 instead, which is always representable.
+    let equivalent_year = 2000 + ((year % 400) + 400) % 400;
+    match Utc.ymd(equivalent_year, 1, 1).weekday() {
+        Weekday::Thu => 53,
+        Weekday::Wed if is_leap_year(year) => 53,
+        _ => 52,
+    }
+}
+
+/// Gregorian leap-year rule: century years are leap years only when
+/// divisible by 400; every other year is a leap year when divisible by 4.
+#[inline]
+fn is_leap_year(year: i32) -> bool {
+    if year % 100 == 0 {
+        year % 400 == 0
+    } else {
+        year % 4 == 0
+    }
+}