diff options
Diffstat (limited to 'components/script/dom')
-rw-r--r-- | components/script/dom/blob.rs | 29 | ||||
-rw-r--r-- | components/script/dom/document.rs | 10 | ||||
-rw-r--r-- | components/script/dom/globalscope.rs | 96 | ||||
-rw-r--r-- | components/script/dom/htmlstyleelement.rs | 16 | ||||
-rw-r--r-- | components/script/dom/servoparser/html.rs | 3 | ||||
-rw-r--r-- | components/script/dom/urlpattern.rs | 204 | ||||
-rw-r--r-- | components/script/dom/urlpattern/mod.rs | 810 | ||||
-rw-r--r-- | components/script/dom/urlpattern/pattern_parser.rs | 473 | ||||
-rw-r--r-- | components/script/dom/urlpattern/preprocessing.rs | 659 | ||||
-rw-r--r-- | components/script/dom/urlpattern/tokenizer.rs | 524 |
10 files changed, 285 insertions, 2539 deletions
diff --git a/components/script/dom/blob.rs b/components/script/dom/blob.rs index 27aa382c3fc..c5c5c480707 100644 --- a/components/script/dom/blob.rs +++ b/components/script/dom/blob.rs @@ -7,7 +7,7 @@ use std::ptr; use std::rc::Rc; use base::id::{BlobId, BlobIndex}; -use constellation_traits::BlobImpl; +use constellation_traits::{BlobData, BlobImpl}; use dom_struct::dom_struct; use encoding_rs::UTF_8; use js::jsapi::JSObject; @@ -33,7 +33,7 @@ use crate::dom::readablestream::ReadableStream; use crate::realms::{AlreadyInRealm, InRealm}; use crate::script_runtime::CanGc; -// https://w3c.github.io/FileAPI/#blob +/// <https://w3c.github.io/FileAPI/#dfn-Blob> #[dom_struct] pub(crate) struct Blob { reflector_: Reflector, @@ -198,7 +198,7 @@ impl BlobMethods<crate::DomTypeHolder> for Blob { self.get_stream(can_gc) } - // https://w3c.github.io/FileAPI/#slice-method-algo + /// <https://w3c.github.io/FileAPI/#slice-method-algo> fn Slice( &self, start: Option<i64>, @@ -206,11 +206,24 @@ impl BlobMethods<crate::DomTypeHolder> for Blob { content_type: Option<DOMString>, can_gc: CanGc, ) -> DomRoot<Blob> { - let type_string = - normalize_type_string(content_type.unwrap_or(DOMString::from("")).as_ref()); - let rel_pos = RelativePos::from_opts(start, end); - let blob_impl = BlobImpl::new_sliced(rel_pos, self.blob_id, type_string); - Blob::new(&self.global(), blob_impl, can_gc) + let global = self.global(); + let type_string = normalize_type_string(&content_type.unwrap_or_default()); + + // If our parent is already a sliced blob then we reference the data from the grandparent instead, + // to keep the blob ancestry chain short. 
+ let (parent, range) = match *global.get_blob_data(&self.blob_id) { + BlobData::Sliced(grandparent, parent_range) => { + let range = RelativePos { + start: parent_range.start + start.unwrap_or_default(), + end: end.map(|end| end + parent_range.start).or(parent_range.end), + }; + (grandparent, range) + }, + _ => (self.blob_id, RelativePos::from_opts(start, end)), + }; + + let blob_impl = BlobImpl::new_sliced(range, parent, type_string); + Blob::new(&global, blob_impl, can_gc) } // https://w3c.github.io/FileAPI/#text-method-algo diff --git a/components/script/dom/document.rs b/components/script/dom/document.rs index ae48fa1fb2f..e3590461604 100644 --- a/components/script/dom/document.rs +++ b/components/script/dom/document.rs @@ -4307,16 +4307,16 @@ impl Document { type_: csp::InlineCheckType, source: &str, ) -> csp::CheckResult { - let element = csp::Element { - nonce: el - .get_attribute(&ns!(), &local_name!("nonce")) - .map(|attr| Cow::Owned(attr.value().to_string())), - }; let (result, violations) = match self.get_csp_list() { None => { return csp::CheckResult::Allowed; }, Some(csp_list) => { + let element = csp::Element { + nonce: el + .get_attribute(&ns!(), &local_name!("nonce")) + .map(|attr| Cow::Owned(attr.value().to_string())), + }; csp_list.should_elements_inline_type_behavior_be_blocked(&element, type_, source) }, }; diff --git a/components/script/dom/globalscope.rs b/components/script/dom/globalscope.rs index 527d03eed4e..98c4c3ed53d 100644 --- a/components/script/dom/globalscope.rs +++ b/components/script/dom/globalscope.rs @@ -2,7 +2,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ -use std::cell::{Cell, OnceCell}; +use std::cell::{Cell, OnceCell, Ref}; use std::collections::hash_map::Entry; use std::collections::{HashMap, VecDeque}; use std::ops::Index; @@ -1821,12 +1821,8 @@ impl GlobalScope { /// In the case of a File-backed blob, this might incur synchronous read and caching. pub(crate) fn get_blob_bytes(&self, blob_id: &BlobId) -> Result<Vec<u8>, ()> { let parent = { - let blob_state = self.blob_state.borrow(); - let blob_info = blob_state - .get(blob_id) - .expect("get_blob_bytes for an unknown blob."); - match blob_info.blob_impl.blob_data() { - BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())), + match *self.get_blob_data(blob_id) { + BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)), _ => None, } }; @@ -1840,14 +1836,24 @@ impl GlobalScope { } } + /// Retrieve information about a specific blob from the blob store + /// + /// # Panics + /// This function panics if there is no blob with the given ID. + pub(crate) fn get_blob_data<'a>(&'a self, blob_id: &BlobId) -> Ref<'a, BlobData> { + Ref::map(self.blob_state.borrow(), |blob_state| { + blob_state + .get(blob_id) + .expect("get_blob_impl called for a unknown blob") + .blob_impl + .blob_data() + }) + } + /// Get bytes from a non-sliced blob fn get_blob_bytes_non_sliced(&self, blob_id: &BlobId) -> Result<Vec<u8>, ()> { - let blob_state = self.blob_state.borrow(); - let blob_info = blob_state - .get(blob_id) - .expect("get_blob_bytes_non_sliced called for a unknown blob."); - match blob_info.blob_impl.blob_data() { - BlobData::File(f) => { + match *self.get_blob_data(blob_id) { + BlobData::File(ref f) => { let (buffer, is_new_buffer) = match f.get_cache() { Some(bytes) => (bytes, false), None => { @@ -1863,7 +1869,7 @@ impl GlobalScope { Ok(buffer) }, - BlobData::Memory(s) => Ok(s.clone()), + BlobData::Memory(ref s) => Ok(s.clone()), BlobData::Sliced(_, _) => panic!("This blob doesn't have a parent."), } } @@ -1876,12 +1882,8 @@ impl GlobalScope { /// 
TODO: merge with `get_blob_bytes` by way of broader integration with blob streams. fn get_blob_bytes_or_file_id(&self, blob_id: &BlobId) -> BlobResult { let parent = { - let blob_state = self.blob_state.borrow(); - let blob_info = blob_state - .get(blob_id) - .expect("get_blob_bytes_or_file_id for an unknown blob."); - match blob_info.blob_impl.blob_data() { - BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())), + match *self.get_blob_data(blob_id) { + BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)), _ => None, } }; @@ -1906,16 +1908,12 @@ impl GlobalScope { /// tweaked for integration with streams. /// TODO: merge with `get_blob_bytes` by way of broader integration with blob streams. fn get_blob_bytes_non_sliced_or_file_id(&self, blob_id: &BlobId) -> BlobResult { - let blob_state = self.blob_state.borrow(); - let blob_info = blob_state - .get(blob_id) - .expect("get_blob_bytes_non_sliced_or_file_id called for a unknown blob."); - match blob_info.blob_impl.blob_data() { - BlobData::File(f) => match f.get_cache() { + match *self.get_blob_data(blob_id) { + BlobData::File(ref f) => match f.get_cache() { Some(bytes) => BlobResult::Bytes(bytes.clone()), None => BlobResult::File(f.get_id(), f.get_size() as usize), }, - BlobData::Memory(s) => BlobResult::Bytes(s.clone()), + BlobData::Memory(ref s) => BlobResult::Bytes(s.clone()), BlobData::Sliced(_, _) => panic!("This blob doesn't have a parent."), } } @@ -1931,39 +1929,27 @@ impl GlobalScope { /// <https://w3c.github.io/FileAPI/#dfn-size> pub(crate) fn get_blob_size(&self, blob_id: &BlobId) -> u64 { - let blob_state = self.blob_state.borrow(); let parent = { - let blob_info = blob_state - .get(blob_id) - .expect("get_blob_size called for a unknown blob."); - match blob_info.blob_impl.blob_data() { - BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())), + match *self.get_blob_data(blob_id) { + BlobData::Sliced(parent, rel_pos) => Some((parent, rel_pos)), _ => None, } }; 
match parent { Some((parent_id, rel_pos)) => { - let parent_info = blob_state - .get(&parent_id) - .expect("Parent of blob whose size is unknown."); - let parent_size = match parent_info.blob_impl.blob_data() { - BlobData::File(f) => f.get_size(), - BlobData::Memory(v) => v.len() as u64, + let parent_size = match *self.get_blob_data(&parent_id) { + BlobData::File(ref f) => f.get_size(), + BlobData::Memory(ref v) => v.len() as u64, BlobData::Sliced(_, _) => panic!("Blob ancestry should be only one level."), }; rel_pos.to_abs_range(parent_size as usize).len() as u64 }, - None => { - let blob_info = blob_state - .get(blob_id) - .expect("Blob whose size is unknown."); - match blob_info.blob_impl.blob_data() { - BlobData::File(f) => f.get_size(), - BlobData::Memory(v) => v.len() as u64, - BlobData::Sliced(_, _) => { - panic!("It was previously checked that this blob does not have a parent.") - }, - } + None => match *self.get_blob_data(blob_id) { + BlobData::File(ref f) => f.get_size(), + BlobData::Memory(ref v) => v.len() as u64, + BlobData::Sliced(_, _) => { + panic!("It was previously checked that this blob does not have a parent.") + }, }, } } @@ -1979,7 +1965,7 @@ impl GlobalScope { blob_info.has_url = true; match blob_info.blob_impl.blob_data() { - BlobData::Sliced(parent, rel_pos) => Some((*parent, rel_pos.clone())), + BlobData::Sliced(parent, rel_pos) => Some((*parent, *rel_pos)), _ => None, } }; @@ -2020,12 +2006,8 @@ impl GlobalScope { let origin = get_blob_origin(&self.get_url()); let (tx, rx) = profile_ipc::channel(self.time_profiler_chan().clone()).unwrap(); - let msg = FileManagerThreadMsg::AddSlicedURLEntry( - *parent_file_id, - rel_pos.clone(), - tx, - origin.clone(), - ); + let msg = + FileManagerThreadMsg::AddSlicedURLEntry(*parent_file_id, *rel_pos, tx, origin.clone()); self.send_to_file_manager(msg); match rx.recv().expect("File manager thread is down.") { Ok(new_id) => { diff --git a/components/script/dom/htmlstyleelement.rs 
b/components/script/dom/htmlstyleelement.rs index 0deb507f283..194b81729fb 100644 --- a/components/script/dom/htmlstyleelement.rs +++ b/components/script/dom/htmlstyleelement.rs @@ -4,6 +4,7 @@ use std::cell::Cell; +use content_security_policy as csp; use dom_struct::dom_struct; use html5ever::{LocalName, Prefix}; use js::rust::HandleObject; @@ -97,8 +98,21 @@ impl HTMLStyleElement { return; } - let window = node.owner_window(); let doc = self.owner_document(); + + // Step 5: If the Should element's inline behavior be blocked by Content Security Policy? algorithm + // returns "Blocked" when executed upon the style element, "style", + // and the style element's child text content, then return. [CSP] + if doc.should_elements_inline_type_behavior_be_blocked( + self.upcast(), + csp::InlineCheckType::Style, + &node.child_text_content(), + ) == csp::CheckResult::Blocked + { + return; + } + + let window = node.owner_window(); let data = node .GetTextContent() .expect("Element.textContent must be a string"); diff --git a/components/script/dom/servoparser/html.rs b/components/script/dom/servoparser/html.rs index 07848c87678..7fd0429612a 100644 --- a/components/script/dom/servoparser/html.rs +++ b/components/script/dom/servoparser/html.rs @@ -302,11 +302,10 @@ pub(crate) fn serialize_html_fragment<S: Serializer>( serializer.write_processing_instruction(pi.target(), &data)?; }, - NodeTypeId::DocumentFragment(_) => {}, + NodeTypeId::DocumentFragment(_) | NodeTypeId::Attr => {}, NodeTypeId::Document(_) => panic!("Can't serialize Document node itself"), NodeTypeId::Element(_) => panic!("Element shouldn't appear here"), - NodeTypeId::Attr => panic!("Attr shouldn't appear here"), }, SerializationCommand::SerializeShadowRoot(shadow_root) => { // Shadow roots are serialized as template elements with a fixed set of diff --git a/components/script/dom/urlpattern.rs b/components/script/dom/urlpattern.rs new file mode 100644 index 00000000000..c811d3a9a70 --- /dev/null +++ 
b/components/script/dom/urlpattern.rs @@ -0,0 +1,204 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use dom_struct::dom_struct; +use js::rust::HandleObject; +use script_bindings::codegen::GenericUnionTypes::USVStringOrURLPatternInit; +use script_bindings::error::{Error, Fallible}; +use script_bindings::reflector::Reflector; +use script_bindings::root::DomRoot; +use script_bindings::script_runtime::CanGc; +use script_bindings::str::USVString; + +use crate::dom::bindings::codegen::Bindings::URLPatternBinding; +use crate::dom::bindings::codegen::Bindings::URLPatternBinding::URLPatternMethods; +use crate::dom::bindings::reflector::reflect_dom_object_with_proto; +use crate::dom::globalscope::GlobalScope; + +/// <https://urlpattern.spec.whatwg.org/#urlpattern> +#[dom_struct] +pub(crate) struct URLPattern { + reflector: Reflector, + + /// <https://urlpattern.spec.whatwg.org/#urlpattern-associated-url-pattern> + #[no_trace] + associated_url_pattern: urlpattern::UrlPattern, +} + +impl URLPattern { + #[cfg_attr(crown, allow(crown::unrooted_must_root))] + fn new_inherited(associated_url_pattern: urlpattern::UrlPattern) -> URLPattern { + URLPattern { + reflector: Reflector::new(), + associated_url_pattern, + } + } + + /// <https://urlpattern.spec.whatwg.org/#urlpattern-initialize> + pub(crate) fn initialize( + global: &GlobalScope, + proto: Option<HandleObject>, + input: USVStringOrURLPatternInit, + base_url: Option<USVString>, + options: &URLPatternBinding::URLPatternOptions, + can_gc: CanGc, + ) -> Fallible<DomRoot<URLPattern>> { + // The section below converts from servos types to the types used in the urlpattern crate + let base_url = base_url.map(|usv_string| usv_string.0); + let input = bindings_to_third_party::map_urlpattern_input(input, base_url.clone()); + let options = urlpattern::UrlPatternOptions { + 
ignore_case: options.ignoreCase, + }; + + // Parse and initialize the URL pattern. + let pattern_init = + urlpattern::quirks::process_construct_pattern_input(input, base_url.as_deref()) + .map_err(|error| Error::Type(format!("{error}")))?; + + let pattern = urlpattern::UrlPattern::parse(pattern_init, options) + .map_err(|error| Error::Type(format!("{error}")))?; + + let url_pattern = reflect_dom_object_with_proto( + Box::new(URLPattern::new_inherited(pattern)), + global, + proto, + can_gc, + ); + Ok(url_pattern) + } +} + +impl URLPatternMethods<crate::DomTypeHolder> for URLPattern { + // <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern> + fn Constructor( + global: &GlobalScope, + proto: Option<HandleObject>, + can_gc: CanGc, + input: USVStringOrURLPatternInit, + base_url: USVString, + options: &URLPatternBinding::URLPatternOptions, + ) -> Fallible<DomRoot<URLPattern>> { + URLPattern::initialize(global, proto, input, Some(base_url), options, can_gc) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern-input-options> + fn Constructor_( + global: &GlobalScope, + proto: Option<HandleObject>, + can_gc: CanGc, + input: USVStringOrURLPatternInit, + options: &URLPatternBinding::URLPatternOptions, + ) -> Fallible<DomRoot<URLPattern>> { + // Step 1. Run initialize given this, input, null, and options. + URLPattern::initialize(global, proto, input, None, options, can_gc) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol> + fn Protocol(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s protocol component’s pattern string. + USVString(self.associated_url_pattern.protocol().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-username> + fn Username(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s username component’s pattern string. 
+ USVString(self.associated_url_pattern.username().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-password> + fn Password(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s password component’s pattern string. + USVString(self.associated_url_pattern.password().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname> + fn Hostname(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s hostname component’s pattern string. + USVString(self.associated_url_pattern.hostname().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-port> + fn Port(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s port component’s pattern string. + USVString(self.associated_url_pattern.port().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname> + fn Pathname(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s pathname component’s pattern string. + USVString(self.associated_url_pattern.pathname().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-search> + fn Search(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s search component’s pattern string. + USVString(self.associated_url_pattern.search().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash> + fn Hash(&self) -> USVString { + // Step 1. Return this’s associated URL pattern’s hash component’s pattern string. + USVString(self.associated_url_pattern.hash().to_owned()) + } + + /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hasregexpgroups> + fn HasRegExpGroups(&self) -> bool { + // Step 1. If this’s associated URL pattern’s has regexp groups, then return true. + // Step 2. Return false. 
+ self.associated_url_pattern.has_regexp_groups() + } +} + +mod bindings_to_third_party { + use crate::dom::urlpattern::USVStringOrURLPatternInit; + + pub(super) fn map_urlpattern_input( + input: USVStringOrURLPatternInit, + base_url: Option<String>, + ) -> urlpattern::quirks::StringOrInit { + match input { + USVStringOrURLPatternInit::USVString(usv_string) => { + urlpattern::quirks::StringOrInit::String(usv_string.0) + }, + USVStringOrURLPatternInit::URLPatternInit(pattern_init) => { + let pattern_init = urlpattern::quirks::UrlPatternInit { + protocol: pattern_init + .protocol + .as_ref() + .map(|usv_string| usv_string.to_string()), + username: pattern_init + .username + .as_ref() + .map(|usv_string| usv_string.to_string()), + password: pattern_init + .password + .as_ref() + .map(|usv_string| usv_string.to_string()), + hostname: pattern_init + .hostname + .as_ref() + .map(|usv_string| usv_string.to_string()), + port: pattern_init + .port + .as_ref() + .map(|usv_string| usv_string.to_string()), + pathname: pattern_init + .pathname + .as_ref() + .map(|usv_string| usv_string.to_string()), + search: pattern_init + .search + .as_ref() + .map(|usv_string| usv_string.to_string()), + hash: pattern_init + .hash + .as_ref() + .map(|usv_string| usv_string.to_string()), + base_url, + }; + urlpattern::quirks::StringOrInit::Init(pattern_init) + }, + } + } +} diff --git a/components/script/dom/urlpattern/mod.rs b/components/script/dom/urlpattern/mod.rs deleted file mode 100644 index e92963c672b..00000000000 --- a/components/script/dom/urlpattern/mod.rs +++ /dev/null @@ -1,810 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ - -mod pattern_parser; -mod preprocessing; -mod tokenizer; - -use std::ptr; - -use dom_struct::dom_struct; -use js::jsapi::{Heap, JSObject, RegExpFlag_IgnoreCase, RegExpFlag_UnicodeSets, RegExpFlags}; -use js::rust::HandleObject; -use pattern_parser::parse_a_pattern_string; -use preprocessing::{ - canonicalize_a_hash, canonicalize_a_hostname, canonicalize_a_password, canonicalize_a_pathname, - canonicalize_a_port, canonicalize_a_protocol, canonicalize_a_search, canonicalize_a_username, - escape_a_regexp_string, process_a_url_pattern_init, -}; -use script_bindings::error::{Error, Fallible}; -use script_bindings::reflector::Reflector; -use script_bindings::root::DomRoot; -use script_bindings::script_runtime::CanGc; -use script_bindings::str::USVString; - -use crate::dom::bindings::cell::RefCell; -use crate::dom::bindings::codegen::Bindings::URLPatternBinding::{ - URLPatternInit, URLPatternMethods, URLPatternOptions, -}; -use crate::dom::bindings::reflector::reflect_dom_object_with_proto; -use crate::dom::globalscope::GlobalScope; -use crate::dom::htmlinputelement::new_js_regex; - -/// <https://urlpattern.spec.whatwg.org/#full-wildcard-regexp-value> -const FULL_WILDCARD_REGEXP_VALUE: &str = ".*"; - -/// <https://urlpattern.spec.whatwg.org/#urlpattern> -#[dom_struct] -pub(crate) struct URLPattern { - reflector: Reflector, - - /// <https://urlpattern.spec.whatwg.org/#urlpattern-associated-url-pattern> - associated_url_pattern: RefCell<URLPatternInternal>, -} - -#[derive(JSTraceable, MallocSizeOf)] -#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] -struct URLPatternInternal { - /// <https://urlpattern.spec.whatwg.org/#url-pattern-protocol-component> - protocol: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-username-component> - username: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-password-component> - password: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-hostname-component> - hostname: 
Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-port-component> - port: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-pathname-component> - pathname: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-search-component> - search: Component, - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-hash-component> - hash: Component, -} - -/// <https://urlpattern.spec.whatwg.org/#component> -#[derive(JSTraceable, MallocSizeOf)] -#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] -struct Component { - /// <https://urlpattern.spec.whatwg.org/#component-pattern-string> - pattern_string: USVString, - - /// <https://urlpattern.spec.whatwg.org/#component-regular-expression> - #[ignore_malloc_size_of = "mozjs"] - regular_expression: Box<Heap<*mut JSObject>>, - - /// <https://urlpattern.spec.whatwg.org/#component-group-name-list> - group_name_list: Vec<USVString>, - - /// <https://urlpattern.spec.whatwg.org/#component-has-regexp-groups> - has_regexp_groups: bool, -} - -/// <https://urlpattern.spec.whatwg.org/#part> -#[derive(Debug)] -struct Part { - /// <https://urlpattern.spec.whatwg.org/#part-type> - part_type: PartType, - - /// <https://urlpattern.spec.whatwg.org/#part-value> - value: String, - - /// <https://urlpattern.spec.whatwg.org/#part-modifier> - modifier: PartModifier, - - /// <https://urlpattern.spec.whatwg.org/#part-name> - name: String, - - /// <https://urlpattern.spec.whatwg.org/#part-prefix> - prefix: String, - - /// <https://urlpattern.spec.whatwg.org/#part-suffix> - suffix: String, -} - -/// <https://urlpattern.spec.whatwg.org/#part-type> -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum PartType { - /// <https://urlpattern.spec.whatwg.org/#part-type-fixed-text> - FixedText, - - /// <https://urlpattern.spec.whatwg.org/#part-type-regexp> - Regexp, - - /// <https://urlpattern.spec.whatwg.org/#part-type-segment-wildcard> - SegmentWildcard, - - /// 
<https://urlpattern.spec.whatwg.org/#part-type-full-wildcard> - FullWildcard, -} - -/// <https://urlpattern.spec.whatwg.org/#part-modifier> -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -#[allow(dead_code)] // Parser is not implemented yet -enum PartModifier { - /// <https://urlpattern.spec.whatwg.org/#part-modifier-none> - None, - - /// <https://urlpattern.spec.whatwg.org/#part-modifier-optional> - Optional, - - /// <https://urlpattern.spec.whatwg.org/#part-modifier-zero-or-more> - ZeroOrMore, - - /// <https://urlpattern.spec.whatwg.org/#part-modifier-one-or-more> - OneOrMore, -} - -/// <https://urlpattern.spec.whatwg.org/#options> -#[derive(Clone, Copy, Default)] -#[allow(dead_code)] // Parser is not fully implemented yet -struct Options { - /// <https://urlpattern.spec.whatwg.org/#options-delimiter-code-point> - delimiter_code_point: Option<char>, - - /// <https://urlpattern.spec.whatwg.org/#options-prefix-code-point> - prefix_code_point: Option<char>, - - /// <https://urlpattern.spec.whatwg.org/#options-ignore-case> - ignore_case: bool, -} - -impl Component { - fn new_unrooted() -> Self { - Self { - pattern_string: Default::default(), - regular_expression: Heap::boxed(ptr::null_mut()), - group_name_list: Default::default(), - has_regexp_groups: false, - } - } -} - -impl URLPattern { - #[cfg_attr(crown, allow(crown::unrooted_must_root))] - fn new_inherited() -> URLPattern { - let associated_url_pattern = URLPatternInternal { - protocol: Component::new_unrooted(), - username: Component::new_unrooted(), - password: Component::new_unrooted(), - hostname: Component::new_unrooted(), - port: Component::new_unrooted(), - pathname: Component::new_unrooted(), - search: Component::new_unrooted(), - hash: Component::new_unrooted(), - }; - - URLPattern { - reflector: Reflector::new(), - associated_url_pattern: RefCell::new(associated_url_pattern), - } - } - - #[cfg_attr(crown, allow(crown::unrooted_must_root))] - pub(crate) fn new_with_proto( - global: &GlobalScope, - 
proto: Option<HandleObject>, - can_gc: CanGc, - ) -> DomRoot<URLPattern> { - reflect_dom_object_with_proto(Box::new(URLPattern::new_inherited()), global, proto, can_gc) - } - - /// <https://urlpattern.spec.whatwg.org/#urlpattern-initialize> - fn initialize( - global: &GlobalScope, - proto: Option<HandleObject>, - input: &URLPatternInit, - options: &URLPatternOptions, - can_gc: CanGc, - ) -> Fallible<DomRoot<URLPattern>> { - // Step 1. Set this’s associated URL pattern to the result of create given input, baseURL, and options. - let pattern = URLPattern::new_with_proto(global, proto, can_gc); - URLPatternInternal::create( - input, - options, - &mut pattern.associated_url_pattern.borrow_mut(), - )?; - - Ok(pattern) - } -} - -impl URLPatternMethods<crate::DomTypeHolder> for URLPattern { - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-urlpattern-input-options> - fn Constructor( - global: &GlobalScope, - proto: Option<HandleObject>, - can_gc: CanGc, - input: &URLPatternInit, - options: &URLPatternOptions, - ) -> Fallible<DomRoot<URLPattern>> { - // Step 1. Run initialize given this, input, null, and options. - URLPattern::initialize(global, proto, input, options, can_gc) - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol> - fn Protocol(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s protocol component’s pattern string. - self.associated_url_pattern - .borrow() - .protocol - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-username> - fn Username(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s username component’s pattern string. - self.associated_url_pattern - .borrow() - .username - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-password> - fn Password(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s password component’s pattern string. 
- self.associated_url_pattern - .borrow() - .password - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname> - fn Hostname(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s hostname component’s pattern string. - self.associated_url_pattern - .borrow() - .hostname - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-port> - fn Port(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s port component’s pattern string. - self.associated_url_pattern - .borrow() - .port - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname> - fn Pathname(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s pathname component’s pattern string. - self.associated_url_pattern - .borrow() - .pathname - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-search> - fn Search(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s search component’s pattern string. - self.associated_url_pattern - .borrow() - .search - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash> - fn Hash(&self) -> USVString { - // Step 1. Return this’s associated URL pattern’s hash component’s pattern string. - self.associated_url_pattern - .borrow() - .hash - .pattern_string - .clone() - } - - /// <https://urlpattern.spec.whatwg.org/#dom-urlpattern-hasregexpgroups> - fn HasRegExpGroups(&self) -> bool { - // Step 1. If this’s associated URL pattern’s has regexp groups, then return true. - // Step 2. Return false. - self.associated_url_pattern.borrow().has_regexp_groups() - } -} - -impl URLPatternInternal { - /// <https://urlpattern.spec.whatwg.org/#url-pattern-create> - fn create(input: &URLPatternInit, options: &URLPatternOptions, out: &mut Self) -> Fallible<()> { - // Step 1. Let init be null. - // Step 2. 
If input is a scalar value string then: - // NOTE: We don't support strings as input yet - // Step 3. Otherwise: - // Step 3.1 Assert: input is a URLPatternInit. - // Step 3.2 If baseURL is not null, then throw a TypeError. - if input.baseURL.is_some() { - return Err(Error::Type("baseURL must be none".into())); - } - - // Step 3.3 Set init to input. - let init = input; - - // Step 4. Let processedInit be the result of process a URLPatternInit given init, "pattern", null, null, - // null, null, null, null, null, and null. - let mut processed_init = process_a_url_pattern_init(init, PatternInitType::Pattern)?; - - // Step 5. For each componentName of « "protocol", "username", "password", "hostname", "port", - // "pathname", "search", "hash" »: - // Step 5.1 If processedInit[componentName] does not exist, then set processedInit[componentName] to "*". - // NOTE: We do this later on - - // Step 6. If processedInit["protocol"] is a special scheme and processedInit["port"] is a string - // which represents its corresponding default port in radix-10 using ASCII digits then set - // processedInit["port"] to the empty string. - let default_port = processed_init - .protocol - .as_deref() - .and_then(default_port_for_special_scheme); - let given_port = processed_init - .port - .as_deref() - .map(str::parse) - .transpose() - .ok() - .flatten(); - if default_port.is_some() && default_port == given_port { - processed_init.port = Some(Default::default()); - } - - // Step 7. Let urlPattern be a new URL pattern. - // NOTE: We construct the pattern provided as the out parameter. - - // Step 8. Set urlPattern’s protocol component to the result of compiling a component given - // processedInit["protocol"], canonicalize a protocol, and default options. - Component::compile( - processed_init.protocol.as_deref().unwrap_or("*"), - Box::new(canonicalize_a_protocol), - Options::default(), - &mut out.protocol, - )?; - - // Step 9. 
Set urlPattern’s username component to the result of compiling a component given - // processedInit["username"], canonicalize a username, and default options. - Component::compile( - processed_init.username.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_username(i))), - Options::default(), - &mut out.username, - )?; - - // Step 10. Set urlPattern’s password component to the result of compiling a component given - // processedInit["password"], canonicalize a password, and default options. - Component::compile( - processed_init.password.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_password(i))), - Options::default(), - &mut out.password, - )?; - - // FIXME: Steps 11 and 12: Compile host pattern correctly - Component::compile( - processed_init.hostname.as_deref().unwrap_or("*"), - Box::new(canonicalize_a_hostname), - Options::HOSTNAME, - &mut out.hostname, - )?; - - // Step 13. Set urlPattern’s port component to the result of compiling a component given - // processedInit["port"], canonicalize a port, and default options. - Component::compile( - processed_init.port.as_deref().unwrap_or("*"), - Box::new(|i| canonicalize_a_port(i, None)), - Options::default(), - &mut out.port, - )?; - - // FIXME: Step 14: respect ignore case option from here on out - let _ = options; - - // FIXME: Steps 15-16: Compile path pattern correctly - Component::compile( - processed_init.pathname.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_pathname(i))), - Options::PATHNAME, - &mut out.pathname, - )?; - - // Step 17. Set urlPattern’s search component to the result of compiling a component given - // processedInit["search"], canonicalize a search, and compileOptions. - Component::compile( - processed_init.search.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_search(i))), - Options::default(), - &mut out.search, - )?; - - // Step 18. 
Set urlPattern’s hash component to the result of compiling a component given - // processedInit["hash"], canonicalize a hash, and compileOptions. - Component::compile( - processed_init.hash.as_deref().unwrap_or("*"), - Box::new(|i| Ok(canonicalize_a_hash(i))), - Options::default(), - &mut out.hash, - )?; - - // Step 19. Return urlPattern. - // NOTE: not necessary since we use an out parameter - Ok(()) - } - - /// <https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups> - fn has_regexp_groups(&self) -> bool { - self.protocol.has_regexp_groups || - self.username.has_regexp_groups || - self.password.has_regexp_groups || - self.hostname.has_regexp_groups || - self.port.has_regexp_groups || - self.pathname.has_regexp_groups || - self.search.has_regexp_groups || - self.hash.has_regexp_groups - } -} - -impl Component { - /// <https://urlpattern.spec.whatwg.org/#compile-a-component> - fn compile( - input: &str, - encoding_callback: EncodingCallback, - options: Options, - out: &mut Self, - ) -> Fallible<()> { - // Step 1. Let part list be the result of running parse a pattern string given input, options, - // and encoding callback. - let part_list = parse_a_pattern_string(input, options, encoding_callback)?; - - // Step 2. Let (regular expression string, name list) be the result of running generate a regular expression and - // name list given part list and options. - let (regular_expression_string, name_list) = - generate_a_regular_expression_and_name_list(&part_list, options); - - log::debug!("Compiled {input:?} (URLPattern) to {regular_expression_string:?} (Regex)"); - - // Step 3. Let flags be an empty string. - // Step 4. If options’s ignore case is true then set flags to "vi". - let flags = if options.ignore_case { - RegExpFlags { - flags_: RegExpFlag_UnicodeSets | RegExpFlag_IgnoreCase, - } - } - // Step 5. Otherwise set flags to "v" - else { - RegExpFlags { - flags_: RegExpFlag_UnicodeSets, - } - }; - - // Step 6. 
Let regular expression be RegExpCreate(regular expression string, flags). - // If this throws an exception, catch it, and throw a TypeError. - let cx = GlobalScope::get_cx(); - rooted!(in(*cx) let mut regular_expression: *mut JSObject = ptr::null_mut()); - let succeeded = new_js_regex( - cx, - &regular_expression_string, - flags, - regular_expression.handle_mut(), - ); - if !succeeded { - return Err(Error::Type(format!( - "Failed to compile {regular_expression_string:?} as a regular expression" - ))); - } - - // TODO Step 7. Let pattern string be the result of running generate a pattern string given - // part list and options. - let pattern_string = Default::default(); - - // Step 8. Let has regexp groups be false. - // Step 9. For each part of part list: - // Step 9.1 If part’s type is "regexp", then set has regexp groups to true. - let has_regexp_groups = part_list - .iter() - .any(|part| part.part_type == PartType::Regexp); - - // Step 10. Return a new component whose pattern string is pattern string, regular expression - // is regular expression, group name list is name list, and has regexp groups is has regexp groups. - out.pattern_string = pattern_string; - out.regular_expression.set(*regular_expression.handle()); - out.group_name_list = name_list; - out.has_regexp_groups = has_regexp_groups; - - Ok(()) - } -} - -/// <https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list> -fn generate_a_regular_expression_and_name_list( - part_list: &[Part], - options: Options, -) -> (String, Vec<USVString>) { - // Step 1. Let result be "^". - let mut result = String::from("^"); - - // Step 2. Let name list be a new list. - let mut name_list = vec![]; - - // Step 3. 
For each part of part list: - for part in part_list { - // Step 3.1 If part’s type is "fixed-text": - if part.part_type == PartType::FixedText { - // Step 3.1.1 If part’s modifier is "none", then append the result of running escape a regexp string given - // part’s value to the end of result. - if part.modifier == PartModifier::None { - result.push_str(&escape_a_regexp_string(&part.value)); - } - // Step 3.1.2 Otherwise: - else { - // Step 3.1.2.1 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.1.2.2 Append the result of running escape a regexp string given part’s value - // to the end of result. - result.push_str(&escape_a_regexp_string(&part.value)); - - // Step 3.1.2.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.1.2.4 Append the result of running convert a modifier to a string given part’s - // modifier to the end of result. - result.push_str(part.modifier.convert_to_string()); - } - - // Step 3.1.3 Continue. - continue; - } - - // Step 3.2 Assert: part’s name is not the empty string. - debug_assert!(!part.name.is_empty()); - - // Step 3.3 Append part’s name to name list. - name_list.push(USVString(part.name.to_string())); - - // Step 3.4 Let regexp value be part’s value. - let mut regexp_value = part.value.clone(); - - // Step 3.5 If part’s type is "segment-wildcard", then set regexp value to the result of running - // generate a segment wildcard regexp given options. - if part.part_type == PartType::SegmentWildcard { - regexp_value = generate_a_segment_wildcard_regexp(options); - } - // Step 3.6 Otherwise if part’s type is "full-wildcard", then set regexp value to full wildcard regexp value. 
- else if part.part_type == PartType::FullWildcard { - regexp_value = FULL_WILDCARD_REGEXP_VALUE.into(); - } - - // Step 3.7 If part’s prefix is the empty string and part’s suffix is the empty string: - if part.prefix.is_empty() && part.suffix.is_empty() { - // Step 3.7.1 If part’s modifier is "none" or "optional", then: - if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - // Step 3.7.1.1 Append "(" to the end of result. - result.push('('); - - // Step 3.7.1.2 Append regexp value to the end of result. - result.push_str(&regexp_value); - - // Step 3.7.1.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.7.1.4 Append the result of running convert a modifier to a string given part’s modifier - // to the end of result. - result.push_str(part.modifier.convert_to_string()); - } - // Step 3.7.2 Otherwise: - else { - // Step 3.7.2.1 Append "((?:" to the end of result. - result.push_str("((?:"); - - // Step 3.7.2.2 Append regexp value to the end of result. - result.push_str(&regexp_value); - - // Step 3.7.2.3 Append ")" to the end of result. - result.push(')'); - - // Step 3.7.2.4 Append the result of running convert a modifier to a string given part’s modifier - // to the end of result. - result.push_str(part.modifier.convert_to_string()); - - // Step 3.7.2.5 Append ")" to the end of result. - result.push(')'); - } - - // Step 3.7.3 Continue. - continue; - } - - // Step 3.8 If part’s modifier is "none" or "optional": - if matches!(part.modifier, PartModifier::None | PartModifier::Optional) { - // Step 3.8.1 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.8.2 Append the result of running escape a regexp string given part’s prefix - // to the end of result. - result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.8.3 Append "(" to the end of result. - result.push('('); - - // Step 3.8.4 Append regexp value to the end of result. 
- result.push_str(&regexp_value); - - // Step 3.8.5 Append ")" to the end of result. - result.push(')'); - - // Step 3.8.6 Append the result of running escape a regexp string given part’s suffix - // to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.8.7 Append ")" to the end of result. - result.push(')'); - - // Step 3.8.8 Append the result of running convert a modifier to a string given part’s modifier to - // the end of result. - result.push_str(part.modifier.convert_to_string()); - - // Step 3.8.9 Continue. - continue; - } - - // Step 3.9 Assert: part’s modifier is "zero-or-more" or "one-or-more". - debug_assert!(matches!( - part.modifier, - PartModifier::ZeroOrMore | PartModifier::OneOrMore - )); - - // Step 3.10 Assert: part’s prefix is not the empty string or part’s suffix is not the empty string. - debug_assert!(!part.prefix.is_empty() || !part.suffix.is_empty()); - - // Step 3.11 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.12 Append the result of running escape a regexp string given part’s prefix to the end of result. - result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.13 Append "((?:" to the end of result. - result.push_str("((?:"); - - // Step 3.14 Append regexp value to the end of result. - result.push_str(&regexp_value); - - // Step 3.15 Append ")(?:" to the end of result. - result.push_str(")(?:"); - - // Step 3.16 Append the result of running escape a regexp string given part’s suffix to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.17 Append the result of running escape a regexp string given part’s prefix to the end of result. - result.push_str(&escape_a_regexp_string(&part.prefix)); - - // Step 3.18 Append "(?:" to the end of result. - result.push_str("(?:"); - - // Step 3.19 Append regexp value to the end of result. - result.push_str(&regexp_value); - - // Step 3.20 Append "))*)" to the end of result. 
- result.push_str("))*)"); - - // Step 3.21 Append the result of running escape a regexp string given part’s suffix to the end of result. - result.push_str(&escape_a_regexp_string(&part.suffix)); - - // Step 3.22 Append ")" to the end of result. - result.push(')'); - - // Step 3.23 If part’s modifier is "zero-or-more" then append "?" to the end of result. - if part.modifier == PartModifier::ZeroOrMore { - result.push('?'); - } - } - - // Step 4. Append "$" to the end of result. - result.push('$'); - - // Step 5. Return (result, name list). - (result, name_list) -} - -/// <https://urlpattern.spec.whatwg.org/#encoding-callback> -type EncodingCallback = Box<dyn Fn(&str) -> Fallible<String>>; - -// FIXME: Deduplicate this with the url crate -/// <https://url.spec.whatwg.org/#special-scheme> -fn default_port_for_special_scheme(scheme: &str) -> Option<u16> { - match scheme { - "ftp" => Some(21), - "http" | "ws" => Some(80), - "https" | "wss" => Some(443), - _ => None, - } -} - -/// <https://url.spec.whatwg.org/#special-scheme> -fn is_special_scheme(scheme: &str) -> bool { - matches!(scheme, "ftp" | "http" | "https" | "ws" | "wss") -} - -/// <https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp> -fn generate_a_segment_wildcard_regexp(options: Options) -> String { - // Step 1. Let result be "[^". - let mut result = String::from("[^"); - - // Step 2. Append the result of running escape a regexp string given options’s - // delimiter code point to the end of result. - result.push_str(&escape_a_regexp_string( - &options - .delimiter_code_point - .map(|c| c.to_string()) - .unwrap_or_default(), - )); - - // Step 3. Append "]+?" to the end of result. - result.push_str("]+?"); - - // Step 4. Return result. - result -} - -impl PartModifier { - /// <https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string> - fn convert_to_string(&self) -> &'static str { - match self { - // Step 1. If modifier is "zero-or-more", then return "*". 
- Self::ZeroOrMore => "*", - // Step 2. If modifier is "optional", then return "?". - Self::Optional => "?", - // Step 3. If modifier is "one-or-more", then return "+". - Self::OneOrMore => "+", - // Step 4. Return the empty string. - _ => "", - } - } -} - -impl Options { - /// <https://urlpattern.spec.whatwg.org/#hostname-options> - const HOSTNAME: Self = Self { - delimiter_code_point: Some('.'), - prefix_code_point: None, - ignore_case: false, - }; - - /// <https://urlpattern.spec.whatwg.org/#pathname-options> - const PATHNAME: Self = Self { - delimiter_code_point: Some('/'), - prefix_code_point: Some('/'), - ignore_case: false, - }; -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum PatternInitType { - Pattern, - Url, -} - -impl Part { - fn new(part_type: PartType, value: String, modifier: PartModifier) -> Self { - Self { - part_type, - value, - modifier, - name: String::new(), - prefix: String::new(), - suffix: String::new(), - } - } -} diff --git a/components/script/dom/urlpattern/pattern_parser.rs b/components/script/dom/urlpattern/pattern_parser.rs deleted file mode 100644 index 3147c5649f4..00000000000 --- a/components/script/dom/urlpattern/pattern_parser.rs +++ /dev/null @@ -1,473 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use script_bindings::error::{Error, Fallible}; - -use crate::dom::urlpattern::tokenizer::{Token, TokenType, TokenizePolicy, tokenize}; -use crate::dom::urlpattern::{ - EncodingCallback, FULL_WILDCARD_REGEXP_VALUE, Options, Part, PartModifier, PartType, - generate_a_segment_wildcard_regexp, -}; - -/// <https://urlpattern.spec.whatwg.org/#parse-a-pattern-string> -pub(super) fn parse_a_pattern_string( - input: &str, - options: Options, - encoding_callback: EncodingCallback, -) -> Fallible<Vec<Part>> { - // Step 1. 
Let parser be a new pattern parser whose encoding callback is encoding callback and - // segment wildcard regexp is the result of running generate a segment wildcard regexp given options. - let mut parser = PatternParser::new( - generate_a_segment_wildcard_regexp(options), - encoding_callback, - ); - - // Step 2. Set parser’s token list to the result of running tokenize given input and "strict". - parser.token_list = tokenize(input, TokenizePolicy::Strict)?; - - // Step 3. While parser’s index is less than parser’s token list’s size: - while parser.index < parser.token_list.len() { - // Step 3.1 Let char token be the result of running try to consume a token given parser and "char". - let char_token = parser.try_to_consume_a_token(TokenType::Char); - - // Step 3.2 Let name token be the result of running try to consume a token given parser and "name". - let mut name_token = parser.try_to_consume_a_token(TokenType::Name); - - // Step 3.3 Let regexp or wildcard token be the result of running try to consume a - // regexp or wildcard token given parser and name token. - let mut regexp_or_wildcard_token = - parser.try_to_consume_a_regexp_or_wildcard_token(name_token); - - // Step 3.4 If name token is not null or regexp or wildcard token is not null: - if name_token.is_some() || regexp_or_wildcard_token.is_some() { - // Step 3.4.1 Let prefix be the empty string. - let mut prefix = ""; - - // Step 3.4.2 If char token is not null then set prefix to char token’s value. - if let Some(char_token) = char_token { - prefix = char_token.value; - } - - // Step 3.4.3 If prefix is not the empty string and not options’s prefix code point: - let prefix_is_prefix_code_point = options.prefix_code_point.is_some_and(|c| { - let mut buffer = [0; 4]; - prefix == c.encode_utf8(&mut buffer) - }); - if !prefix.is_empty() && !prefix_is_prefix_code_point { - // Step 3.4.3.1 Append prefix to the end of parser’s pending fixed value. 
- parser.pending_fixed_value.push_str(prefix); - - // Step 3.4.3.2 Set prefix to the empty string. - prefix = ""; - } - - // Step 3.4.4 Run maybe add a part from the pending fixed value given parser. - parser.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 3.4.5 Let modifier token be the result of running try to consume a modifier token given parser. - let modifier_token = parser.try_to_consume_a_modifier_token(); - - // Step 3.4.6 Run add a part given parser, prefix, name token, regexp or wildcard token, - // the empty string, and modifier token. - parser.add_a_part( - prefix, - name_token, - regexp_or_wildcard_token, - "", - modifier_token, - )?; - - // Step 3.4.7 Continue. - continue; - } - - // Step 3.5 Let fixed token be char token. - let mut fixed_token = char_token; - - // Step 3.6 If fixed token is null, then set fixed token to the result of running - // try to consume a token given parser and "escaped-char". - if fixed_token.is_none() { - fixed_token = parser.try_to_consume_a_token(TokenType::EscapedChar); - } - - // Step 3.7 If fixed token is not null: - if let Some(fixed_token) = fixed_token { - // Step 3.7.1 Append fixed token’s value to parser’s pending fixed value. - parser.pending_fixed_value.push_str(fixed_token.value); - - // Step 3.7.2 Continue. - continue; - } - - // Step 3.8 Let open token be the result of running try to consume a token given parser and "open". - let open_token = parser.try_to_consume_a_token(TokenType::Open); - - // Step 3.9 If open token is not null: - if open_token.is_some() { - // Step 3.9.1 Let prefix be the result of running consume text given parser. - let prefix = parser.consume_text(); - - // Step 3.9.2 Set name token to the result of running try to consume a token given parser and "name". - name_token = parser.try_to_consume_a_token(TokenType::Name); - - // Step 3.9.3 Set regexp or wildcard token to the result of running try to consume a regexp or wildcard - // token given parser and name token. 
- regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token); - - // Step 3.9.4 Let suffix be the result of running consume text given parser. - let suffix = parser.consume_text(); - - // Step 3.9.5 Run consume a required token given parser and "close". - parser.consume_a_required_token(TokenType::Close)?; - - // Step 3.9.6 Let modifier token be the result of running try to consume a modifier token given parser. - let modifier_token = parser.try_to_consume_a_modifier_token(); - - // Step 3.9.7 Run add a part given parser, prefix, name token, regexp or wildcard token, - // suffix, and modifier token. - parser.add_a_part( - &prefix, - name_token, - regexp_or_wildcard_token, - &suffix, - modifier_token, - )?; - - // Step 3.9.8 Continue. - continue; - } - - // Step 3.10 Run maybe add a part from the pending fixed value given parser. - parser.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 3.11 Run consume a required token given parser and "end". - parser.consume_a_required_token(TokenType::End)?; - } - - Ok(parser.part_list) -} - -/// <https://urlpattern.spec.whatwg.org/#pattern-parser> -struct PatternParser<'a> { - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-token-list> - token_list: Vec<Token<'a>>, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-encoding-callback> - encoding_callback: EncodingCallback, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-segment-wildcard-regexp> - segment_wildcard_regexp: String, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-part-list> - part_list: Vec<Part>, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-pending-fixed-value> - pending_fixed_value: String, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-index> - index: usize, - - /// <https://urlpattern.spec.whatwg.org/#pattern-parser-next-numeric-name> - next_numeric_name: usize, -} - -impl<'a> PatternParser<'a> { - fn new(segment_wildcard_regexp: String, 
encoding_callback: EncodingCallback) -> Self { - Self { - token_list: vec![], - segment_wildcard_regexp, - part_list: vec![], - pending_fixed_value: String::new(), - index: 0, - next_numeric_name: 0, - encoding_callback, - } - } - - /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-token> - fn try_to_consume_a_token(&mut self, token_type: TokenType) -> Option<Token<'a>> { - // Step 1. Assert: parser’s index is less than parser’s token list size. - debug_assert!(self.index < self.token_list.len()); - - // Step 2. Let next token be parser’s token list[parser’s index]. - let next_token = self.token_list[self.index]; - - // Step 3. If next token’s type is not type return null. - if next_token.token_type != token_type { - return None; - } - - // Step 4. Increment parser’s index by 1. - self.index += 1; - - // Step 5. Return next token. - Some(next_token) - } - - /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token> - fn try_to_consume_a_modifier_token(&mut self) -> Option<Token<'a>> { - // Step 1. Let token be the result of running try to consume a token given parser and "other-modifier". - let token = self.try_to_consume_a_token(TokenType::OtherModifier); - - // Step 2. If token is not null, then return token. - if token.is_some() { - return token; - } - - // Step 3. Set token to the result of running try to consume a token given parser and "asterisk". - let token = self.try_to_consume_a_token(TokenType::Asterisk); - - // Step 4. Return token. - token - } - - /// <https://urlpattern.spec.whatwg.org/#consume-a-required-token> - fn consume_a_required_token(&mut self, token_type: TokenType) -> Fallible<Token<'a>> { - // Step 1. Let result be the result of running try to consume a token given parser and type. - let result = self.try_to_consume_a_token(token_type); - - // Step 2. If result is null, then throw a TypeError. 
- let Some(result) = result else { - return Err(Error::Type(format!( - "Missing required token {token_type:?}" - ))); - }; - - // Step 3. Return result. - Ok(result) - } - - /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token> - fn try_to_consume_a_regexp_or_wildcard_token( - &mut self, - name_token: Option<Token<'a>>, - ) -> Option<Token<'a>> { - // Step 1. Let token be the result of running try to consume a token given parser and "regexp". - let mut token = self.try_to_consume_a_token(TokenType::Regexp); - - // Step 2. If name token is null and token is null, then set token to the result of running - // try to consume a token given parser and "asterisk". - if name_token.is_none() && token.is_none() { - token = self.try_to_consume_a_token(TokenType::Asterisk); - } - - // Step 3. Return token. - token - } - - /// <https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value> - fn maybe_add_a_part_from_the_pending_fixed_value(&mut self) -> Fallible<()> { - // Step 1. If parser’s pending fixed value is the empty string, then return. - if self.pending_fixed_value.is_empty() { - return Ok(()); - } - - // Step 2. Let encoded value be the result of running parser’s encoding callback - // given parser’s pending fixed value. - let encoded_value = (self.encoding_callback)(&self.pending_fixed_value)?; - - // Step 3. Set parser’s pending fixed value to the empty string. - self.pending_fixed_value.clear(); - - // Step 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is "none". - let part = Part::new(PartType::FixedText, encoded_value, PartModifier::None); - - // Step 5. Append part to parser’s part list. 
- self.part_list.push(part); - - Ok(()) - } - - /// <https://urlpattern.spec.whatwg.org/#add-a-part> - fn add_a_part( - &mut self, - prefix: &str, - name_token: Option<Token<'a>>, - regexp_or_wildcard_token: Option<Token<'a>>, - suffix: &str, - modifier_token: Option<Token<'a>>, - ) -> Fallible<()> { - // Step 1. Let modifier be "none". - let mut modifier = PartModifier::None; - - // Step 2. If modifier token is not null: - if let Some(modifier_token) = modifier_token { - // Step 2.1 If modifier token’s value is "?" then set modifier to "optional". - if modifier_token.value == "?" { - modifier = PartModifier::Optional; - } - // Step 2.2 Otherwise if modifier token’s value is "*" then set modifier to "zero-or-more". - else if modifier_token.value == "*" { - modifier = PartModifier::ZeroOrMore; - } - // Step 2.3 Otherwise if modifier token’s value is "+" then set modifier to "one-or-more". - else if modifier_token.value == "+" { - modifier = PartModifier::OneOrMore; - } - } - - // Step 3. If name token is null and regexp or wildcard token is null and modifier is "none": - if name_token.is_none() && - regexp_or_wildcard_token.is_none() && - modifier == PartModifier::None - { - // Step 3.1 Append prefix to the end of parser’s pending fixed value. - self.pending_fixed_value.push_str(prefix); - - // Step 3.2 Return - return Ok(()); - } - - // Step 4. Run maybe add a part from the pending fixed value given parser. - self.maybe_add_a_part_from_the_pending_fixed_value()?; - - // Step 5. If name token is null and regexp or wildcard token is null: - if name_token.is_none() && regexp_or_wildcard_token.is_none() { - // Step 5.1 Assert: suffix is the empty string. - debug_assert!(suffix.is_empty()); - - // Step 5.2 If prefix is the empty string, then return. - if prefix.is_empty() { - return Ok(()); - } - - // Step 5.3 Let encoded value be the result of running parser’s encoding callback given prefix. 
- let encoded_value = (self.encoding_callback)(prefix)?; - - // Step 5.4 Let part be a new part whose type is "fixed-text", - // value is encoded value, and modifier is modifier. - let part = Part::new(PartType::FixedText, encoded_value, modifier); - - // Step 5.5 Append part to parser’s part list. - self.part_list.push(part); - - // Step 6. Return. - return Ok(()); - } - - // Step 6. Let regexp value be the empty string. - let mut regexp_value = { - // Step 7. If regexp or wildcard token is null, then set regexp value to parser’s segment wildcard regexp. - match regexp_or_wildcard_token { - None => self.segment_wildcard_regexp.clone(), - Some(token) => { - // Step 8. Otherwise if regexp or wildcard token’s type is "asterisk", - // then set regexp value to the full wildcard regexp value. - if token.token_type == TokenType::Asterisk { - FULL_WILDCARD_REGEXP_VALUE.into() - } - // Step 9. Otherwise set regexp value to regexp or wildcard token’s value. - else { - token.value.to_owned() - } - }, - } - }; - - // Step 10. Let type be "regexp". - let mut part_type = PartType::Regexp; - - // Step 11. If regexp value is parser’s segment wildcard regexp: - if regexp_value == self.segment_wildcard_regexp { - // Step 11.1 Set type to "segment-wildcard". - part_type = PartType::SegmentWildcard; - - // Step 11.2 Set regexp value to the empty string. - regexp_value.clear(); - } - // Step 12. Otherwise if regexp value is the full wildcard regexp value: - else if regexp_value == FULL_WILDCARD_REGEXP_VALUE { - // Step 12.1 Set type to "full-wildcard". - part_type = PartType::FullWildcard; - - // Step 12.2 Set regexp value to the empty string. - regexp_value.clear(); - } - - // Step 13. Let name be the empty string. - let mut name = String::new(); - - // Step 14. If name token is not null, then set name to name token’s value. - if let Some(name_token) = name_token { - name = name_token.value.to_owned(); - } - // Step 15. 
Otherwise if regexp or wildcard token is not null: - else if regexp_or_wildcard_token.is_some() { - // Step 15.1 Set name to parser’s next numeric name, serialized. - name = self.next_numeric_name.to_string(); - - // Step 15.2 Increment parser’s next numeric name by 1. - self.next_numeric_name = self.next_numeric_name.wrapping_add(1); - } - - // Step 16. If the result of running is a duplicate name given parser and name is true, then throw a TypeError. - if self.is_a_duplicate_name(&name) { - return Err(Error::Type(format!("Duplicate part name: {name:?}"))); - } - - // Step 17. Let encoded prefix be the result of running parser’s encoding callback given prefix. - let encoded_prefix = (self.encoding_callback)(prefix)?; - - // Step 18. Let encoded suffix be the result of running parser’s encoding callback given suffix. - let encoded_suffix = (self.encoding_callback)(suffix)?; - - // Step 19. Let part be a new part whose type is type, value is regexp value, modifier is modifier, - // name is name, prefix is encoded prefix, and suffix is encoded suffix. - let part = Part { - part_type, - value: regexp_value, - modifier, - name, - prefix: encoded_prefix, - suffix: encoded_suffix, - }; - - // Step 20. Append part to parser’s part list. - self.part_list.push(part); - - Ok(()) - } - - // <https://urlpattern.spec.whatwg.org/#is-a-duplicate-name> - fn is_a_duplicate_name(&self, name: &str) -> bool { - // Step 1. For each part of parser’s part list: - for part in &self.part_list { - // Step 1.1 If part’s name is name, then return true. - if part.name == name { - return true; - } - } - - // Step 2. Return false. - false - } - - /// <https://urlpattern.spec.whatwg.org/#consume-text> - fn consume_text(&mut self) -> String { - // Step 1. Let result be the empty string. - let mut result = String::new(); - - // Step 2. While true: - loop { - // Step 2.1 Let token be the result of running try to consume a token given parser and "char". 
- let mut token = self.try_to_consume_a_token(TokenType::Char); - - // Step 2.2 If token is null, then set token to the result of running - // try to consume a token given parser and "escaped-char". - if token.is_none() { - token = self.try_to_consume_a_token(TokenType::EscapedChar); - } - - // Step 2.3 If token is null, then break. - let Some(token) = token else { - break; - }; - - // Step 2.4 Append token’s value to the end of result. - result.push_str(token.value); - } - - result - } -} diff --git a/components/script/dom/urlpattern/preprocessing.rs b/components/script/dom/urlpattern/preprocessing.rs deleted file mode 100644 index 7fc3c136315..00000000000 --- a/components/script/dom/urlpattern/preprocessing.rs +++ /dev/null @@ -1,659 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use script_bindings::error::{Error, Fallible}; -use script_bindings::str::USVString; -use url::Url; - -use crate::dom::bindings::codegen::Bindings::URLPatternBinding::URLPatternInit; -use crate::dom::urlpattern::{PatternInitType, default_port_for_special_scheme, is_special_scheme}; - -/// <https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit> -pub(super) fn process_a_url_pattern_init( - init: &URLPatternInit, - init_type: PatternInitType, -) -> Fallible<URLPatternInit> { - // Step 1. Let result be the result of creating a new URLPatternInit. - let mut result = URLPatternInit::default(); - - // TODO Step 2. If protocol is not null, set result["protocol"] to protocol. - // TODO Step 3. If username is not null, set result["username"] to username. - // TODO Step 4. If password is not null, set result["password"] to password. - // TODO Step 5. If hostname is not null, set result["hostname"] to hostname. - // TODO Step 6. If port is not null, set result["port"] to port. - // TODO Step 7. 
If pathname is not null, set result["pathname"] to pathname. - // TODO Step 8. If search is not null, set result["search"] to search. - // TODO Step 9. If hash is not null, set result["hash"] to hash. - - // Step 10. Let baseURL be null. - let mut base_url: Option<Url> = None; - - // Step 11. If init["baseURL"] exists: - if let Some(init_base_url) = init.baseURL.as_ref() { - // Step 11.1 Set baseURL to the result of running the basic URL parser on init["baseURL"]. - let Ok(parsed_base_url) = init_base_url.0.parse() else { - // Step 11.2 If baseURL is failure, then throw a TypeError. - return Err(Error::Type(format!( - "Failed to parse {:?} as URL", - init_base_url.0 - ))); - }; - let base_url = base_url.insert(parsed_base_url); - - // Step 11.3 If init["protocol"] does not exist, then set result["protocol"] to the result of - // processing a base URL string given baseURL’s scheme and type. - if init.protocol.is_none() { - result.protocol = Some(USVString(process_a_base_url_string( - base_url.scheme(), - init_type, - ))); - } - - // Step 11.4. If type is not "pattern" and init contains none of "protocol", "hostname", - // "port" and "username", then set result["username"] to the result of processing a base URL string - // given baseURL’s username and type. - if init_type != PatternInitType::Pattern && - init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.username.is_none() - { - result.username = Some(USVString(process_a_base_url_string( - base_url.username(), - init_type, - ))); - } - - // Step 11.5 If type is not "pattern" and init contains none of "protocol", "hostname", "port", - // "username" and "password", then set result["password"] to the result of processing a base URL string - // given baseURL’s password and type. 
- if init_type != PatternInitType::Pattern && - init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.username.is_none() && - init.password.is_none() - { - result.password = Some(USVString(process_a_base_url_string( - base_url.password().unwrap_or_default(), - init_type, - ))); - } - - // Step 11.6 If init contains neither "protocol" nor "hostname", then: - if init.protocol.is_none() && init.hostname.is_none() { - // Step 11.6.1 Let baseHost be the empty string. - // Step 11.6.2 If baseURL’s host is not null, then set baseHost to its serialization. - let base_host = base_url - .host() - .map(|host| host.to_string()) - .unwrap_or_default(); - - // Step 11.6.3 Set result["hostname"] to the result of processing a base URL string given baseHost and type. - result.hostname = Some(USVString(process_a_base_url_string(&base_host, init_type))); - } - - // Step 11.7 If init contains none of "protocol", "hostname", and "port", then: - if init.protocol.is_none() && init.hostname.is_none() && init.port.is_none() { - match base_url.port() { - // Step 11.7.1 If baseURL’s port is null, then set result["port"] to the empty string. - None => { - result.port = Some(USVString(String::new())); - }, - // Step 11.7.2 Otherwise, set result["port"] to baseURL’s port, serialized. - Some(port) => { - result.port = Some(USVString(port.to_string())); - }, - } - } - - // Step 11.8 If init contains none of "protocol", "hostname", "port", and "pathname", then set - // result["pathname"] to the result of processing a base URL string given the result of - // URL path serializing baseURL and type. 
- if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() - { - result.pathname = Some(USVString(process_a_base_url_string( - base_url.path(), - init_type, - ))); - } - - // Step 11.9 If init contains none of "protocol", "hostname", "port", "pathname", - // and "search", then: - if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() && - init.search.is_none() - { - // Step 11.9.1 Let baseQuery be baseURL’s query. - let base_query = base_url.query(); - - // Step 11.9.2 If baseQuery is null, then set baseQuery to the empty string. - let base_query = base_query.unwrap_or_default(); - - // Step 11.9.3 Set result["search"] to the result of processing a base URL string given baseQuery and type. - result.search = Some(USVString(process_a_base_url_string(base_query, init_type))); - } - - // Step 11.10 If init contains none of "protocol", "hostname", - // "port", "pathname", "search", and "hash", then: - if init.protocol.is_none() && - init.hostname.is_none() && - init.port.is_none() && - init.pathname.is_none() && - init.search.is_none() && - init.hash.is_none() - { - // Step 11.10.1 Let baseFragment be baseURL’s fragment. - let base_fragment = base_url.fragment(); - - // Step 11.10.2 If baseFragment is null, then set baseFragment to the empty string. - let base_fragment = base_fragment.unwrap_or_default(); - - // Step 11.10.3 Set result["hash"] to the result of processing a base URL string - // given baseFragment and type. - result.hash = Some(USVString(process_a_base_url_string( - base_fragment, - init_type, - ))); - } - } - - // Step 12. If init["protocol"] exists, then set result["protocol"] to the result of process protocol for init - // given init["protocol"] and type. - if let Some(protocol) = &init.protocol { - result.protocol = Some(USVString(process_a_protocol_for_init(protocol, init_type)?)); - } - - // Step 13. 
If init["username"] exists, then set result["username"] to the result of - // process username for init given init["username"] and type. - if let Some(username) = &init.username { - result.username = Some(USVString(process_username_for_init(username, init_type))); - } - - // Step 14. If init["password"] exists, then set result["password"] to the result of - // process password for init given init["password"] and type. - if let Some(password) = &init.password { - result.password = Some(USVString(process_password_for_init(password, init_type))); - } - - // Step 15. If init["hostname"] exists, then set result["hostname"] to the result of - // process hostname for init given init["hostname"] and type. - if let Some(hostname) = &init.hostname { - result.hostname = Some(USVString(process_hostname_for_init(hostname, init_type)?)); - } - - // Step 16. Let resultProtocolString be result["protocol"] if it exists; otherwise the empty string. - let result_protocol_string = result.protocol.as_deref().unwrap_or_default(); - - // Step 17. If init["port"] exists, then set result["port"] to the result of process port for init - // given init["port"], resultProtocolString, and type. - if let Some(port) = &init.port { - result.port = Some(USVString(process_port_for_init( - port, - result_protocol_string, - init_type, - )?)); - } - - // Step 18. If init["pathname"] exists: - if let Some(path_name) = &init.pathname { - // Step 18.1 Set result["pathname"] to init["pathname"]. - // NOTE: This is not necessary - the spec uses result["pathname"] in the following section, - // but it could just as well use init["pathname"]. 
Storing the string in an intermediate - // variable makes the code simpler - let mut result_pathname = path_name.to_string(); - - // Step 18.2 If the following are all true: - // * baseURL is not null; - // * baseURL does not have an opaque path; and - // * the result of running is an absolute pathname given result["pathname"] and type is false, - if let Some(base_url) = base_url { - if !base_url.cannot_be_a_base() && !is_an_absolute_pathname(path_name, init_type) { - // Step 18.2.1 Let baseURLPath be the result of running process a base URL string given the result - // of URL path serializing baseURL and type. - let base_url_path = process_a_base_url_string(base_url.path(), init_type); - - // Step 18.2.2 Let slash index be the index of the last U+002F (/) code point found in baseURLPath, - // interpreted as a sequence of code points, or null if there are no instances of the code point. - let slash_index = base_url_path.rfind('/'); - - // Step 18.2.3 If slash index is not null: - if let Some(slash_index) = slash_index { - // Step 18.2.3.1 Let new pathname be the code point substring from 0 to slash index + 1 - // within baseURLPath. - let mut new_pathname = base_url_path[..=slash_index].to_owned(); - - // Step 18.2.3.2 Append result["pathname"] to the end of new pathname. - new_pathname.push_str(path_name); - - // Step 18.2.3.3 Set result["pathname"] to new pathname. - result_pathname = new_pathname; - } - } - } - - // Step 18.3 Set result["pathname"] to the result of process pathname for init given result["pathname"], - // resultProtocolString, and type. - result.pathname = Some(USVString(process_pathname_for_init( - &result_pathname, - result_protocol_string, - init_type, - )?)); - } - - // Step 19. If init["search"] exists then set result["search"] to the result of - // process search for init given init["search"] and type. - if let Some(search) = &init.search { - result.search = Some(USVString(process_search_for_init(search, init_type))); - } - - // Step 20. 
If init["hash"] exists then set result["hash"] to the result of - // process hash for init given init["hash"] and type. - if let Some(hash) = &init.hash { - result.hash = Some(USVString(process_hash_for_init(hash, init_type))); - } - - // Step 21. Return result. - Ok(result) -} - -/// <https://urlpattern.spec.whatwg.org/#process-protocol-for-init> -fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible<String> { - // Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any. - let stripped_value = input.strip_suffix(':').unwrap_or(input); - - // Step 2. If type is "pattern" then return strippedValue. - if init_type == PatternInitType::Pattern { - return Ok(stripped_value.to_owned()); - } - - // Step 3. Return the result of running canonicalize a protocol given strippedValue. - canonicalize_a_protocol(stripped_value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-username-for-init> -fn process_username_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. If type is "pattern" then return value. - if init_type == PatternInitType::Pattern { - return value.to_owned(); - } - - // Step 2. Return the result of running canonicalize a username given value. - canonicalize_a_username(value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-password-for-init> -fn process_password_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. If type is "pattern" then return value. - if init_type == PatternInitType::Pattern { - return value.to_owned(); - } - - // Step 2. Return the result of running canonicalize a password given value. - canonicalize_a_password(value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-hostname-for-init> -fn process_hostname_for_init(value: &str, init_type: PatternInitType) -> Fallible<String> { - // Step 1. If type is "pattern" then return value. 
- if init_type == PatternInitType::Pattern { - return Ok(value.to_owned()); - } - - // Step 2. Return the result of running canonicalize a hostname given value. - canonicalize_a_hostname(value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-port-for-init> -fn process_port_for_init( - port_value: &str, - protocol_value: &str, - init_type: PatternInitType, -) -> Fallible<String> { - // Step 1. If type is "pattern" then return portValue. - if init_type == PatternInitType::Pattern { - return Ok(port_value.to_owned()); - } - - // Step 2. Return the result of running canonicalize a port given portValue and protocolValue. - canonicalize_a_port(port_value, Some(protocol_value)) -} - -/// <https://urlpattern.spec.whatwg.org/#process-pathname-for-init> -fn process_pathname_for_init( - path_name_value: &str, - protocol_value: &str, - init_type: PatternInitType, -) -> Fallible<String> { - // Step 1. If type is "pattern" then return pathnameValue. - if init_type == PatternInitType::Pattern { - return Ok(path_name_value.to_owned()); - } - - // Step 2. If protocolValue is a special scheme or the empty string, then return the result of - // running canonicalize a pathname given pathnameValue. - if is_special_scheme(protocol_value) || protocol_value.is_empty() { - return Ok(canonicalize_a_pathname(path_name_value)); - } - - // Step 2. Return the result of running canonicalize an opaque pathname given pathnameValue. - canonicalize_an_opaque_pathname(path_name_value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-search-for-init> -fn process_search_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. Let strippedValue be the given value with a single leading U+003F (?) removed, if any. - let stripped_value = value.strip_prefix('?').unwrap_or(value); - - // Step 2. If type is "pattern" then return strippedValue. - if init_type == PatternInitType::Pattern { - return stripped_value.to_owned(); - } - - // Step 3. 
Return the result of running canonicalize a search given strippedValue. - canonicalize_a_search(stripped_value) -} - -/// <https://urlpattern.spec.whatwg.org/#process-hash-for-init> -fn process_hash_for_init(value: &str, init_type: PatternInitType) -> String { - // Step 1. Let strippedValue be the given value with a single leading U+0023 (#) removed, if any. - let stripped_value = value.strip_prefix('#').unwrap_or(value); - - // Step 2. If type is "pattern" then return strippedValue. - if init_type == PatternInitType::Pattern { - return stripped_value.to_owned(); - } - - // Step 3. Return the result of running canonicalize a hash given strippedValue. - canonicalize_a_hash(stripped_value) -} - -/// <https://urlpattern.spec.whatwg.org/#url-pattern-create-a-dummy-url> -fn create_a_dummy_url() -> Url { - // Step 1. Let dummyInput be "https://dummy.invalid/". - let dummy_input = "https://dummy.invalid/"; - - // Step 2. Return the result of running the basic URL parser on dummyInput. - dummy_input - .parse() - .expect("parsing dummy input cannot fail") -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol> -pub(super) fn canonicalize_a_protocol(value: &str) -> Fallible<String> { - // Step 1. If value is the empty string, return value. - if value.is_empty() { - return Ok(String::new()); - } - - // Step 2. Let parseResult be the result of running the basic URL parser - // given value followed by "://dummy.invalid/". - let Ok(parse_result) = Url::parse(&format!("{value}://dummy.invalid/")) else { - // Step 3. If parseResult is failure, then throw a TypeError. - return Err(Error::Type(format!( - "Failed to canonicalize {value:?} as a protocol" - ))); - }; - - // Step 4. Return parseResult’s scheme. - Ok(parse_result.scheme().to_owned()) -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-username> -pub(super) fn canonicalize_a_username(input: &str) -> String { - // Step 1. If value is the empty string, return value. 
- if input.is_empty() { - return input.to_owned(); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. Set the username given dummyURL and value. - dummy_url.set_username(input).unwrap(); - - // Step 4. Return dummyURL’s username. - dummy_url.username().to_owned() -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-password> -pub(super) fn canonicalize_a_password(input: &str) -> String { - // Step 1. If value is the empty string, return value. - if input.is_empty() { - return input.to_owned(); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. Set the password given dummyURL and value. - dummy_url.set_password(Some(input)).unwrap(); - - // Step 4. Return dummyURL’s password. - dummy_url.password().unwrap().to_owned() -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname> -pub(super) fn canonicalize_a_hostname(input: &str) -> Fallible<String> { - // Step 1. If value is the empty string, return value. - if input.is_empty() { - return Ok(String::new()); - } - - // Step 2. Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // FIXME: The rest of the algorithm needs functionality that the url crate - // does not expose. We need to figure out if there's a way around that or - // if we want to reimplement that functionality here - - if dummy_url.set_host(Some(input)).is_err() { - return Err(Error::Type(format!( - "Failed to canonicalize hostname: {input:?}" - ))); - } - - Ok(dummy_url.host_str().unwrap().to_owned()) -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-port> -pub(super) fn canonicalize_a_port( - port_value: &str, - protocol_value: Option<&str>, -) -> Fallible<String> { - // Step 1. If portValue is the empty string, return portValue. - if port_value.is_empty() { - return Ok(String::new()); - } - - // Step 2. 
Let dummyURL be the result of creating a dummy URL. - let mut dummy_url = create_a_dummy_url(); - - // Step 3. If protocolValue was given, then set dummyURL’s scheme to protocolValue. - if let Some(protocol_value) = protocol_value { - dummy_url.set_scheme(protocol_value).unwrap(); - } - - // Step 4. Let parseResult be the result of running basic URL parser given portValue - // with dummyURL as url and port state as state override. - // NOTE: The url crate does not expose these parsing concepts, so we try - // to recreate the parsing step here. - let port_value = port_value.trim(); - let Ok(port) = port_value.parse::<u16>() else { - // Step 5. If parseResult is failure, then throw a TypeError. - return Err(Error::Type(format!( - "{port_value:?} is not a valid port number" - ))); - }; - - // Step 6. Return dummyURL’s port, serialized, or empty string if it is null. - if let Some(scheme) = protocol_value { - if default_port_for_special_scheme(scheme) == Some(port) { - return Ok(String::new()); - } - } - Ok(port.to_string()) -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname> -pub(super) fn canonicalize_a_pathname(value: &str) -> String { - // Step 1. If value is the empty string, then return value. - if value.is_empty() { - return String::new(); - } - - // NOTE: This is not what the spec says, but the url crate does not expose the required functionality. - // TODO: Investigate whether this is different in practice - let mut dummy_url = create_a_dummy_url(); - dummy_url.set_path(value); - - dummy_url.path().to_owned() -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname> -pub(super) fn canonicalize_an_opaque_pathname(value: &str) -> Fallible<String> { - // NOTE: The url crate doesn't expose the functionality needed by this algorithm. - // Instead we create a url with an opaque path that is value and then return that opaque path, - // which should be equivalent. 
- let Ok(url) = Url::parse(&format!("foo:{value}")) else { - return Err(Error::Type(format!( - "Could not parse {value:?} as opaque path" - ))); - }; - - Ok(url.path().to_owned()) -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-search> -pub(super) fn canonicalize_a_search(value: &str) -> String { - if value.is_empty() { - return String::new(); - } - - let Ok(url) = Url::parse(&format!("http://example.com?{value}")) else { - log::warn!("canonicalizing a search should never fail"); - return String::new(); - }; - - url.query().unwrap_or_default().to_owned() -} - -/// <https://urlpattern.spec.whatwg.org/#canonicalize-a-hash> -pub(super) fn canonicalize_a_hash(value: &str) -> String { - if value.is_empty() { - return String::new(); - } - - let Ok(url) = Url::parse(&format!("http://example.com#{value}")) else { - log::warn!("canonicalizing a hash should never fail"); - return String::new(); - }; - - url.fragment().unwrap_or_default().to_owned() -} - -/// <https://urlpattern.spec.whatwg.org/#is-an-absolute-pathname> -fn is_an_absolute_pathname(input: &str, init_type: PatternInitType) -> bool { - let mut chars = input.chars(); - - // Step 1. If input is the empty string, then return false. - let Some(first_char) = chars.next() else { - return false; - }; - - // Step 2. If input[0] is U+002F (/), then return true. - if first_char == '/' { - return true; - } - - // Step 3. If type is "url", then return false. - if init_type == PatternInitType::Url { - return false; - } - - // Step 4. If input’s code point length is less than 2, then return false. - let Some(second_char) = chars.next() else { - return false; - }; - - // Step 5. If input[0] is U+005C (\) and input[1] is U+002F (/), then return true. - if first_char == '\\' && second_char == '/' { - return true; - } - - // Step 6. If input[0] is U+007B ({) and input[1] is U+002F (/), then return true. - if first_char == '{' && second_char == '/' { - return true; - } - - // Step 7. Return false. 
- false -} - -/// <https://urlpattern.spec.whatwg.org/#process-a-base-url-string> -fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String { - // Step 1. Assert: input is not null. - // NOTE: The type system ensures that already - - // Step 2. If type is not "pattern" return input. - if init_type != PatternInitType::Pattern { - return input.to_owned(); - } - - // Step 3. Return the result of escaping a pattern string given input. - escape_a_pattern_string(input) -} - -/// Implements functionality that is shared between <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string> -/// and <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>. -/// -/// These two algorithms are identical except for the set of characters that they escape, so implementing them -/// seperately does not make sense. -fn escape_a_string(input: &str, to_escape: &[char]) -> String { - // Step 1. Assert: input is an ASCII string. - debug_assert!( - input.is_ascii(), - "Expected input to be ASCII, got {input:?}" - ); - - // Step 2. Let result be the empty string. - let mut result = String::with_capacity(input.len()); - - // Step 3. Let index be 0. - // Step 4. While index is less than input’s length: - // Step 4.1 Let c be input[index]. - // Step 4.2 Increment index by 1. - for c in input.chars() { - // Step 4.3 If c is one of: [..] then append "\" to the end of result. - if to_escape.contains(&c) { - result.push('\\'); - } - - // Step 4.4 Append c to the end of result. - result.push(c); - } - - // Step 5. Return result. 
- result -} - -/// <https://urlpattern.spec.whatwg.org/#escape-a-pattern-string> -fn escape_a_pattern_string(input: &str) -> String { - escape_a_string(input, &['+', '*', '?', ':', '{', '}', '(', ')', '\\']) -} - -/// <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string> -pub(super) fn escape_a_regexp_string(input: &str) -> String { - escape_a_string( - input, - &[ - '.', '+', '*', '?', '^', '$', '{', '}', '(', ')', '[', ']', '|', '/', '\\', - ], - ) -} diff --git a/components/script/dom/urlpattern/tokenizer.rs b/components/script/dom/urlpattern/tokenizer.rs deleted file mode 100644 index e2d70217c3f..00000000000 --- a/components/script/dom/urlpattern/tokenizer.rs +++ /dev/null @@ -1,524 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ - -use script_bindings::error::{Error, Fallible}; - -/// <https://urlpattern.spec.whatwg.org/#tokenize> -pub(super) fn tokenize(input: &str, policy: TokenizePolicy) -> Fallible<Vec<Token>> { - // Step 1. Let tokenizer be a new tokenizer. - // Step 2. Set tokenizer’s input to input. - // Step 3. Set tokenizer’s policy to policy. - let mut tokenizer = Tokenizer { - input, - policy, - index: 0, - next_index: 0, - token_list: vec![], - code_point: char::MIN, - }; - - // Step 4. While tokenizer’s index is less than tokenizer’s input’s code point length: - while tokenizer.index < tokenizer.input.len() { - // Step 4.1 Run seek and get the next code point given tokenizer and tokenizer’s index. - tokenizer.seek_and_get_the_next_code_point(tokenizer.index); - - match tokenizer.code_point { - // Step 4.2 If tokenizer’s code point is U+002A (*): - '*' => { - // Step 4.2.1 Run add a token with default position and length given tokenizer and "asterisk". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Asterisk); - - // Step 4.2.2 Continue. 
- continue; - }, - // Step 4.3 If tokenizer’s code point is U+002B (+) or U+003F (?): - '+' | '?' => { - // Step 4.3.1 Run add a token with default position and length given tokenizer and "other-modifier". - tokenizer.add_a_token_with_default_position_and_length(TokenType::OtherModifier); - - // Step 4.3.2 Continue. - continue; - }, - // Step 4.4 If tokenizer’s code point is U+005C (\): - '\\' => { - // Step 4.4.1 If tokenizer’s index is equal to tokenizer’s input’s code point length − 1: - if tokenizer.is_done() { - // Step 4.4.1.1 Run process a tokenizing error given tokenizer, tokenizer’s next index, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(tokenizer.next_index, tokenizer.index)?; - - // Step 4.4.1.2 Continue. - continue; - } - - // Step 4.4.2 Let escaped index be tokenizer’s next index. - let escaped_index = tokenizer.index; - - // Step 4.4.3 Run get the next code point given tokenizer. - tokenizer.get_the_next_code_point(); - - // Step 4.4.4 Run add a token with default length given tokenizer, "escaped-char", - // tokenizer’s next index, and escaped index. - tokenizer.add_a_token_with_default_length( - TokenType::EscapedChar, - tokenizer.next_index, - escaped_index, - ); - - // Step 4.4.5 Continue. - continue; - }, - // Step 4.5 If tokenizer’s code point is U+007B ({): - '{' => { - // Step 4.5.1 Run add a token with default position and length given tokenizer and "open". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Open); - - // Step 4.5.2 Continue. - continue; - }, - // Step 4.6 If tokenizer’s code point is U+007D (}): - '}' => { - // Step 4.6.1 Run add a token with default position and length given tokenizer and "close". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Close); - - // Step 4.6.2 Continue. - continue; - }, - // Step 4.7 If tokenizer’s code point is U+003A (:): - ':' => { - // Step 4.7.1 Let name position be tokenizer’s next index. 
- let mut name_position = tokenizer.next_index; - - // Step 4.7.2 Let name start be name position. - let name_start = name_position; - - // Step 4.7.3 While name position is less than tokenizer’s input’s code point length: - while name_position < tokenizer.input.len() { - // Step 4.7.3.1 Run seek and get the next code point given tokenizer and name position. - tokenizer.seek_and_get_the_next_code_point(name_position); - - // Step 4.7.3.2 Let first code point be true if name position equals name start - // and false otherwise. - let first_code_point = name_position == name_start; - - // Step 4.7.3.3 Let valid code point be the result of running is a valid name - // code point given tokenizer’s code point and first code point. - let valid_code_point = - is_a_valid_name_code_point(tokenizer.code_point, first_code_point); - - // Step 4.7.3.4 If valid code point is false break. - if !valid_code_point { - break; - } - - // Step 4.6.3.5 Set name position to tokenizer’s next index. - name_position = tokenizer.next_index; - } - - // Step 4.7.4 If name position is less than or equal to name start: - if name_position <= name_start { - // Step 4.7.4.1 Run process a tokenizing error given tokenizer, name start, and tokenizer’s index. - tokenizer.process_a_tokenizing_error(name_start, tokenizer.index)?; - - // Step 4.7.4.2 Continue. - continue; - } - - // Step 4.7.5 Run add a token with default length given tokenizer, "name", name position, - // and name start. - tokenizer.add_a_token_with_default_length( - TokenType::Name, - name_position, - name_start, - ); - - // Step 4.7.6 Continue. - continue; - }, - // Step 4.8 If tokenizer’s code point is U+0028 ((): - '(' => { - // Step 4.8.1 Let depth be 1. - let mut depth = 1; - - // Step 4.8.2 Let regexp position be tokenizer’s next index. - let mut regexp_position = tokenizer.next_index; - - // Step 4.8.3 Let regexp start be regexp position. - let regexp_start = regexp_position; - - // Step 4.8.4 Let error be false. 
- let mut error = false; - - // Step 4.8.5 While regexp position is less than tokenizer’s input’s code point length: - while regexp_position < tokenizer.input.len() { - // Step 4.8.5.1 Run seek and get the next code point given tokenizer and regexp position. - tokenizer.seek_and_get_the_next_code_point(regexp_position); - - // Step 4.8.5.2 If tokenizer’s code point is not an ASCII code point: - if !tokenizer.code_point.is_ascii() { - // Step 4.8.5.1.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.1.2 Set error to true. - error = true; - - // Step 4.8.5.1.2 Break. - break; - } - - // Step 4.8.5.3 If regexp position equals regexp start and tokenizer’s code point is U+003F (?): - if regexp_position == regexp_start && tokenizer.code_point == '?' { - // Step 4.8.5.3.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.3.2 Set error to true. - error = true; - - // Step 4.8.5.3.3 Break. - break; - } - - // Step 4.8.5.4 If tokenizer’s code point is U+005C (\): - if tokenizer.code_point == '\\' { - // Step 4.8.5.4.1 If regexp position equals tokenizer’s input’s code point length − 1: - if tokenizer.is_last_character(regexp_position) { - // Step 4.8.5.4.1.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.4.1.2 Set error to true. - error = true; - - // Step 4.8.5.4.1.3 Break - break; - } - - // Step 4.8.5.4.2 Run get the next code point given tokenizer. 
- tokenizer.get_the_next_code_point(); - - // Step 4.8.5.4.3 If tokenizer’s code point is not an ASCII code point: - if !tokenizer.code_point.is_ascii() { - // Step 4.8.5.4.3.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.4.3.2 Set error to true. - error = true; - - // Step 4.8.5.4.3.3 Break - break; - } - - // Step 4.8.5.4.4 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - - // Step 4.8.5.4.5 Continue. - continue; - } - - // Step 4.8.5.5 If tokenizer’s code point is U+0029 ()): - if tokenizer.code_point == ')' { - // Step 4.8.5.5.1 Decrement depth by 1. - depth -= 1; - - // Step 4.8.5.5.2 If depth is 0: - if depth == 0 { - // Step 4.8.5.5.2.1 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - - // Step 4.8.5.5.2.2 Break. - break; - } - } - // Step 4.8.5.6 Otherwise if tokenizer’s code point is U+0028 ((): - else if tokenizer.code_point == '(' { - // Step 4.8.5.6.1 Increment depth by 1. - depth += 1; - - // Step 4.8.5.6.2 If regexp position equals tokenizer’s input’s code point length − 1: - if tokenizer.is_last_character(regexp_position) { - // Step 4.8.5.6.2.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.6.2.2 Set error to true. - error = true; - - // Step 4.8.5.6.2.3 Break - break; - } - - // Step 4.8.5.6.3 Let temporary position be tokenizer’s next index. - let temporary_position = tokenizer.next_index; - - // Step 4.8.5.6.4 Run get the next code point given tokenizer. - tokenizer.get_the_next_code_point(); - - // Step 4.8.5.6.5 If tokenizer’s code point is not U+003F (?): - if tokenizer.code_point != '?' 
{ - // Step 4.8.5.6.5.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.5.6.5.2 Set error to true. - error = true; - - // Step 4.8.5.6.5.3 Break. - break; - } - - // Step 4.8.5.6.6 Set tokenizer’s next index to temporary position. - tokenizer.next_index = temporary_position; - } - - // Step 4.8.5.7 Set regexp position to tokenizer’s next index. - regexp_position = tokenizer.next_index; - } - - // Step 4.8.6 If error is true continue. - if error { - continue; - } - - // Step 4.8.7 If depth is not zero: - if depth != 0 { - // Step 4.8.7.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.7.2 Continue. - continue; - } - - // Step 4.8.8 Let regexp length be regexp position − regexp start − 1. - let regexp_length = regexp_position - regexp_start - 1; - - // Step 4.8.9 If regexp length is zero: - if regexp_length == 0 { - // Step 4.8.9.1 Run process a tokenizing error given tokenizer, regexp start, - // and tokenizer’s index. - tokenizer.process_a_tokenizing_error(regexp_start, tokenizer.index)?; - - // Step 4.8.9.2 Continue. - continue; - } - - // Step 4.8.10 Run add a token given tokenizer, "regexp", regexp position, - // regexp start, and regexp length. - tokenizer.add_a_token( - TokenType::Regexp, - regexp_position, - regexp_start, - regexp_length, - ); - - // Step 4.8.11 Continue. - continue; - }, - _ => { - // Step 4.9 Run add a token with default position and length given tokenizer and "char". - tokenizer.add_a_token_with_default_position_and_length(TokenType::Char); - }, - } - } - - // Step 5. Run add a token with default length given tokenizer, "end", tokenizer’s index, and tokenizer’s index. 
- tokenizer.add_a_token_with_default_length(TokenType::End, tokenizer.index, tokenizer.index); - - // Step 6.Return tokenizer’s token list. - Ok(tokenizer.token_list) -} - -/// <https://urlpattern.spec.whatwg.org/#tokenizer> -struct Tokenizer<'a> { - /// <https://urlpattern.spec.whatwg.org/#tokenizer-input> - input: &'a str, - - /// <https://urlpattern.spec.whatwg.org/#tokenizer-policy> - policy: TokenizePolicy, - - /// <https://urlpattern.spec.whatwg.org/#tokenizer-index> - /// - /// Note that we deviate the from the spec and index bytes, not code points. - index: usize, - - /// <https://urlpattern.spec.whatwg.org/#tokenizer-next-index> - /// - /// Note that we deviate the from the spec and index bytes, not code points. - next_index: usize, - - /// <https://urlpattern.spec.whatwg.org/#tokenizer-token-list> - token_list: Vec<Token<'a>>, - - /// <https://urlpattern.spec.whatwg.org/#tokenizer-code-point> - code_point: char, -} - -/// <https://urlpattern.spec.whatwg.org/#token> -#[derive(Clone, Copy, Debug)] -#[allow(dead_code)] // index isn't used yet, because constructor strings aren't parsed -pub(super) struct Token<'a> { - /// <https://urlpattern.spec.whatwg.org/#token-index> - pub(super) index: usize, - - /// <https://urlpattern.spec.whatwg.org/#token-value> - pub(super) value: &'a str, - - /// <https://urlpattern.spec.whatwg.org/#token-type> - pub(super) token_type: TokenType, -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) enum TokenType { - /// <https://urlpattern.spec.whatwg.org/#token-type-open> - Open, - - /// <https://urlpattern.spec.whatwg.org/#token-type-close> - Close, - - /// <https://urlpattern.spec.whatwg.org/#token-type-regexp> - Regexp, - - /// <https://urlpattern.spec.whatwg.org/#token-type-name> - Name, - - /// <https://urlpattern.spec.whatwg.org/#token-type-char> - Char, - - /// <https://urlpattern.spec.whatwg.org/#token-type-escaped-char> - EscapedChar, - - /// <https://urlpattern.spec.whatwg.org/#token-type-other-modifier> - 
OtherModifier, - - /// <https://urlpattern.spec.whatwg.org/#token-type-asterisk> - Asterisk, - - /// <https://urlpattern.spec.whatwg.org/#token-type-end> - End, - - /// <https://urlpattern.spec.whatwg.org/#token-type-invalid-char> - InvalidChar, -} - -/// <https://urlpattern.spec.whatwg.org/#tokenize-policy> -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub(super) enum TokenizePolicy { - /// <https://urlpattern.spec.whatwg.org/#tokenize-policy-strict> - Strict, - - /// <https://urlpattern.spec.whatwg.org/#tokenize-policy-lenient> - Lenient, -} - -impl Tokenizer<'_> { - fn is_last_character(&self, position: usize) -> bool { - self.input[position..].chars().count() == 1 - } - - fn is_done(&self) -> bool { - self.input[self.next_index..].is_empty() - } - - /// <https://urlpattern.spec.whatwg.org/#get-the-next-code-point> - fn get_the_next_code_point(&mut self) { - // Step 1. Set tokenizer’s code point to the Unicode code point in tokenizer’s - // input at the position indicated by tokenizer’s next index. - self.code_point = self.input[self.next_index..] - .chars() - .next() - .expect("URLPattern tokenizer is trying to read out of bounds"); - - // Step 2. Increment tokenizer’s next index by 1. - // NOTE: Because our next_index is indexing bytes (not code points) we use - // the utf8 length of the code point instead. - self.next_index = self.next_index.wrapping_add(self.code_point.len_utf8()); - } - - /// <https://urlpattern.spec.whatwg.org/#seek-and-get-the-next-code-point> - fn seek_and_get_the_next_code_point(&mut self, index: usize) { - // Step 1. Set tokenizer’s next index to index. - self.next_index = index; - - // Step 2. Run get the next code point given tokenizer. - self.get_the_next_code_point(); - } - - /// <https://urlpattern.spec.whatwg.org/#add-a-token> - fn add_a_token( - &mut self, - token_type: TokenType, - next_position: usize, - value_position: usize, - value_length: usize, - ) { - // Step 1. Let token be a new token. - // Step 2. 
Set token’s type to type. - // Step 3. Set token’s index to tokenizer’s index. - // Step 4. Set token’s value to the code point substring from value position - // with length value length within tokenizer’s input. - let token = Token { - token_type, - index: self.index, - value: &self.input[value_position..][..value_length], - }; - - // Step 5. Append token to the back of tokenizer’s token list. - self.token_list.push(token); - - // Step 6. Set tokenizer’s index to next position. - self.index = next_position; - } - - /// <https://urlpattern.spec.whatwg.org/#add-a-token-with-default-position-and-length> - fn add_a_token_with_default_position_and_length(&mut self, token_type: TokenType) { - // Step 1. Run add a token with default length given tokenizer, type, - // tokenizer’s next index, and tokenizer’s index. - self.add_a_token_with_default_length(token_type, self.next_index, self.index); - } - - /// <https://urlpattern.spec.whatwg.org/#add-a-token-with-default-length> - fn add_a_token_with_default_length( - &mut self, - token_type: TokenType, - next_position: usize, - value_position: usize, - ) { - // Step 1. Let computed length be next position − value position. - let computed_length = next_position - value_position; - - // Step 2. Run add a token given tokenizer, type, next position, value position, and computed length. - self.add_a_token(token_type, next_position, value_position, computed_length); - } - - /// <https://urlpattern.spec.whatwg.org/#process-a-tokenizing-error> - fn process_a_tokenizing_error( - &mut self, - next_position: usize, - value_position: usize, - ) -> Fallible<()> { - // Step 1. If tokenizer’s policy is "strict", then throw a TypeError. - if self.policy == TokenizePolicy::Strict { - return Err(Error::Type("Failed to tokenize URL pattern".into())); - } - - // Step 2. Assert: tokenizer’s policy is "lenient". - debug_assert_eq!(self.policy, TokenizePolicy::Lenient); - - // Step 3. 
Run add a token with default length given tokenizer, "invalid-char", - // next position, and value position. - self.add_a_token_with_default_length(TokenType::InvalidChar, next_position, value_position); - - Ok(()) - } -} - -/// <https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point> -fn is_a_valid_name_code_point(code_point: char, first: bool) -> bool { - // FIXME: implement this check - _ = first; - code_point.is_alphabetic() -} |