aboutsummaryrefslogtreecommitdiffstats
path: root/components
diff options
context:
space:
mode:
authorSimon Wülker <simon.wuelker@arcor.de>2025-04-07 20:08:22 +0200
committerGitHub <noreply@github.com>2025-04-07 18:08:22 +0000
commit8f1c23692eb9736aa7978cafad314520b977ef8e (patch)
tree31a2e91e0a352c1f38017fdcb1aeb15be8ba26b1 /components
parent2b63e60e8f2740f55a3a4fa0d918097e3b6fec97 (diff)
downloadservo-8f1c23692eb9736aa7978cafad314520b977ef8e.tar.gz
servo-8f1c23692eb9736aa7978cafad314520b977ef8e.zip
Create a parser for URLPatterns (#36382)
This change implements the pattern parser, completing the "parsing pipeline" for URL patterns. Testing: Primarily `urlpattern/urlpattern-hasregexpgroups.any.js`, some other subtests start to pass too. --------- Signed-off-by: Simon Wülker <simon.wuelker@arcor.de>
Diffstat (limited to 'components')
-rw-r--r--components/script/dom/urlpattern.rs458
1 files changed, 449 insertions, 9 deletions
diff --git a/components/script/dom/urlpattern.rs b/components/script/dom/urlpattern.rs
index 603db4e08b8..d4d80ccac0f 100644
--- a/components/script/dom/urlpattern.rs
+++ b/components/script/dom/urlpattern.rs
@@ -349,7 +349,7 @@ impl URLPatternInternal {
.transpose()
.ok()
.flatten();
- if default_port == given_port {
+ if default_port.is_some() && default_port == given_port {
processed_init.port = Some(Default::default());
}
@@ -534,8 +534,126 @@ fn parse_a_pattern_string(
// Step 2. Set parser’s token list to the result of running tokenize given input and "strict".
parser.token_list = tokenize(input, TokenizePolicy::Strict)?;
- // TODO: Implement the rest of this algorithm
- Ok(vec![])
+ // Step 3. While parser’s index is less than parser’s token list’s size:
+ while parser.index < parser.token_list.len() {
+ // Step 3.1 Let char token be the result of running try to consume a token given parser and "char".
+ let char_token = parser.try_to_consume_a_token(TokenType::Char);
+
+ // Step 3.2 Let name token be the result of running try to consume a token given parser and "name".
+ let mut name_token = parser.try_to_consume_a_token(TokenType::Name);
+
+ // Step 3.3 Let regexp or wildcard token be the result of running try to consume a
+ // regexp or wildcard token given parser and name token.
+ let mut regexp_or_wildcard_token =
+ parser.try_to_consume_a_regexp_or_wildcard_token(name_token);
+
+ // Step 3.4 If name token is not null or regexp or wildcard token is not null:
+ if name_token.is_some() || regexp_or_wildcard_token.is_some() {
+ // Step 3.4.1 Let prefix be the empty string.
+ let mut prefix = "";
+
+ // Step 3.4.2 If char token is not null then set prefix to char token’s value.
+ if let Some(char_token) = char_token {
+ prefix = char_token.value;
+ }
+
+ // Step 3.4.3 If prefix is not the empty string and not options’s prefix code point:
+ let prefix_is_prefix_code_point = options.prefix_code_point.is_some_and(|c| {
+ let mut buffer = [0; 4];
+ prefix == c.encode_utf8(&mut buffer)
+ });
+ if !prefix.is_empty() && !prefix_is_prefix_code_point {
+ // Step 3.4.3.1 Append prefix to the end of parser’s pending fixed value.
+ parser.pending_fixed_value.push_str(prefix);
+
+ // Step 3.4.3.2 Set prefix to the empty string.
+ prefix = "";
+ }
+
+ // Step 3.4.4 Run maybe add a part from the pending fixed value given parser.
+ parser.maybe_add_a_part_from_the_pending_fixed_value()?;
+
+ // Step 3.4.5 Let modifier token be the result of running try to consume a modifier token given parser.
+ let modifier_token = parser.try_to_consume_a_modifier_token();
+
+ // Step 3.4.6 Run add a part given parser, prefix, name token, regexp or wildcard token,
+ // the empty string, and modifier token.
+ parser.add_a_part(
+ prefix,
+ name_token,
+ regexp_or_wildcard_token,
+ "",
+ modifier_token,
+ )?;
+
+ // Step 3.4.7 Continue.
+ continue;
+ }
+
+ // Step 3.5 Let fixed token be char token.
+ let mut fixed_token = char_token;
+
+ // Step 3.6 If fixed token is null, then set fixed token to the result of running
+ // try to consume a token given parser and "escaped-char".
+ if fixed_token.is_none() {
+ fixed_token = parser.try_to_consume_a_token(TokenType::EscapedChar);
+ }
+
+ // Step 3.7 If fixed token is not null:
+ if let Some(fixed_token) = fixed_token {
+ // Step 3.7.1 Append fixed token’s value to parser’s pending fixed value.
+ parser.pending_fixed_value.push_str(fixed_token.value);
+
+ // Step 3.7.2 Continue.
+ continue;
+ }
+
+ // Step 3.8 Let open token be the result of running try to consume a token given parser and "open".
+ let open_token = parser.try_to_consume_a_token(TokenType::Open);
+
+ // Step 3.9 If open token is not null:
+ if open_token.is_some() {
+ // Step 3.9.1 Let prefix be the result of running consume text given parser.
+ let prefix = parser.consume_text();
+
+ // Step 3.9.2 Set name token to the result of running try to consume a token given parser and "name".
+ name_token = parser.try_to_consume_a_token(TokenType::Name);
+
+ // Step 3.9.3 Set regexp or wildcard token to the result of running try to consume a regexp or wildcard
+ // token given parser and name token.
+ regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token);
+
+ // Step 3.9.4 Let suffix be the result of running consume text given parser.
+ let suffix = parser.consume_text();
+
+ // Step 3.9.5 Run consume a required token given parser and "close".
+ parser.consume_a_required_token(TokenType::Close)?;
+
+ // Step 3.9.6 Let modifier token be the result of running try to consume a modifier token given parser.
+ let modifier_token = parser.try_to_consume_a_modifier_token();
+
+ // Step 3.9.7 Run add a part given parser, prefix, name token, regexp or wildcard token,
+ // suffix, and modifier token.
+ parser.add_a_part(
+ &prefix,
+ name_token,
+ regexp_or_wildcard_token,
+ &suffix,
+ modifier_token,
+ )?;
+
+ // Step 3.9.8 Continue.
+ continue;
+ }
+
+ // Step 3.10 Run maybe add a part from the pending fixed value given parser.
+ parser.maybe_add_a_part_from_the_pending_fixed_value()?;
+
+ // Step 3.11 Run consume a required token given parser and "end".
+ parser.consume_a_required_token(TokenType::End)?;
+ }
+
+ Ok(parser.part_list)
}
/// <https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list>
@@ -1037,6 +1155,24 @@ enum TokenType {
struct PatternParser<'a> {
/// <https://urlpattern.spec.whatwg.org/#pattern-parser-token-list>
token_list: Vec<Token<'a>>,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-encoding-callback>
+ encoding_callback: EncodingCallback,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-segment-wildcard-regexp>
+ segment_wildcard_regexp: String,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-part-list>
+ part_list: Vec<Part>,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-pending-fixed-value>
+ pending_fixed_value: String,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-index>
+ index: usize,
+
+ /// <https://urlpattern.spec.whatwg.org/#pattern-parser-next-numeric-name>
+ next_numeric_name: usize,
}
/// <https://urlpattern.spec.whatwg.org/#tokenize-policy>
@@ -1124,12 +1260,303 @@ enum PatternInitType {
Url,
}
-impl PatternParser<'_> {
+impl<'a> PatternParser<'a> {
fn new(segment_wildcard_regexp: String, encoding_callback: EncodingCallback) -> Self {
- // This function will look more useful when the parser is implemented
- _ = segment_wildcard_regexp;
- _ = encoding_callback;
- Self { token_list: vec![] }
+ Self {
+ token_list: vec![],
+ segment_wildcard_regexp,
+ part_list: vec![],
+ pending_fixed_value: String::new(),
+ index: 0,
+ next_numeric_name: 0,
+ encoding_callback,
+ }
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-token>
+ fn try_to_consume_a_token(&mut self, token_type: TokenType) -> Option<Token<'a>> {
+ // Step 1. Assert: parser’s index is less than parser’s token list size.
+ debug_assert!(self.index < self.token_list.len());
+
+ // Step 2. Let next token be parser’s token list[parser’s index].
+ let next_token = self.token_list[self.index];
+
+ // Step 3. If next token’s type is not type return null.
+ if next_token.token_type != token_type {
+ return None;
+ }
+
+ // Step 4. Increment parser’s index by 1.
+ self.index += 1;
+
+ // Step 5. Return next token.
+ Some(next_token)
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token>
+ fn try_to_consume_a_modifier_token(&mut self) -> Option<Token<'a>> {
+ // Step 1. Let token be the result of running try to consume a token given parser and "other-modifier".
+ let token = self.try_to_consume_a_token(TokenType::OtherModifier);
+
+ // Step 2. If token is not null, then return token.
+ if token.is_some() {
+ return token;
+ }
+
+ // Step 3. Set token to the result of running try to consume a token given parser and "asterisk".
+ let token = self.try_to_consume_a_token(TokenType::Asterisk);
+
+ // Step 4. Return token.
+ token
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#consume-a-required-token>
+ fn consume_a_required_token(&mut self, token_type: TokenType) -> Fallible<Token<'a>> {
+ // Step 1. Let result be the result of running try to consume a token given parser and type.
+ let result = self.try_to_consume_a_token(token_type);
+
+ // Step 2. If result is null, then throw a TypeError.
+ let Some(result) = result else {
+ return Err(Error::Type(format!(
+ "Missing required token {token_type:?}"
+ )));
+ };
+
+ // Step 3. Return result.
+ Ok(result)
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token>
+ fn try_to_consume_a_regexp_or_wildcard_token(
+ &mut self,
+ name_token: Option<Token<'a>>,
+ ) -> Option<Token<'a>> {
+ // Step 1. Let token be the result of running try to consume a token given parser and "regexp".
+ let mut token = self.try_to_consume_a_token(TokenType::Regexp);
+
+ // Step 2. If name token is null and token is null, then set token to the result of running
+ // try to consume a token given parser and "asterisk".
+ if name_token.is_none() && token.is_none() {
+ token = self.try_to_consume_a_token(TokenType::Asterisk);
+ }
+
+ // Step 3. Return token.
+ token
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value>
+ fn maybe_add_a_part_from_the_pending_fixed_value(&mut self) -> Fallible<()> {
+ // Step 1. If parser’s pending fixed value is the empty string, then return.
+ if self.pending_fixed_value.is_empty() {
+ return Ok(());
+ }
+
+ // Step 2. Let encoded value be the result of running parser’s encoding callback
+ // given parser’s pending fixed value.
+ let encoded_value = (self.encoding_callback)(&self.pending_fixed_value)?;
+
+ // Step 3. Set parser’s pending fixed value to the empty string.
+ self.pending_fixed_value.clear();
+
+ // Step 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is "none".
+ let part = Part::new(PartType::FixedText, encoded_value, PartModifier::None);
+
+ // Step 5. Append part to parser’s part list.
+ self.part_list.push(part);
+
+ Ok(())
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#add-a-part>
+ fn add_a_part(
+ &mut self,
+ prefix: &str,
+ name_token: Option<Token<'a>>,
+ regexp_or_wildcard_token: Option<Token<'a>>,
+ suffix: &str,
+ modifier_token: Option<Token<'a>>,
+ ) -> Fallible<()> {
+ // Step 1. Let modifier be "none".
+ let mut modifier = PartModifier::None;
+
+ // Step 2. If modifier token is not null:
+ if let Some(modifier_token) = modifier_token {
+ // Step 2.1 If modifier token’s value is "?" then set modifier to "optional".
+ if modifier_token.value == "?" {
+ modifier = PartModifier::Optional;
+ }
+ // Step 2.2 Otherwise if modifier token’s value is "*" then set modifier to "zero-or-more".
+ else if modifier_token.value == "*" {
+ modifier = PartModifier::ZeroOrMore;
+ }
+ // Step 2.3 Otherwise if modifier token’s value is "+" then set modifier to "one-or-more".
+ else if modifier_token.value == "+" {
+ modifier = PartModifier::OneOrMore;
+ }
+ }
+
+ // Step 3. If name token is null and regexp or wildcard token is null and modifier is "none":
+ if name_token.is_none() &&
+ regexp_or_wildcard_token.is_none() &&
+ modifier == PartModifier::None
+ {
+ // Step 3.1 Append prefix to the end of parser’s pending fixed value.
+ self.pending_fixed_value.push_str(prefix);
+
+ // Step 3.2 Return
+ return Ok(());
+ }
+
+ // Step 4. Run maybe add a part from the pending fixed value given parser.
+ self.maybe_add_a_part_from_the_pending_fixed_value()?;
+
+ // Step 5. If name token is null and regexp or wildcard token is null:
+ if name_token.is_none() && regexp_or_wildcard_token.is_none() {
+ // Step 5.1 Assert: suffix is the empty string.
+ debug_assert!(suffix.is_empty());
+
+ // Step 5.2 If prefix is the empty string, then return.
+ if prefix.is_empty() {
+ return Ok(());
+ }
+
+ // Step 5.3 Let encoded value be the result of running parser’s encoding callback given prefix.
+ let encoded_value = (self.encoding_callback)(prefix)?;
+
+ // Step 5.4 Let part be a new part whose type is "fixed-text",
+ // value is encoded value, and modifier is modifier.
+ let part = Part::new(PartType::FixedText, encoded_value, modifier);
+
+ // Step 5.5 Append part to parser’s part list.
+ self.part_list.push(part);
+
+ // Step 6. Return.
+ return Ok(());
+ }
+
+ // Step 6. Let regexp value be the empty string.
+ let mut regexp_value = {
+ // Step 7. If regexp or wildcard token is null, then set regexp value to parser’s segment wildcard regexp.
+ match regexp_or_wildcard_token {
+ None => self.segment_wildcard_regexp.clone(),
+ Some(token) => {
+ // Step 8. Otherwise if regexp or wildcard token’s type is "asterisk",
+ // then set regexp value to the full wildcard regexp value.
+ if token.token_type == TokenType::Asterisk {
+ FULL_WILDCARD_REGEXP_VALUE.into()
+ }
+ // Step 9. Otherwise set regexp value to regexp or wildcard token’s value.
+ else {
+ token.value.to_owned()
+ }
+ },
+ }
+ };
+
+ // Step 10. Let type be "regexp".
+ let mut part_type = PartType::Regexp;
+
+ // Step 11. If regexp value is parser’s segment wildcard regexp:
+ if regexp_value == self.segment_wildcard_regexp {
+ // Step 11.1 Set type to "segment-wildcard".
+ part_type = PartType::SegmentWildcard;
+
+ // Step 11.2 Set regexp value to the empty string.
+ regexp_value.clear();
+ }
+ // Step 12. Otherwise if regexp value is the full wildcard regexp value:
+ else if regexp_value == FULL_WILDCARD_REGEXP_VALUE {
+ // Step 12.1 Set type to "full-wildcard".
+ part_type = PartType::FullWildcard;
+
+ // Step 12.2 Set regexp value to the empty string.
+ regexp_value.clear();
+ }
+
+ // Step 13. Let name be the empty string.
+ let mut name = String::new();
+
+ // Step 14. If name token is not null, then set name to name token’s value.
+ if let Some(name_token) = name_token {
+ name = name_token.value.to_owned();
+ }
+ // Step 15. Otherwise if regexp or wildcard token is not null:
+ else if regexp_or_wildcard_token.is_some() {
+ // Step 15.1 Set name to parser’s next numeric name, serialized.
+ name = self.next_numeric_name.to_string();
+
+ // Step 15.2 Increment parser’s next numeric name by 1.
+ self.next_numeric_name = self.next_numeric_name.wrapping_add(1);
+ }
+
+ // Step 16. If the result of running is a duplicate name given parser and name is true, then throw a TypeError.
+ if self.is_a_duplicate_name(&name) {
+ return Err(Error::Type(format!("Duplicate part name: {name:?}")));
+ }
+
+ // Step 17. Let encoded prefix be the result of running parser’s encoding callback given prefix.
+ let encoded_prefix = (self.encoding_callback)(prefix)?;
+
+ // Step 18. Let encoded suffix be the result of running parser’s encoding callback given suffix.
+ let encoded_suffix = (self.encoding_callback)(suffix)?;
+
+ // Step 19. Let part be a new part whose type is type, value is regexp value, modifier is modifier,
+ // name is name, prefix is encoded prefix, and suffix is encoded suffix.
+ let part = Part {
+ part_type,
+ value: regexp_value,
+ modifier,
+ name,
+ prefix: encoded_prefix,
+ suffix: encoded_suffix,
+ };
+
+ // Step 20. Append part to parser’s part list.
+ self.part_list.push(part);
+
+ Ok(())
+ }
+
+ // <https://urlpattern.spec.whatwg.org/#is-a-duplicate-name>
+ fn is_a_duplicate_name(&self, name: &str) -> bool {
+ // Step 1. For each part of parser’s part list:
+ for part in &self.part_list {
+ // Step 1.1 If part’s name is name, then return true.
+ if part.name == name {
+ return true;
+ }
+ }
+
+ // Step 2. Return false.
+ false
+ }
+
+ /// <https://urlpattern.spec.whatwg.org/#consume-text>
+ fn consume_text(&mut self) -> String {
+ // Step 1. Let result be the empty string.
+ let mut result = String::new();
+
+ // Step 2. While true:
+ loop {
+ // Step 2.1 Let token be the result of running try to consume a token given parser and "char".
+ let mut token = self.try_to_consume_a_token(TokenType::Char);
+
+ // Step 2.2 If token is null, then set token to the result of running
+ // try to consume a token given parser and "escaped-char".
+ if token.is_none() {
+ token = self.try_to_consume_a_token(TokenType::EscapedChar);
+ }
+
+ // Step 2.3 If token is null, then break.
+ let Some(token) = token else {
+ break;
+ };
+
+ // Step 2.4 Append token’s value to the end of result.
+ result.push_str(token.value);
+ }
+
+ result
}
}
@@ -1594,6 +2021,19 @@ impl Tokenizer<'_> {
}
}
+impl Part {
+ fn new(part_type: PartType, value: String, modifier: PartModifier) -> Self {
+ Self {
+ part_type,
+ value,
+ modifier,
+ name: String::new(),
+ prefix: String::new(),
+ suffix: String::new(),
+ }
+ }
+}
+
/// <https://urlpattern.spec.whatwg.org/#process-a-base-url-string>
fn process_a_base_url_string(input: &str, init_type: PatternInitType) -> String {
// Step 1. Assert: input is not null.
@@ -1659,7 +2099,7 @@ fn escape_a_regexp_string(input: &str) -> String {
/// <https://urlpattern.spec.whatwg.org/#process-protocol-for-init>
fn process_a_protocol_for_init(input: &str, init_type: PatternInitType) -> Fallible<String> {
// Step 1. Let strippedValue be the given value with a single trailing U+003A (:) removed, if any.
- let stripped_value = input.strip_prefix(':').unwrap_or(input);
+ let stripped_value = input.strip_suffix(':').unwrap_or(input);
// Step 2. If type is "pattern" then return strippedValue.
if init_type == PatternInitType::Pattern {