diff options
author | Ville Lindholm <ville@lindholm.dev> | 2024-12-08 04:01:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-08 02:01:50 +0000 |
commit | bc7fe41a02b044b52a54c7347f347557a4bd0e8d (patch) | |
tree | 328133ab7eea5f406965c24a3fd2c6cc4858c55c /components/script/xpath/parser.rs | |
parent | 264c0f972fd1a732ee1d1490d78a1d26a65c6f5f (diff) | |
download | servo-bc7fe41a02b044b52a54c7347f347557a4bd0e8d.tar.gz servo-bc7fe41a02b044b52a54c7347f347557a4bd0e8d.zip |
Add XPath parser/evaluator (#34463)
* Add XPath parser/evaluator
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* Correctly annotate XPathEvaluator IDL
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: have bindings pass in `can_gc`
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: add docstrings
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: implement PartialEq for Value for readability
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: add docstrings for CoreFunctions
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: simplify node test code
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: add unit tests for string handling xpath functions
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* put xpath features behind dom.xpath.enabled pref
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review] remove rstest and insta dev-deps
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* update wpt test expectations
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* [PR review]: tweak metadata files
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
* update wpt test expectations AGAIN
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
---------
Signed-off-by: Ville Lindholm <ville@lindholm.dev>
Diffstat (limited to 'components/script/xpath/parser.rs')
-rw-r--r-- | components/script/xpath/parser.rs | 1209 |
1 files changed, 1209 insertions, 0 deletions
diff --git a/components/script/xpath/parser.rs b/components/script/xpath/parser.rs new file mode 100644 index 00000000000..3d439b8a4ca --- /dev/null +++ b/components/script/xpath/parser.rs @@ -0,0 +1,1209 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_while1}; +use nom::character::complete::{alpha1, alphanumeric1, char, digit1, multispace0}; +use nom::combinator::{map, opt, recognize, value}; +use nom::error::{Error as NomError, ErrorKind as NomErrorKind}; +use nom::multi::{many0, separated_list0}; +use nom::sequence::{delimited, pair, preceded, tuple}; +use nom::{Finish, IResult}; + +pub fn parse(input: &str) -> Result<Expr, OwnedParserError> { + let (_, ast) = expr(input).finish().map_err(OwnedParserError::from)?; + Ok(ast) +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum Expr { + Or(Box<Expr>, Box<Expr>), + And(Box<Expr>, Box<Expr>), + Equality(Box<Expr>, EqualityOp, Box<Expr>), + Relational(Box<Expr>, RelationalOp, Box<Expr>), + Additive(Box<Expr>, AdditiveOp, Box<Expr>), + Multiplicative(Box<Expr>, MultiplicativeOp, Box<Expr>), + Unary(UnaryOp, Box<Expr>), + Union(Box<Expr>, Box<Expr>), + Path(PathExpr), +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum EqualityOp { + Eq, + NotEq, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum RelationalOp { + Lt, + Gt, + LtEq, + GtEq, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum AdditiveOp { + Add, + Sub, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum MultiplicativeOp { + Mul, + Div, + Mod, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum UnaryOp { + Minus, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct PathExpr { + pub is_absolute: bool, + pub is_descendant: bool, + pub steps: Vec<StepExpr>, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct PredicateListExpr { + pub predicates: Vec<PredicateExpr>, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct PredicateExpr { + pub expr: Expr, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct FilterExpr { + pub primary: PrimaryExpr, + pub predicates: PredicateListExpr, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum StepExpr { + Filter(FilterExpr), + Axis(AxisStep), +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct AxisStep { + pub axis: Axis, + pub node_test: NodeTest, + pub predicates: PredicateListExpr, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum Axis { + Child, + Descendant, + Attribute, + Self_, + DescendantOrSelf, + FollowingSibling, + Following, + Namespace, + Parent, + Ancestor, + PrecedingSibling, + Preceding, + AncestorOrSelf, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum NodeTest { + Name(QName), + Wildcard, + Kind(KindTest), +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub struct QName { + pub prefix: Option<String>, + pub local_part: String, +} + +impl std::fmt::Display for QName { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.prefix { + Some(prefix) => write!(f, "{}:{}", prefix, self.local_part), + None => write!(f, "{}", self.local_part), + } + } +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum KindTest { + PI(Option<String>), + Comment, + Text, + Node, +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum PrimaryExpr { + Literal(Literal), + Variable(QName), + Parenthesized(Box<Expr>), + ContextItem, + /// We only support the built-in core functions + Function(CoreFunction), +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum Literal { + Numeric(NumericLiteral), + String(String), +} + +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum NumericLiteral { + Integer(u64), + Decimal(f64), +} + +/// In the DOM we do not support custom functions, so we can enumerate the usable ones +#[derive(Clone, Debug, MallocSizeOf, PartialEq)] +pub enum CoreFunction { + // Node Set Functions + /// last() + Last, + /// position() + Position, + /// count(node-set) + Count(Box<Expr>), + /// id(object) + Id(Box<Expr>), + /// local-name(node-set?) + LocalName(Option<Box<Expr>>), + /// namespace-uri(node-set?) + NamespaceUri(Option<Box<Expr>>), + /// name(node-set?) + Name(Option<Box<Expr>>), + + // String Functions + /// string(object?) + String(Option<Box<Expr>>), + /// concat(string, string, ...) + Concat(Vec<Expr>), + /// starts-with(string, string) + StartsWith(Box<Expr>, Box<Expr>), + /// contains(string, string) + Contains(Box<Expr>, Box<Expr>), + /// substring-before(string, string) + SubstringBefore(Box<Expr>, Box<Expr>), + /// substring-after(string, string) + SubstringAfter(Box<Expr>, Box<Expr>), + /// substring(string, number, number?) + Substring(Box<Expr>, Box<Expr>, Option<Box<Expr>>), + /// string-length(string?) + StringLength(Option<Box<Expr>>), + /// normalize-space(string?) + NormalizeSpace(Option<Box<Expr>>), + /// translate(string, string, string) + Translate(Box<Expr>, Box<Expr>, Box<Expr>), + + // Number Functions + /// number(object?) + Number(Option<Box<Expr>>), + /// sum(node-set) + Sum(Box<Expr>), + /// floor(number) + Floor(Box<Expr>), + /// ceiling(number) + Ceiling(Box<Expr>), + /// round(number) + Round(Box<Expr>), + + // Boolean Functions + /// boolean(object) + Boolean(Box<Expr>), + /// not(boolean) + Not(Box<Expr>), + /// true() + True, + /// false() + False, + /// lang(string) + Lang(Box<Expr>), +} + +impl CoreFunction { + pub fn name(&self) -> &'static str { + match self { + CoreFunction::Last => "last", + CoreFunction::Position => "position", + CoreFunction::Count(_) => "count", + CoreFunction::Id(_) => "id", + CoreFunction::LocalName(_) => "local-name", + CoreFunction::NamespaceUri(_) => "namespace-uri", + CoreFunction::Name(_) => "name", + CoreFunction::String(_) => "string", + CoreFunction::Concat(_) => "concat", + CoreFunction::StartsWith(_, _) => "starts-with", + CoreFunction::Contains(_, _) => "contains", + CoreFunction::SubstringBefore(_, _) => "substring-before", + CoreFunction::SubstringAfter(_, _) => "substring-after", + CoreFunction::Substring(_, _, _) => "substring", + CoreFunction::StringLength(_) => "string-length", + CoreFunction::NormalizeSpace(_) => "normalize-space", + CoreFunction::Translate(_, _, _) => "translate", + CoreFunction::Number(_) => "number", + CoreFunction::Sum(_) => "sum", + CoreFunction::Floor(_) => "floor", + CoreFunction::Ceiling(_) => "ceiling", + CoreFunction::Round(_) => "round", + CoreFunction::Boolean(_) => "boolean", + CoreFunction::Not(_) => "not", + CoreFunction::True => "true", + CoreFunction::False => "false", + CoreFunction::Lang(_) => "lang", + } + } + + pub fn min_args(&self) -> usize { + match self { + // No args + CoreFunction::Last | + CoreFunction::Position | + CoreFunction::True | + CoreFunction::False => 0, + + // Optional single arg + CoreFunction::LocalName(_) | + CoreFunction::NamespaceUri(_) | + CoreFunction::Name(_) | + CoreFunction::String(_) | + CoreFunction::StringLength(_) | + CoreFunction::NormalizeSpace(_) | + CoreFunction::Number(_) => 0, + + // Required single arg + CoreFunction::Count(_) | + CoreFunction::Id(_) | + CoreFunction::Sum(_) | + CoreFunction::Floor(_) | + CoreFunction::Ceiling(_) | + CoreFunction::Round(_) | + CoreFunction::Boolean(_) | + CoreFunction::Not(_) | + CoreFunction::Lang(_) => 1, + + // Required two args + CoreFunction::StartsWith(_, _) | + CoreFunction::Contains(_, _) | + CoreFunction::SubstringBefore(_, _) | + CoreFunction::SubstringAfter(_, _) => 2, + + // Special cases + CoreFunction::Concat(_) => 2, // Minimum 2 args + CoreFunction::Substring(_, _, _) => 2, // 2 or 3 args + CoreFunction::Translate(_, _, _) => 3, // Exactly 3 args + } + } + + pub fn max_args(&self) -> Option<usize> { + match self { + // No args + CoreFunction::Last | + CoreFunction::Position | + CoreFunction::True | + CoreFunction::False => Some(0), + + // Optional single arg (0 or 1) + CoreFunction::LocalName(_) | + CoreFunction::NamespaceUri(_) | + CoreFunction::Name(_) | + CoreFunction::String(_) | + CoreFunction::StringLength(_) | + CoreFunction::NormalizeSpace(_) | + CoreFunction::Number(_) => Some(1), + + // Exactly one arg + CoreFunction::Count(_) | + CoreFunction::Id(_) | + CoreFunction::Sum(_) | + CoreFunction::Floor(_) | + CoreFunction::Ceiling(_) | + CoreFunction::Round(_) | + CoreFunction::Boolean(_) | + CoreFunction::Not(_) | + CoreFunction::Lang(_) => Some(1), + + // Exactly two args + CoreFunction::StartsWith(_, _) | + CoreFunction::Contains(_, _) | + CoreFunction::SubstringBefore(_, _) | + CoreFunction::SubstringAfter(_, _) => Some(2), + + // Special cases + CoreFunction::Concat(_) => None, // Unlimited args + CoreFunction::Substring(_, _, _) => Some(3), // 2 or 3 args + CoreFunction::Translate(_, _, _) => Some(3), // Exactly 3 args + } + } + + /// Returns true if the number of arguments is valid for this function + pub fn is_valid_arity(&self, num_args: usize) -> bool { + let min = self.min_args(); + let max = self.max_args(); + + num_args >= min && max.map_or(true, |max| num_args <= max) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct OwnedParserError { + input: String, + kind: NomErrorKind, +} + +impl<'a> From<NomError<&'a str>> for OwnedParserError { + fn from(err: NomError<&'a str>) -> Self { + OwnedParserError { + input: err.input.to_string(), + kind: err.code, + } + } +} + +impl std::fmt::Display for OwnedParserError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "error {:?} at: {}", self.kind, self.input) + } +} + +impl std::error::Error for OwnedParserError {} + +/// Top-level parser +fn expr(input: &str) -> IResult<&str, Expr> { + expr_single(input) +} + +fn expr_single(input: &str) -> IResult<&str, Expr> { + or_expr(input) +} + +fn or_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = and_expr(input)?; + let (input, rest) = many0(preceded(ws(tag("or")), and_expr))(input)?; + + Ok(( + input, + rest.into_iter() + .fold(first, |acc, expr| Expr::Or(Box::new(acc), Box::new(expr))), + )) +} + +fn and_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = equality_expr(input)?; + let (input, rest) = many0(preceded(ws(tag("and")), equality_expr))(input)?; + + Ok(( + input, + rest.into_iter() + .fold(first, |acc, expr| Expr::And(Box::new(acc), Box::new(expr))), + )) +} + +fn equality_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = relational_expr(input)?; + let (input, rest) = many0(tuple(( + ws(alt(( + map(tag("="), |_| EqualityOp::Eq), + map(tag("!="), |_| EqualityOp::NotEq), + ))), + relational_expr, + )))(input)?; + + Ok(( + input, + rest.into_iter().fold(first, |acc, (op, expr)| { + Expr::Equality(Box::new(acc), op, Box::new(expr)) + }), + )) +} + +fn relational_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = additive_expr(input)?; + let (input, rest) = many0(tuple(( + ws(alt(( + map(tag("<="), |_| RelationalOp::LtEq), + map(tag(">="), |_| RelationalOp::GtEq), + map(tag("<"), |_| RelationalOp::Lt), + map(tag(">"), |_| RelationalOp::Gt), + ))), + additive_expr, + )))(input)?; + + Ok(( + input, + rest.into_iter().fold(first, |acc, (op, expr)| { + Expr::Relational(Box::new(acc), op, Box::new(expr)) + }), + )) +} + +fn additive_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = multiplicative_expr(input)?; + let (input, rest) = many0(tuple(( + ws(alt(( + map(tag("+"), |_| AdditiveOp::Add), + map(tag("-"), |_| AdditiveOp::Sub), + ))), + multiplicative_expr, + )))(input)?; + + Ok(( + input, + rest.into_iter().fold(first, |acc, (op, expr)| { + Expr::Additive(Box::new(acc), op, Box::new(expr)) + }), + )) +} + +fn multiplicative_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = unary_expr(input)?; + let (input, rest) = many0(tuple(( + ws(alt(( + map(tag("*"), |_| MultiplicativeOp::Mul), + map(tag("div"), |_| MultiplicativeOp::Div), + map(tag("mod"), |_| MultiplicativeOp::Mod), + ))), + unary_expr, + )))(input)?; + + Ok(( + input, + rest.into_iter().fold(first, |acc, (op, expr)| { + Expr::Multiplicative(Box::new(acc), op, Box::new(expr)) + }), + )) +} + +fn unary_expr(input: &str) -> IResult<&str, Expr> { + let (input, minus_count) = many0(ws(char('-')))(input)?; + let (input, expr) = union_expr(input)?; + + Ok(( + input, + (0..minus_count.len()).fold(expr, |acc, _| Expr::Unary(UnaryOp::Minus, Box::new(acc))), + )) +} + +fn union_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = path_expr(input)?; + let (input, rest) = many0(preceded(ws(char('|')), path_expr))(input)?; + + Ok(( + input, + rest.into_iter().fold(first, |acc, expr| { + Expr::Union(Box::new(acc), Box::new(expr)) + }), + )) +} + +fn path_expr(input: &str) -> IResult<&str, Expr> { + alt(( + // "//" RelativePathExpr + map(pair(tag("//"), relative_path_expr), |(_, rel_path)| { + Expr::Path(PathExpr { + is_absolute: true, + is_descendant: true, + steps: match rel_path { + Expr::Path(p) => p.steps, + _ => unreachable!(), + }, + }) + }), + // "/" RelativePathExpr? + map(pair(char('/'), opt(relative_path_expr)), |(_, rel_path)| { + Expr::Path(PathExpr { + is_absolute: true, + is_descendant: false, + steps: rel_path + .map(|p| match p { + Expr::Path(p) => p.steps, + _ => unreachable!(), + }) + .unwrap_or_default(), + }) + }), + // RelativePathExpr + relative_path_expr, + ))(input) +} + +fn relative_path_expr(input: &str) -> IResult<&str, Expr> { + let (input, first) = step_expr(input)?; + let (input, steps) = many0(pair( + // ("/" | "//") + ws(alt((value(false, char('/')), value(true, tag("//"))))), + step_expr, + ))(input)?; + + let mut all_steps = vec![first]; + for (is_descendant, step) in steps { + if is_descendant { + // Insert an implicit descendant-or-self::node() step + all_steps.push(StepExpr::Axis(AxisStep { + axis: Axis::DescendantOrSelf, + node_test: NodeTest::Kind(KindTest::Node), + predicates: PredicateListExpr { predicates: vec![] }, + })); + } + all_steps.push(step); + } + + Ok(( + input, + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: all_steps, + }), + )) +} + +fn step_expr(input: &str) -> IResult<&str, StepExpr> { + alt(( + map(filter_expr, StepExpr::Filter), + map(axis_step, StepExpr::Axis), + ))(input) +} + +fn axis_step(input: &str) -> IResult<&str, AxisStep> { + let (input, (step, predicates)) = + pair(alt((forward_step, reverse_step)), predicate_list)(input)?; + + let (axis, node_test) = step; + Ok(( + input, + AxisStep { + axis, + node_test, + predicates, + }, + )) +} + +fn forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { + alt(( + // ForwardAxis NodeTest + pair(forward_axis, node_test), + // AbbrevForwardStep + abbrev_forward_step, + ))(input) +} + +fn forward_axis(input: &str) -> IResult<&str, Axis> { + let (input, axis) = alt(( + value(Axis::Child, tag("child::")), + value(Axis::Descendant, tag("descendant::")), + value(Axis::Attribute, tag("attribute::")), + value(Axis::Self_, tag("self::")), + value(Axis::DescendantOrSelf, tag("descendant-or-self::")), + value(Axis::FollowingSibling, tag("following-sibling::")), + value(Axis::Following, tag("following::")), + value(Axis::Namespace, tag("namespace::")), + ))(input)?; + + Ok((input, axis)) +} + +fn abbrev_forward_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { + let (input, attr) = opt(char('@'))(input)?; + let (input, test) = node_test(input)?; + + Ok(( + input, + ( + if attr.is_some() { + Axis::Attribute + } else { + Axis::Child + }, + test, + ), + )) +} + +fn reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { + alt(( + // ReverseAxis NodeTest + pair(reverse_axis, node_test), + // AbbrevReverseStep + abbrev_reverse_step, + ))(input) +} + +fn reverse_axis(input: &str) -> IResult<&str, Axis> { + alt(( + value(Axis::Parent, tag("parent::")), + value(Axis::Ancestor, tag("ancestor::")), + value(Axis::PrecedingSibling, tag("preceding-sibling::")), + value(Axis::Preceding, tag("preceding::")), + value(Axis::AncestorOrSelf, tag("ancestor-or-self::")), + ))(input) +} + +fn abbrev_reverse_step(input: &str) -> IResult<&str, (Axis, NodeTest)> { + map(tag(".."), |_| { + (Axis::Parent, NodeTest::Kind(KindTest::Node)) + })(input) +} + +fn node_test(input: &str) -> IResult<&str, NodeTest> { + alt(( + map(kind_test, NodeTest::Kind), + map(name_test, |name| match name { + NameTest::Wildcard => NodeTest::Wildcard, + NameTest::QName(qname) => NodeTest::Name(qname), + }), + ))(input) +} + +#[derive(Clone, Debug, PartialEq)] +enum NameTest { + QName(QName), + Wildcard, +} + +fn name_test(input: &str) -> IResult<&str, NameTest> { + alt(( + // NCName ":" "*" + map(tuple((ncname, char(':'), char('*'))), |(prefix, _, _)| { + NameTest::QName(QName { + prefix: Some(prefix.to_string()), + local_part: "*".to_string(), + }) + }), + // "*" + value(NameTest::Wildcard, char('*')), + // QName + map(qname, NameTest::QName), + ))(input) +} + +fn filter_expr(input: &str) -> IResult<&str, FilterExpr> { + let (input, primary) = primary_expr(input)?; + let (input, predicates) = predicate_list(input)?; + + Ok(( + input, + FilterExpr { + primary, + predicates, + }, + )) +} + +fn predicate_list(input: &str) -> IResult<&str, PredicateListExpr> { + let (input, predicates) = many0(predicate)(input)?; + Ok((input, PredicateListExpr { predicates })) +} + +fn predicate(input: &str) -> IResult<&str, PredicateExpr> { + let (input, expr) = delimited(ws(char('[')), expr, ws(char(']')))(input)?; + Ok((input, PredicateExpr { expr })) +} + +fn primary_expr(input: &str) -> IResult<&str, PrimaryExpr> { + alt(( + literal, + var_ref, + map(parenthesized_expr, |e| { + PrimaryExpr::Parenthesized(Box::new(e)) + }), + context_item_expr, + function_call, + ))(input) +} + +fn literal(input: &str) -> IResult<&str, PrimaryExpr> { + map(alt((numeric_literal, string_literal)), |lit| { + PrimaryExpr::Literal(lit) + })(input) +} + +fn numeric_literal(input: &str) -> IResult<&str, Literal> { + alt((decimal_literal, integer_literal))(input) +} + +fn var_ref(input: &str) -> IResult<&str, PrimaryExpr> { + let (input, _) = char('$')(input)?; + let (input, name) = qname(input)?; + Ok((input, PrimaryExpr::Variable(name))) +} + +fn parenthesized_expr(input: &str) -> IResult<&str, Expr> { + delimited(ws(char('(')), expr, ws(char(')')))(input) +} + +fn context_item_expr(input: &str) -> IResult<&str, PrimaryExpr> { + map(char('.'), |_| PrimaryExpr::ContextItem)(input) +} + +fn function_call(input: &str) -> IResult<&str, PrimaryExpr> { + let (input, name) = qname(input)?; + let (input, args) = delimited( + ws(char('(')), + separated_list0(ws(char(',')), expr_single), + ws(char(')')), + )(input)?; + + // Helper to create error + let arity_error = || nom::Err::Error(NomError::new(input, NomErrorKind::Verify)); + + let core_fn = match name.local_part.as_str() { + // Node Set Functions + "last" => CoreFunction::Last, + "position" => CoreFunction::Position, + "count" => CoreFunction::Count(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + "id" => CoreFunction::Id(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + "local-name" => CoreFunction::LocalName(args.into_iter().next().map(Box::new)), + "namespace-uri" => CoreFunction::NamespaceUri(args.into_iter().next().map(Box::new)), + "name" => CoreFunction::Name(args.into_iter().next().map(Box::new)), + + // String Functions + "string" => CoreFunction::String(args.into_iter().next().map(Box::new)), + "concat" => CoreFunction::Concat(args.into_iter().collect()), + "starts-with" => { + let mut args = args.into_iter(); + CoreFunction::StartsWith( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + ) + }, + "contains" => { + let mut args = args.into_iter(); + CoreFunction::Contains( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + ) + }, + "substring-before" => { + let mut args = args.into_iter(); + CoreFunction::SubstringBefore( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + ) + }, + "substring-after" => { + let mut args = args.into_iter(); + CoreFunction::SubstringAfter( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + ) + }, + "substring" => { + let mut args = args.into_iter(); + CoreFunction::Substring( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + args.next().map(Box::new), + ) + }, + "string-length" => CoreFunction::StringLength(args.into_iter().next().map(Box::new)), + "normalize-space" => CoreFunction::NormalizeSpace(args.into_iter().next().map(Box::new)), + "translate" => { + let mut args = args.into_iter(); + CoreFunction::Translate( + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + Box::new(args.next().ok_or_else(arity_error)?), + ) + }, + + // Number Functions + "number" => CoreFunction::Number(args.into_iter().next().map(Box::new)), + "sum" => CoreFunction::Sum(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + "floor" => CoreFunction::Floor(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + "ceiling" => { + CoreFunction::Ceiling(Box::new(args.into_iter().next().ok_or_else(arity_error)?)) + }, + "round" => CoreFunction::Round(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + + // Boolean Functions + "boolean" => { + CoreFunction::Boolean(Box::new(args.into_iter().next().ok_or_else(arity_error)?)) + }, + "not" => CoreFunction::Not(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + "true" => CoreFunction::True, + "false" => CoreFunction::False, + "lang" => CoreFunction::Lang(Box::new(args.into_iter().next().ok_or_else(arity_error)?)), + + // Unknown function + _ => return Err(nom::Err::Error(NomError::new(input, NomErrorKind::Verify))), + }; + + Ok((input, PrimaryExpr::Function(core_fn))) +} + +fn kind_test(input: &str) -> IResult<&str, KindTest> { + alt((pi_test, comment_test, text_test, any_kind_test))(input) +} + +fn any_kind_test(input: &str) -> IResult<&str, KindTest> { + map(tuple((tag("node"), ws(char('(')), ws(char(')')))), |_| { + KindTest::Node + })(input) +} + +fn text_test(input: &str) -> IResult<&str, KindTest> { + map(tuple((tag("text"), ws(char('(')), ws(char(')')))), |_| { + KindTest::Text + })(input) +} + +fn comment_test(input: &str) -> IResult<&str, KindTest> { + map( + tuple((tag("comment"), ws(char('(')), ws(char(')')))), + |_| KindTest::Comment, + )(input) +} + +fn pi_test(input: &str) -> IResult<&str, KindTest> { + map( + tuple(( + tag("processing-instruction"), + ws(char('(')), + opt(ws(string_literal)), + ws(char(')')), + )), + |(_, _, literal, _)| { + KindTest::PI(literal.map(|l| match l { + Literal::String(s) => s, + _ => unreachable!(), + })) + }, + )(input) +} + +fn ws<'a, F, O>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O> +where + F: FnMut(&'a str) -> IResult<&'a str, O>, +{ + delimited(multispace0, inner, multispace0) +} + +fn integer_literal(input: &str) -> IResult<&str, Literal> { + map(recognize(tuple((opt(char('-')), digit1))), |s: &str| { + Literal::Numeric(NumericLiteral::Integer(s.parse().unwrap())) + })(input) +} + +fn decimal_literal(input: &str) -> IResult<&str, Literal> { + map( + recognize(tuple((opt(char('-')), opt(digit1), char('.'), digit1))), + |s: &str| Literal::Numeric(NumericLiteral::Decimal(s.parse().unwrap())), + )(input) +} + +fn string_literal(input: &str) -> IResult<&str, Literal> { + alt(( + delimited( + char('"'), + map(take_while1(|c| c != '"'), |s: &str| { + Literal::String(s.to_string()) + }), + char('"'), + ), + delimited( + char('\''), + map(take_while1(|c| c != '\''), |s: &str| { + Literal::String(s.to_string()) + }), + char('\''), + ), + ))(input) +} + +// QName parser +fn qname(input: &str) -> IResult<&str, QName> { + let (input, prefix) = opt(tuple((ncname, char(':'))))(input)?; + let (input, local) = ncname(input)?; + + Ok(( + input, + QName { + prefix: prefix.map(|(p, _)| p.to_string()), + local_part: local.to_string(), + }, + )) +} + +// NCName parser +fn ncname(input: &str) -> IResult<&str, &str> { + recognize(pair( + alpha1, + many0(alt((alphanumeric1, tag("-"), tag("_")))), + ))(input) +} + +// Test functions to verify the parsers: +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_node_tests() { + let cases = vec![ + ("node()", NodeTest::Kind(KindTest::Node)), + ("text()", NodeTest::Kind(KindTest::Text)), + ("comment()", NodeTest::Kind(KindTest::Comment)), + ( + "processing-instruction()", + NodeTest::Kind(KindTest::PI(None)), + ), + ( + "processing-instruction('test')", + NodeTest::Kind(KindTest::PI(Some("test".to_string()))), + ), + ("*", NodeTest::Wildcard), + ( + "prefix:*", + NodeTest::Name(QName { + prefix: Some("prefix".to_string()), + local_part: "*".to_string(), + }), + ), + ( + "div", + NodeTest::Name(QName { + prefix: None, + local_part: "div".to_string(), + }), + ), + ( + "ns:div", + NodeTest::Name(QName { + prefix: Some("ns".to_string()), + local_part: "div".to_string(), + }), + ), + ]; + + for (input, expected) in cases { + match node_test(input) { + Ok((remaining, result)) => { + assert!(remaining.is_empty(), "Parser didn't consume all input"); + assert_eq!(result, expected); + }, + Err(e) => panic!("Failed to parse '{}': {:?}", input, e), + } + } + } + + #[test] + fn test_filter_expr() { + let cases = vec![ + ( + "processing-instruction('test')[2]", + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Axis(AxisStep { + axis: Axis::Child, + node_test: NodeTest::Kind(KindTest::PI(Some("test".to_string()))), + predicates: PredicateListExpr { + predicates: vec![PredicateExpr { + expr: Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::Numeric( + NumericLiteral::Integer(2), + )), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + }], + }, + })], + }), + ), + ( + "concat('hello', ' ', 'world')", + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Function(CoreFunction::Concat(vec![ + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::String( + "hello".to_string(), + )), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::String(" ".to_string())), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::String( + "world".to_string(), + )), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + ])), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + ), + ]; + + for (input, expected) in cases { + match parse(input) { + Ok(result) => { + assert_eq!(result, expected); + }, + Err(e) => panic!("Failed to parse '{}': {:?}", input, e), + } + } + } + + #[test] + fn test_complex_paths() { + let cases = vec![ + ( + "//*[contains(@class, 'test')]", + Expr::Path(PathExpr { + is_absolute: true, + is_descendant: true, + steps: vec![StepExpr::Axis(AxisStep { + axis: Axis::Child, + node_test: NodeTest::Wildcard, + predicates: PredicateListExpr { + predicates: vec![PredicateExpr { + expr: Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Function(CoreFunction::Contains( + Box::new(Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Axis(AxisStep { + axis: Axis::Attribute, + node_test: NodeTest::Name(QName { + prefix: None, + local_part: "class".to_string(), + }), + predicates: PredicateListExpr { + predicates: vec![], + }, + })], + })), + Box::new(Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::String( + "test".to_string(), + )), + predicates: PredicateListExpr { + predicates: vec![], + }, + })], + })), + )), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + }], + }, + })], + }), + ), + ( + "//div[position() > 1]/*[last()]", + Expr::Path(PathExpr { + is_absolute: true, + is_descendant: true, + steps: vec![ + StepExpr::Axis(AxisStep { + axis: Axis::Child, + node_test: NodeTest::Name(QName { + prefix: None, + local_part: "div".to_string(), + }), + predicates: PredicateListExpr { + predicates: vec![PredicateExpr { + expr: Expr::Relational( + Box::new(Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Function( + CoreFunction::Position, + ), + predicates: PredicateListExpr { + predicates: vec![], + }, + })], + })), + RelationalOp::Gt, + Box::new(Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Literal(Literal::Numeric( + NumericLiteral::Integer(1), + )), + predicates: PredicateListExpr { + predicates: vec![], + }, + })], + })), + ), + }], + }, + }), + StepExpr::Axis(AxisStep { + axis: Axis::Child, + node_test: NodeTest::Wildcard, + predicates: PredicateListExpr { + predicates: vec![PredicateExpr { + expr: Expr::Path(PathExpr { + is_absolute: false, + is_descendant: false, + steps: vec![StepExpr::Filter(FilterExpr { + primary: PrimaryExpr::Function(CoreFunction::Last), + predicates: PredicateListExpr { predicates: vec![] }, + })], + }), + }], + }, + }), + ], + }), + ), + ]; + + for (input, expected) in cases { + match parse(input) { + Ok(result) => { + assert_eq!(result, expected); + }, + Err(e) => panic!("Failed to parse '{}': {:?}", input, e), + } + } + } +} |