aboutsummaryrefslogtreecommitdiffstats
path: root/components/script/xpath/eval.rs
diff options
context:
space:
mode:
authorVille Lindholm <ville@lindholm.dev>2024-12-08 04:01:50 +0200
committerGitHub <noreply@github.com>2024-12-08 02:01:50 +0000
commitbc7fe41a02b044b52a54c7347f347557a4bd0e8d (patch)
tree328133ab7eea5f406965c24a3fd2c6cc4858c55c /components/script/xpath/eval.rs
parent264c0f972fd1a732ee1d1490d78a1d26a65c6f5f (diff)
downloadservo-bc7fe41a02b044b52a54c7347f347557a4bd0e8d.tar.gz
servo-bc7fe41a02b044b52a54c7347f347557a4bd0e8d.zip
Add XPath parser/evaluator (#34463)
* Add XPath parser/evaluator Signed-off-by: Ville Lindholm <ville@lindholm.dev> * Correctly annotate XPathEvaluator IDL Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: have bindings pass in `can_gc` Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: add docstrings Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: implement PartialEq for Value for readability Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: add docstrings for CoreFunctions Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: simplify node test code Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: add unit tests for string handling xpath functions Signed-off-by: Ville Lindholm <ville@lindholm.dev> * put xpath features behind dom.xpath.enabled pref Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review] remove rstest and insta dev-deps Signed-off-by: Ville Lindholm <ville@lindholm.dev> * update wpt test expectations Signed-off-by: Ville Lindholm <ville@lindholm.dev> * [PR review]: tweak metadata files Signed-off-by: Ville Lindholm <ville@lindholm.dev> * update wpt test expectations AGAIN Signed-off-by: Ville Lindholm <ville@lindholm.dev> --------- Signed-off-by: Ville Lindholm <ville@lindholm.dev>
Diffstat (limited to 'components/script/xpath/eval.rs')
-rw-r--r--components/script/xpath/eval.rs589
1 files changed, 589 insertions, 0 deletions
diff --git a/components/script/xpath/eval.rs b/components/script/xpath/eval.rs
new file mode 100644
index 00000000000..d880e2f3930
--- /dev/null
+++ b/components/script/xpath/eval.rs
@@ -0,0 +1,589 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use std::fmt;
+
+use html5ever::{local_name, namespace_prefix, namespace_url, ns, QualName};
+
+use super::parser::{
+ AdditiveOp, Axis, EqualityOp, Expr, FilterExpr, KindTest, Literal, MultiplicativeOp, NodeTest,
+ NumericLiteral, PathExpr, PredicateExpr, PredicateListExpr, PrimaryExpr,
+ QName as ParserQualName, RelationalOp, StepExpr, UnaryOp,
+};
+use super::{EvaluationCtx, Value};
+use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
+use crate::dom::bindings::inheritance::{Castable, CharacterDataTypeId, NodeTypeId};
+use crate::dom::bindings::root::DomRoot;
+use crate::dom::bindings::xmlname::validate_and_extract;
+use crate::dom::element::Element;
+use crate::dom::node::{Node, ShadowIncluding};
+use crate::dom::processinginstruction::ProcessingInstruction;
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum Error {
+ NotANodeset,
+ InvalidPath,
+ UnknownFunction { name: QualName },
+ UnknownVariable { name: QualName },
+ UnknownNamespace { prefix: String },
+ InvalidQName { qname: ParserQualName },
+ FunctionEvaluation { fname: String },
+ Internal { msg: String },
+}
+
+impl std::fmt::Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::NotANodeset => write!(f, "expression did not evaluate to a nodeset"),
+ Error::InvalidPath => write!(f, "invalid path expression"),
+ Error::UnknownFunction { name } => write!(f, "unknown function {:?}", name),
+ Error::UnknownVariable { name } => write!(f, "unknown variable {:?}", name),
+ Error::UnknownNamespace { prefix } => {
+ write!(f, "unknown namespace prefix {:?}", prefix)
+ },
+ Error::InvalidQName { qname } => {
+ write!(f, "invalid QName {:?}", qname)
+ },
+ Error::FunctionEvaluation { fname } => {
+ write!(f, "error while evaluating function: {}", fname)
+ },
+ Error::Internal { msg } => {
+ write!(f, "internal error: {}", msg)
+ },
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+pub fn try_extract_nodeset(v: Value) -> Result<Vec<DomRoot<Node>>, Error> {
+ match v {
+ Value::Nodeset(ns) => Ok(ns),
+ _ => Err(Error::NotANodeset),
+ }
+}
+
+pub trait Evaluatable: fmt::Debug {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error>;
+ /// Returns true if this expression evaluates to a primitive value, without needing to touch the DOM
+ fn is_primitive(&self) -> bool;
+}
+
+impl<T: ?Sized> Evaluatable for Box<T>
+where
+ T: Evaluatable,
+{
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ (**self).evaluate(context)
+ }
+
+ fn is_primitive(&self) -> bool {
+ (**self).is_primitive()
+ }
+}
+
+impl Evaluatable for Expr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ match self {
+ Expr::And(left, right) => {
+ let left_bool = left.evaluate(context)?.boolean();
+ let v = left_bool && right.evaluate(context)?.boolean();
+ Ok(Value::Boolean(v))
+ },
+ Expr::Or(left, right) => {
+ let left_bool = left.evaluate(context)?.boolean();
+ let v = left_bool || right.evaluate(context)?.boolean();
+ Ok(Value::Boolean(v))
+ },
+ Expr::Equality(left, equality_op, right) => {
+ let left_val = left.evaluate(context)?;
+ let right_val = right.evaluate(context)?;
+
+ let v = match equality_op {
+ EqualityOp::Eq => left_val == right_val,
+ EqualityOp::NotEq => left_val != right_val,
+ };
+
+ Ok(Value::Boolean(v))
+ },
+ Expr::Relational(left, relational_op, right) => {
+ let left_val = left.evaluate(context)?.number();
+ let right_val = right.evaluate(context)?.number();
+
+ let v = match relational_op {
+ RelationalOp::Lt => left_val < right_val,
+ RelationalOp::Gt => left_val > right_val,
+ RelationalOp::LtEq => left_val <= right_val,
+ RelationalOp::GtEq => left_val >= right_val,
+ };
+ Ok(Value::Boolean(v))
+ },
+ Expr::Additive(left, additive_op, right) => {
+ let left_val = left.evaluate(context)?.number();
+ let right_val = right.evaluate(context)?.number();
+
+ let v = match additive_op {
+ AdditiveOp::Add => left_val + right_val,
+ AdditiveOp::Sub => left_val - right_val,
+ };
+ Ok(Value::Number(v))
+ },
+ Expr::Multiplicative(left, multiplicative_op, right) => {
+ let left_val = left.evaluate(context)?.number();
+ let right_val = right.evaluate(context)?.number();
+
+ let v = match multiplicative_op {
+ MultiplicativeOp::Mul => left_val * right_val,
+ MultiplicativeOp::Div => left_val / right_val,
+ MultiplicativeOp::Mod => left_val % right_val,
+ };
+ Ok(Value::Number(v))
+ },
+ Expr::Unary(unary_op, expr) => {
+ let v = expr.evaluate(context)?.number();
+
+ match unary_op {
+ UnaryOp::Minus => Ok(Value::Number(-v)),
+ }
+ },
+ Expr::Union(left, right) => {
+ let as_nodes = |e: &Expr| e.evaluate(context).and_then(try_extract_nodeset);
+
+ let mut left_nodes = as_nodes(left)?;
+ let right_nodes = as_nodes(right)?;
+
+ left_nodes.extend(right_nodes);
+ Ok(Value::Nodeset(left_nodes))
+ },
+ Expr::Path(path_expr) => path_expr.evaluate(context),
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ match self {
+ Expr::Or(left, right) => left.is_primitive() && right.is_primitive(),
+ Expr::And(left, right) => left.is_primitive() && right.is_primitive(),
+ Expr::Equality(left, _, right) => left.is_primitive() && right.is_primitive(),
+ Expr::Relational(left, _, right) => left.is_primitive() && right.is_primitive(),
+ Expr::Additive(left, _, right) => left.is_primitive() && right.is_primitive(),
+ Expr::Multiplicative(left, _, right) => left.is_primitive() && right.is_primitive(),
+ Expr::Unary(_, expr) => expr.is_primitive(),
+ Expr::Union(_, _) => false,
+ Expr::Path(path_expr) => path_expr.is_primitive(),
+ }
+ }
+}
+
+impl Evaluatable for PathExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ let mut current_nodes = vec![context.context_node.clone()];
+
+ // If path starts with '//', add an implicit descendant-or-self::node() step
+ if self.is_descendant {
+ current_nodes = current_nodes
+ .iter()
+ .flat_map(|n| n.traverse_preorder(ShadowIncluding::No))
+ .collect();
+ }
+
+ trace!("[PathExpr] Evaluating path expr: {:?}", self);
+
+ let have_multiple_steps = self.steps.len() > 1;
+
+ for step in &self.steps {
+ let mut next_nodes = Vec::new();
+ for node in current_nodes {
+ let step_context = context.subcontext_for_node(&node);
+ let step_result = step.evaluate(&step_context)?;
+ match (have_multiple_steps, step_result) {
+ (_, Value::Nodeset(mut nodes)) => {
+ // as long as we evaluate to nodesets, keep going
+ next_nodes.append(&mut nodes);
+ },
+ (false, value) => {
+ trace!("[PathExpr] Got single primitive value: {:?}", value);
+ return Ok(value);
+ },
+ (true, value) => {
+ error!(
+ "Expected nodeset from step evaluation, got: {:?} node: {:?}, step: {:?}",
+ value, node, step
+ );
+ return Ok(value);
+ },
+ }
+ }
+ current_nodes = next_nodes;
+ }
+
+ trace!("[PathExpr] Got nodes: {:?}", current_nodes);
+
+ Ok(Value::Nodeset(current_nodes))
+ }
+
+ fn is_primitive(&self) -> bool {
+ !self.is_absolute &&
+ !self.is_descendant &&
+ self.steps.len() == 1 &&
+ self.steps[0].is_primitive()
+ }
+}
+
+impl TryFrom<&ParserQualName> for QualName {
+ type Error = Error;
+
+ fn try_from(qname: &ParserQualName) -> Result<Self, Self::Error> {
+ let qname_as_str = qname.to_string();
+ if let Ok((ns, prefix, local)) = validate_and_extract(None, &qname_as_str) {
+ Ok(QualName { prefix, ns, local })
+ } else {
+ Err(Error::InvalidQName {
+ qname: qname.clone(),
+ })
+ }
+ }
+}
+
+pub enum NameTestComparisonMode {
+ /// Namespaces must match exactly
+ XHtml,
+ /// Missing namespace information is treated as the HTML namespace
+ Html,
+}
+
+pub fn element_name_test(
+ expected_name: QualName,
+ element_qualname: QualName,
+ comparison_mode: NameTestComparisonMode,
+) -> bool {
+ let is_wildcard = expected_name.local == local_name!("*");
+
+ let test_prefix = expected_name
+ .prefix
+ .clone()
+ .unwrap_or(namespace_prefix!(""));
+ let test_ns_uri = match test_prefix {
+ namespace_prefix!("*") => ns!(*),
+ namespace_prefix!("html") => ns!(html),
+ namespace_prefix!("xml") => ns!(xml),
+ namespace_prefix!("xlink") => ns!(xlink),
+ namespace_prefix!("svg") => ns!(svg),
+ namespace_prefix!("mathml") => ns!(mathml),
+ namespace_prefix!("") => {
+ if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
+ ns!()
+ } else {
+ ns!(html)
+ }
+ },
+ _ => {
+ // We don't support custom namespaces, use fallback or panic depending on strictness
+ if matches!(comparison_mode, NameTestComparisonMode::XHtml) {
+ panic!("Unrecognized namespace prefix: {}", test_prefix)
+ } else {
+ ns!(html)
+ }
+ },
+ };
+
+ if is_wildcard {
+ test_ns_uri == element_qualname.ns
+ } else {
+ test_ns_uri == element_qualname.ns && expected_name.local == element_qualname.local
+ }
+}
+
+fn apply_node_test(test: &NodeTest, node: &Node) -> Result<bool, Error> {
+ let result = match test {
+ NodeTest::Name(qname) => {
+ // Convert the unvalidated "parser QualName" into the proper QualName structure
+ let wanted_name: QualName = qname.try_into()?;
+ if matches!(node.type_id(), NodeTypeId::Element(_)) {
+ let element = node.downcast::<Element>().unwrap();
+ let comparison_mode = if node.owner_doc().is_xhtml_document() {
+ NameTestComparisonMode::XHtml
+ } else {
+ NameTestComparisonMode::Html
+ };
+ let element_qualname = QualName::new(
+ element.prefix().as_ref().cloned(),
+ element.namespace().clone(),
+ element.local_name().clone(),
+ );
+ element_name_test(wanted_name, element_qualname, comparison_mode)
+ } else {
+ false
+ }
+ },
+ NodeTest::Wildcard => true,
+ NodeTest::Kind(kind) => match kind {
+ KindTest::PI(target) => {
+ if NodeTypeId::CharacterData(CharacterDataTypeId::ProcessingInstruction) ==
+ node.type_id()
+ {
+ let pi = node.downcast::<ProcessingInstruction>().unwrap();
+ match (target, pi.target()) {
+ (Some(target_name), node_target_name)
+ if target_name == &node_target_name.to_string() =>
+ {
+ true
+ },
+ (Some(_), _) => false,
+ (None, _) => true,
+ }
+ } else {
+ false
+ }
+ },
+ KindTest::Comment => matches!(
+ node.type_id(),
+ NodeTypeId::CharacterData(CharacterDataTypeId::Comment)
+ ),
+ KindTest::Text => matches!(
+ node.type_id(),
+ NodeTypeId::CharacterData(CharacterDataTypeId::Text(_))
+ ),
+ KindTest::Node => true,
+ },
+ };
+ Ok(result)
+}
+
+impl Evaluatable for StepExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ match self {
+ StepExpr::Filter(filter_expr) => filter_expr.evaluate(context),
+ StepExpr::Axis(axis_step) => {
+ let nodes: Vec<DomRoot<Node>> = match axis_step.axis {
+ Axis::Child => context.context_node.children().collect(),
+ Axis::Descendant => context
+ .context_node
+ .traverse_preorder(ShadowIncluding::No)
+ .skip(1)
+ .collect(),
+ Axis::Parent => vec![context.context_node.GetParentNode()]
+ .into_iter()
+ .flatten()
+ .collect(),
+ Axis::Ancestor => context.context_node.ancestors().collect(),
+ Axis::Following => context
+ .context_node
+ .following_nodes(&context.context_node)
+ .skip(1)
+ .collect(),
+ Axis::Preceding => context
+ .context_node
+ .preceding_nodes(&context.context_node)
+ .skip(1)
+ .collect(),
+ Axis::FollowingSibling => context.context_node.following_siblings().collect(),
+ Axis::PrecedingSibling => context.context_node.preceding_siblings().collect(),
+ Axis::Attribute => {
+ if matches!(Node::type_id(&context.context_node), NodeTypeId::Element(_)) {
+ let element = context.context_node.downcast::<Element>().unwrap();
+ element
+ .attrs()
+ .iter()
+ .map(|attr| attr.upcast::<Node>())
+ .map(DomRoot::from_ref)
+ .collect()
+ } else {
+ vec![]
+ }
+ },
+ Axis::Self_ => vec![context.context_node.clone()],
+ Axis::DescendantOrSelf => context
+ .context_node
+ .traverse_preorder(ShadowIncluding::No)
+ .collect(),
+ Axis::AncestorOrSelf => context
+ .context_node
+ .inclusive_ancestors(ShadowIncluding::No)
+ .collect(),
+ Axis::Namespace => Vec::new(), // Namespace axis is not commonly implemented
+ };
+
+ trace!("[StepExpr] Axis {:?} got nodes {:?}", axis_step.axis, nodes);
+
+ // Filter nodes according to the step's node_test. Will error out if any NodeTest
+ // application errors out.
+ let filtered_nodes: Vec<DomRoot<Node>> = nodes
+ .into_iter()
+ .map(|node| {
+ apply_node_test(&axis_step.node_test, &node)
+ .map(|matches| matches.then_some(node))
+ })
+ .collect::<Result<Vec<_>, _>>()?
+ .into_iter()
+ .flatten()
+ .collect();
+
+ trace!("[StepExpr] Filtering got nodes {:?}", filtered_nodes);
+
+ if axis_step.predicates.predicates.is_empty() {
+ trace!(
+ "[StepExpr] No predicates, returning nodes {:?}",
+ filtered_nodes
+ );
+ Ok(Value::Nodeset(filtered_nodes))
+ } else {
+ // Apply predicates
+ let predicate_list_subcontext = context
+ .update_predicate_nodes(filtered_nodes.iter().map(|n| &**n).collect());
+ axis_step.predicates.evaluate(&predicate_list_subcontext)
+ }
+ },
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ match self {
+ StepExpr::Filter(filter_expr) => filter_expr.is_primitive(),
+ StepExpr::Axis(_) => false,
+ }
+ }
+}
+
+impl Evaluatable for PredicateListExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ if let Some(ref predicate_nodes) = context.predicate_nodes {
+ // Initializing: every node the predicates act on is matched
+ let mut matched_nodes: Vec<DomRoot<Node>> = predicate_nodes.clone();
+
+ // apply each predicate to the nodes matched by the previous predicate
+ for predicate_expr in &self.predicates {
+ let context_for_predicate =
+ context.update_predicate_nodes(matched_nodes.iter().map(|n| &**n).collect());
+
+ let narrowed_nodes = predicate_expr
+ .evaluate(&context_for_predicate)
+ .and_then(try_extract_nodeset)?;
+ matched_nodes = narrowed_nodes;
+ trace!(
+ "[PredicateListExpr] Predicate {:?} matched nodes {:?}",
+ predicate_expr,
+ matched_nodes
+ );
+ }
+ Ok(Value::Nodeset(matched_nodes))
+ } else {
+ Err(Error::Internal {
+ msg: "[PredicateListExpr] No nodes on stack for predicate to operate on"
+ .to_string(),
+ })
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ self.predicates.len() == 1 && self.predicates[0].is_primitive()
+ }
+}
+
+impl Evaluatable for PredicateExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ let narrowed_nodes: Result<Vec<DomRoot<Node>>, Error> = context
+ .subcontext_iter_for_nodes()
+ .filter_map(|ctx| {
+ if let Some(predicate_ctx) = ctx.predicate_ctx {
+ let eval_result = self.expr.evaluate(&ctx);
+
+ let v = match eval_result {
+ Ok(Value::Number(v)) => Ok(predicate_ctx.index == v as usize),
+ Ok(v) => Ok(v.boolean()),
+ Err(e) => Err(e),
+ };
+
+ match v {
+ Ok(true) => Some(Ok(ctx.context_node)),
+ Ok(false) => None,
+ Err(e) => Some(Err(e)),
+ }
+ } else {
+ Some(Err(Error::Internal {
+ msg: "[PredicateExpr] No predicate context set".to_string(),
+ }))
+ }
+ })
+ .collect();
+
+ Ok(Value::Nodeset(narrowed_nodes?))
+ }
+
+ fn is_primitive(&self) -> bool {
+ self.expr.is_primitive()
+ }
+}
+
+impl Evaluatable for FilterExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ let primary_result = self.primary.evaluate(context)?;
+ let have_predicates = !self.predicates.predicates.is_empty();
+
+ match (have_predicates, &primary_result) {
+ (false, _) => {
+ trace!(
+ "[FilterExpr] No predicates, returning primary result: {:?}",
+ primary_result
+ );
+ Ok(primary_result)
+ },
+ (true, Value::Nodeset(vec)) => {
+ let predicate_list_subcontext =
+ context.update_predicate_nodes(vec.iter().map(|n| &**n).collect());
+ let result_filtered_by_predicates =
+ self.predicates.evaluate(&predicate_list_subcontext);
+ trace!(
+ "[FilterExpr] Result filtered by predicates: {:?}",
+ result_filtered_by_predicates
+ );
+ result_filtered_by_predicates
+ },
+ // You can't use filtering expressions `[]` on other than node-sets
+ (true, _) => Err(Error::NotANodeset),
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ self.predicates.predicates.is_empty() && self.primary.is_primitive()
+ }
+}
+
+impl Evaluatable for PrimaryExpr {
+ fn evaluate(&self, context: &EvaluationCtx) -> Result<Value, Error> {
+ match self {
+ PrimaryExpr::Literal(literal) => literal.evaluate(context),
+ PrimaryExpr::Variable(_qname) => todo!(),
+ PrimaryExpr::Parenthesized(expr) => expr.evaluate(context),
+ PrimaryExpr::ContextItem => Ok(Value::Nodeset(vec![context.context_node.clone()])),
+ PrimaryExpr::Function(core_function) => core_function.evaluate(context),
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ match self {
+ PrimaryExpr::Literal(_) => true,
+ PrimaryExpr::Variable(_qname) => false,
+ PrimaryExpr::Parenthesized(expr) => expr.is_primitive(),
+ PrimaryExpr::ContextItem => false,
+ PrimaryExpr::Function(_) => false,
+ }
+ }
+}
+
+impl Evaluatable for Literal {
+ fn evaluate(&self, _context: &EvaluationCtx) -> Result<Value, Error> {
+ match self {
+ Literal::Numeric(numeric_literal) => match numeric_literal {
+ // We currently make no difference between ints and floats
+ NumericLiteral::Integer(v) => Ok(Value::Number(*v as f64)),
+ NumericLiteral::Decimal(v) => Ok(Value::Number(*v)),
+ },
+ Literal::String(s) => Ok(Value::String(s.into())),
+ }
+ }
+
+ fn is_primitive(&self) -> bool {
+ true
+ }
+}