aboutsummaryrefslogtreecommitdiffstats
path: root/python/tidy
diff options
context:
space:
mode:
authorMartin Robinson <mrobinson@igalia.com>2024-09-08 08:04:19 -0700
committerGitHub <noreply@github.com>2024-09-08 15:04:19 +0000
commite70507ca403c9475a92b3c1b8230fad08c9c7ab2 (patch)
treee13a67eacbaa5bbc664f026eb1ab5c1473b66668 /python/tidy
parentf6ae05007751968f90a702b15c8b5083453ad8c7 (diff)
downloadservo-e70507ca403c9475a92b3c1b8230fad08c9c7ab2.tar.gz
servo-e70507ca403c9475a92b3c1b8230fad08c9c7ab2.zip
tidy: Fix rustdoc warnings and add a tidy check for a common URL issue (#33366)
This change fixes all rustdoc errors and also adds a tidy check for a very common rustdoc URL issue. Eventually rustdoc warnings should likely cause the build to fail, but this catches those issues sooner in order to not waste so much developer time. Signed-off-by: Martin Robinson <mrobinson@igalia.com>
Diffstat (limited to 'python/tidy')
-rw-r--r--python/tidy/test.py36
-rw-r--r--python/tidy/tidy.py31
2 files changed, 66 insertions, 1 deletions
diff --git a/python/tidy/test.py b/python/tidy/test.py
index 1d7ce3fbfc8..5bdcbddd090 100644
--- a/python/tidy/test.py
+++ b/python/tidy/test.py
@@ -9,6 +9,7 @@
import logging
import os
+from typing import Iterable, Tuple
import unittest
from . import tidy
@@ -241,6 +242,41 @@ class CheckTidiness(unittest.TestCase):
errors = tidy.collect_errors_for_files(iterFile('multiline_string.rs'), [], [tidy.check_rust], print_text=False)
self.assertNoMoreErrors(errors)
def test_raw_url_in_rustdoc(self):
    """Bare URLs in rustdoc comments are reported; bracketed or linked URLs are not."""

    def errors_for(line: bytes) -> Iterable[Tuple[int, str]]:
        # Every case uses the same Rust file name and line index.
        return tidy.check_for_raw_urls_in_rustdoc("file.rs", 3, line)

    # A bare URL, even when wrapped in parentheses, yields exactly one error.
    for flagged_line in (
        b"/// https://google.com",
        b"//! (https://google.com)",
    ):
        errors = errors_for(flagged_line)
        self.assertEqual(tidy.ERROR_RAW_URL_IN_RUSTDOC, next(errors)[1])
        self.assertNoMoreErrors(errors)

    # Angle-bracket links, link definitions and inline markdown links are accepted.
    for accepted_line in (
        b"/// <https://google.com>",
        b"/// [hi]: https://google.com",
        b"/// [hi](https://google.com)",
    ):
        self.assertNoMoreErrors(errors_for(accepted_line))
+
def run_tests():
verbosity = 1 if logging.getLogger().level >= logging.WARN else 2
diff --git a/python/tidy/tidy.py b/python/tidy/tidy.py
index b57a127c102..88e92b80922 100644
--- a/python/tidy/tidy.py
+++ b/python/tidy/tidy.py
@@ -32,8 +32,11 @@ CONFIG_FILE_PATH = os.path.join(".", "servo-tidy.toml")
WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini")
# regex source https://stackoverflow.com/questions/6883049/
URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
+UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock")
+ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link."
+
sys.path.append(os.path.join(WPT_PATH, "tests"))
sys.path.append(os.path.join(WPT_PATH, "tests", "tools", "wptrunner"))
@@ -306,13 +309,39 @@ def check_whitespace(idx, line):
yield (idx + 1, "CR on line")
-def check_by_line(file_name, lines):
def check_for_raw_urls_in_rustdoc(file_name: str, idx: int, line: bytes):
    """Check that rustdoc comments in Rust source code do not have raw URLs.

    Raw URLs appear as warnings when rustdoc is run. rustdoc warnings could be
    made fatal, but adding this check as part of tidy catches this common
    problem without having to run rustdoc for all of Servo.

    Args:
        file_name: Path of the file being checked; only ``.rs`` files are examined.
        idx: Zero-based index of ``line`` within the file.
        line: The raw bytes of the line to check.

    Yields:
        At most one ``(idx + 1, ERROR_RAW_URL_IN_RUSTDOC)`` tuple, emitted when
        any URL following the ``///`` or ``//!`` marker is not written in one
        of the accepted forms.
    """
    if not file_name.endswith(".rs"):
        return

    marker_positions = [
        position
        for position in (line.find(b"///"), line.find(b"//!"))
        if position != -1
    ]
    if not marker_positions:
        return

    # Only text after the doc-comment marker is rustdoc content; a URL that
    # appears earlier on the line belongs to code and is not rustdoc's concern.
    comment_start = min(marker_positions) + 3

    # Types of URLs that are allowed, identified by what immediately precedes them:
    #  - A URL opened by an angle or square bracket:   <https://example.com>
    #  - A markdown link:                              [text](https://example.com)
    #  - A URL as part of a markdown definition identifier:
    #        [link text]: https://example.com
    allowed_prefixes = (b"<", b"[", b"](", b"]: ")

    # Examine every URL on the line, not just the first one. Testing the text
    # *before* the match with endswith() avoids negative slice indices, which
    # would wrap around to the end of the line for URLs near the line start.
    for match in URL_REGEX.finditer(line, comment_start):
        if not line[:match.start()].endswith(allowed_prefixes):
            # Report once per line, however many raw URLs it contains.
            yield (idx + 1, ERROR_RAW_URL_IN_RUSTDOC)
            return
+
+
+def check_by_line(file_name: str, lines: list[bytes]):
for idx, line in enumerate(lines):
errors = itertools.chain(
check_length(file_name, idx, line),
check_whitespace(idx, line),
check_whatwg_specific_url(idx, line),
check_whatwg_single_page_url(idx, line),
+ check_for_raw_urls_in_rustdoc(file_name, idx, line),
)
for error in errors: