diff options
author | Martin Robinson <mrobinson@igalia.com> | 2024-09-08 08:04:19 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-08 15:04:19 +0000 |
commit | e70507ca403c9475a92b3c1b8230fad08c9c7ab2 (patch) | |
tree | e13a67eacbaa5bbc664f026eb1ab5c1473b66668 /python/tidy | |
parent | f6ae05007751968f90a702b15c8b5083453ad8c7 (diff) | |
download | servo-e70507ca403c9475a92b3c1b8230fad08c9c7ab2.tar.gz servo-e70507ca403c9475a92b3c1b8230fad08c9c7ab2.zip |
tidy: Fix rustdoc warnings and add a tidy check for a common URL issue (#33366)
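This change fixes all existing rustdoc warnings and also adds a tidy check for a
very common rustdoc URL issue: raw URLs in doc comments that are neither wrapped
in angle brackets nor written as markdown links. Eventually rustdoc warnings
should probably cause the build to fail, but this check catches the issue sooner,
without having to run rustdoc, so that less developer time is wasted.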
Signed-off-by: Martin Robinson <mrobinson@igalia.com>
Diffstat (limited to 'python/tidy')
-rw-r--r-- | python/tidy/test.py | 36 | ||||
-rw-r--r-- | python/tidy/tidy.py | 31 |
2 files changed, 66 insertions, 1 deletion
diff --git a/python/tidy/test.py b/python/tidy/test.py index 1d7ce3fbfc8..5bdcbddd090 100644 --- a/python/tidy/test.py +++ b/python/tidy/test.py @@ -9,6 +9,7 @@ import logging import os +from typing import Iterable, Tuple import unittest from . import tidy @@ -241,6 +242,41 @@ class CheckTidiness(unittest.TestCase): errors = tidy.collect_errors_for_files(iterFile('multiline_string.rs'), [], [tidy.check_rust], print_text=False) self.assertNoMoreErrors(errors) + def test_raw_url_in_rustdoc(self): + def assert_has_a_single_rustdoc_error(errors: Iterable[Tuple[int, str]]): + self.assertEqual(tidy.ERROR_RAW_URL_IN_RUSTDOC, next(errors)[1]) + self.assertNoMoreErrors(errors) + + errors = tidy.check_for_raw_urls_in_rustdoc( + "file.rs", 3, + b"/// https://google.com" + ) + assert_has_a_single_rustdoc_error(errors) + + errors = tidy.check_for_raw_urls_in_rustdoc( + "file.rs", 3, + b"//! (https://google.com)" + ) + assert_has_a_single_rustdoc_error(errors) + + errors = tidy.check_for_raw_urls_in_rustdoc( + "file.rs", 3, + b"/// <https://google.com>" + ) + self.assertNoMoreErrors(errors) + + errors = tidy.check_for_raw_urls_in_rustdoc( + "file.rs", 3, + b"/// [hi]: https://google.com" + ) + self.assertNoMoreErrors(errors) + + errors = tidy.check_for_raw_urls_in_rustdoc( + "file.rs", 3, + b"/// [hi](https://google.com)" + ) + self.assertNoMoreErrors(errors) + def run_tests(): verbosity = 1 if logging.getLogger().level >= logging.WARN else 2 diff --git a/python/tidy/tidy.py b/python/tidy/tidy.py index b57a127c102..88e92b80922 100644 --- a/python/tidy/tidy.py +++ b/python/tidy/tidy.py @@ -32,8 +32,11 @@ CONFIG_FILE_PATH = os.path.join(".", "servo-tidy.toml") WPT_CONFIG_INI_PATH = os.path.join(WPT_PATH, "config.ini") # regex source https://stackoverflow.com/questions/6883049/ URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') +UTF8_URL_REGEX = re.compile(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+') CARGO_LOCK_FILE = os.path.join(TOPDIR, "Cargo.lock") 
+ERROR_RAW_URL_IN_RUSTDOC = "Found raw link in rustdoc. Please escape it with angle brackets or use a markdown link." + sys.path.append(os.path.join(WPT_PATH, "tests")) sys.path.append(os.path.join(WPT_PATH, "tests", "tools", "wptrunner")) @@ -306,13 +309,39 @@ def check_whitespace(idx, line): yield (idx + 1, "CR on line") -def check_by_line(file_name, lines): +def check_for_raw_urls_in_rustdoc(file_name: str, idx: int, line: bytes): + """Check that rustdoc comments in Rust source code do not have raw URLs. These appear + as warnings when rustdoc is run. rustdoc warnings could be made fatal, but adding this + check as part of tidy catches this common problem without having to run rustdoc for all + of Servo.""" + if not file_name.endswith(".rs"): + return + + if b"///" not in line and b"//!" not in line: + return + + # Types of URLS that are allowed: + # - A URL surrounded by angle or square brackets. + # - A markdown link. + # - A URL as part of a markdown definition identifer. + # [link text]: https://example.com + match = URL_REGEX.search(line) + if match and ( + not line[match.start() - 1:].startswith(b"<") + and not line[match.start() - 1:].startswith(b"[") + and not line[match.start() - 2:].startswith(b"](") + and not line[match.start() - 3:].startswith(b"]: ")): + yield (idx + 1, ERROR_RAW_URL_IN_RUSTDOC) + + +def check_by_line(file_name: str, lines: list[bytes]): for idx, line in enumerate(lines): errors = itertools.chain( check_length(file_name, idx, line), check_whitespace(idx, line), check_whatwg_specific_url(idx, line), check_whatwg_single_page_url(idx, line), + check_for_raw_urls_in_rustdoc(file_name, idx, line), ) for error in errors: |