aboutsummaryrefslogtreecommitdiffstats
path: root/etc/ci
diff options
context:
space:
mode:
authorMartin Robinson <mrobinson@igalia.com>2023-04-12 11:24:56 +0200
committerMartin Robinson <mrobinson@igalia.com>2023-04-12 13:37:52 +0200
commit72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305 (patch)
tree090bef2a913ea03e950a59421d5edc213418bb43 /etc/ci
parentcf4160b6ed73bbba5c891b1fd4dcceaf94ff9807 (diff)
downloadservo-72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305.tar.gz
servo-72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305.zip
Handle non-UTF8 diffs in the WPT upstream script
The output of `git diff` and `git show` can include non-UTF8 text or binary data, so the WPT upstream script should handle this properly. Fixes #29620.
Diffstat (limited to 'etc/ci')
-rw-r--r--etc/ci/upstream-wpt-changes/test.py22
-rw-r--r--etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff7
-rw-r--r--etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py13
-rw-r--r--etc/ci/upstream-wpt-changes/wptupstreamer/step.py6
4 files changed, 42 insertions, 6 deletions
diff --git a/etc/ci/upstream-wpt-changes/test.py b/etc/ci/upstream-wpt-changes/test.py
index c56831360ba..3ecf3afbb3c 100644
--- a/etc/ci/upstream-wpt-changes/test.py
+++ b/etc/ci/upstream-wpt-changes/test.py
@@ -229,7 +229,7 @@ class TestApplyCommitsToWPT(unittest.TestCase):
def run_test(self, pr_number: int, commit_data: dict):
def make_commit(data):
- with open(os.path.join(TESTS_DIR, data[2]), encoding="utf-8") as file:
+ with open(os.path.join(TESTS_DIR, data[2]), "rb") as file:
return {"author": data[0], "message": data[1], "diff": file.read()}
commits = [make_commit(data) for data in commit_data]
@@ -272,6 +272,15 @@ class TestApplyCommitsToWPT(unittest.TestCase):
[
["test author <test@author>", "test commit message", "18746.diff"],
["another person <two@author>", "a different message", "wpt.diff"],
+ ["another person <two@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"],
+ ],
+ )
+
+ def test_non_utf8_commit(self):
+ self.run_test(
+ 100,
+ [
+ ["test author <nonutf8@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"],
],
)
@@ -455,6 +464,17 @@ class TestFullSyncRun(unittest.TestCase):
]
)
+ def test_opened_upstreamable_pr_with_non_utf8_file_contents(self):
+ self.assertListEqual(
+ self.run_test("opened.json", ["add-non-utf8-file.diff"]),
+ [
+ "CreateOrUpdateBranchForPRStep:1:servo-wpt-sync/wpt/servo_export_18746",
+ "OpenPRStep:servo-wpt-sync/wpt/servo_export_18746→wpt/wpt#1",
+ "CommentStep:servo/servo#18746:🤖 Opened new upstream WPT pull request "
+ "(wpt/wpt#1) with upstreamable changes.",
+ ],
+ )
+
def test_open_new_upstreamable_pr_with_preexisting_upstream_pr_not_apply_cleanly_to_upstream(
self,
):
diff --git a/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff b/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff
new file mode 100644
index 00000000000..bd3b755d87d
--- /dev/null
+++ b/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff
@@ -0,0 +1,7 @@
+diff --git a/tests/wpt/web-platform-tests/non-utf8-file.txt b/tests/wpt/web-platform-tests/non-utf8-file.txt
+new file mode 100644
+index 0000000..5a992e0
+--- /dev/null
++++ b/tests/wpt/web-platform-tests/non-utf8-file.txt
+@@ -0,0 +1 @@
++foo bér ÿ "
diff --git a/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py b/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py
index 156bb108c5a..1ca65c56321 100644
--- a/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py
+++ b/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py
@@ -50,7 +50,7 @@ class LocalGitRepo:
self.path = path
self.sync = sync
- def run(self, *args, env: dict = {}):
+ def run_without_encoding(self, *args, env: dict = {}):
command_line = ["git"] + list(args)
logging.info(" → Execution (cwd='%s'): %s",
self.path, " ".join(command_line))
@@ -63,12 +63,19 @@ class LocalGitRepo:
try:
return subprocess.check_output(
command_line, cwd=self.path, env=env, stderr=subprocess.STDOUT
- ).decode("utf-8")
+ )
except subprocess.CalledProcessError as exception:
logging.warning("Process execution failed with output:\n%s",
- exception.output.decode("utf-8"))
+ exception.output.decode("utf-8", errors="surrogateescape"))
raise exception
+ def run(self, *args, env: dict = {}):
+ return (
+ self
+ .run_without_encoding(*args, env=env)
+ .decode("utf-8", errors="surrogateescape")
+ )
+
@dataclasses.dataclass()
class SyncRun:
diff --git a/etc/ci/upstream-wpt-changes/wptupstreamer/step.py b/etc/ci/upstream-wpt-changes/wptupstreamer/step.py
index 8c9d5c9a93b..9781353ce15 100644
--- a/etc/ci/upstream-wpt-changes/wptupstreamer/step.py
+++ b/etc/ci/upstream-wpt-changes/wptupstreamer/step.py
@@ -114,7 +114,9 @@ class CreateOrUpdateBranchForPRStep(Step):
# TODO: If we could cleverly parse and manipulate the full commit diff
# we could avoid cloning the servo repository altogether and only
# have to fetch the commit diffs from GitHub.
- diff = local_servo_repo.run(
+ # NB: The output of git show might include binary files or non-UTF8 text,
+ # so store the content of the diff as a `bytes`.
+ diff = local_servo_repo.run_without_encoding(
"show", "--binary", "--format=%b", sha, "--", UPSTREAMABLE_PATH
)
@@ -140,7 +142,7 @@ class CreateOrUpdateBranchForPRStep(Step):
strip_count = UPSTREAMABLE_PATH.count("/") + 1
try:
- with open(patch_path, "w", encoding="utf-8") as file:
+ with open(patch_path, "wb") as file:
file.write(commit["diff"])
run.sync.local_wpt_repo.run(
"apply", PATCH_FILE_NAME, "-p", str(strip_count)