diff options
author | Martin Robinson <mrobinson@igalia.com> | 2023-04-12 11:24:56 +0200 |
---|---|---|
committer | Martin Robinson <mrobinson@igalia.com> | 2023-04-12 13:37:52 +0200 |
commit | 72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305 (patch) | |
tree | 090bef2a913ea03e950a59421d5edc213418bb43 /etc/ci | |
parent | cf4160b6ed73bbba5c891b1fd4dcceaf94ff9807 (diff) | |
download | servo-72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305.tar.gz servo-72dbf0e752b4e18a6945d8af72b5f6c6dd0ac305.zip |
Handle non-UTF8 diffs in the WPT upstream script
The output of `git diff` and `git show` can include non-UTF8 text or
binary data, so the WPT upstream script should handle this properly.
Fixes #29620.
Diffstat (limited to 'etc/ci')
-rw-r--r-- | etc/ci/upstream-wpt-changes/test.py | 22 | ||||
-rw-r--r-- | etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff | 7 | ||||
-rw-r--r-- | etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py | 13 | ||||
-rw-r--r-- | etc/ci/upstream-wpt-changes/wptupstreamer/step.py | 6 |
4 files changed, 42 insertions, 6 deletions
diff --git a/etc/ci/upstream-wpt-changes/test.py b/etc/ci/upstream-wpt-changes/test.py index c56831360ba..3ecf3afbb3c 100644 --- a/etc/ci/upstream-wpt-changes/test.py +++ b/etc/ci/upstream-wpt-changes/test.py @@ -229,7 +229,7 @@ class TestApplyCommitsToWPT(unittest.TestCase): def run_test(self, pr_number: int, commit_data: dict): def make_commit(data): - with open(os.path.join(TESTS_DIR, data[2]), encoding="utf-8") as file: + with open(os.path.join(TESTS_DIR, data[2]), "rb") as file: return {"author": data[0], "message": data[1], "diff": file.read()} commits = [make_commit(data) for data in commit_data] @@ -272,6 +272,15 @@ class TestApplyCommitsToWPT(unittest.TestCase): [ ["test author <test@author>", "test commit message", "18746.diff"], ["another person <two@author>", "a different message", "wpt.diff"], + ["another person <two@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"], + ], + ) + + def test_non_utf8_commit(self): + self.run_test( + 100, + [ + ["test author <nonutf8@author>", "adding some non-utf8 chaos", "add-non-utf8-file.diff"], ], ) @@ -455,6 +464,17 @@ class TestFullSyncRun(unittest.TestCase): ] ) + def test_opened_upstreamable_pr_with_non_utf8_file_contents(self): + self.assertListEqual( + self.run_test("opened.json", ["add-non-utf8-file.diff"]), + [ + "CreateOrUpdateBranchForPRStep:1:servo-wpt-sync/wpt/servo_export_18746", + "OpenPRStep:servo-wpt-sync/wpt/servo_export_18746→wpt/wpt#1", + "CommentStep:servo/servo#18746:🤖 Opened new upstream WPT pull request " + "(wpt/wpt#1) with upstreamable changes.", + ], + ) + def test_open_new_upstreamable_pr_with_preexisting_upstream_pr_not_apply_cleanly_to_upstream( self, ): diff --git a/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff b/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff new file mode 100644 index 00000000000..bd3b755d87d --- /dev/null +++ b/etc/ci/upstream-wpt-changes/tests/add-non-utf8-file.diff @@ -0,0 +1,7 @@ +diff --git a/tests/wpt/web-platform-tests/non-utf8-file.txt b/tests/wpt/web-platform-tests/non-utf8-file.txt +new file mode 100644 +index 0000000..5a992e0 +--- /dev/null ++++ b/tests/wpt/web-platform-tests/non-utf8-file.txt +@@ -0,0 +1 @@ ++foo bér ÿ " diff --git a/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py b/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py index 156bb108c5a..1ca65c56321 100644 --- a/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py +++ b/etc/ci/upstream-wpt-changes/wptupstreamer/__init__.py @@ -50,7 +50,7 @@ class LocalGitRepo: self.path = path self.sync = sync - def run(self, *args, env: dict = {}): + def run_without_encoding(self, *args, env: dict = {}): command_line = ["git"] + list(args) logging.info(" → Execution (cwd='%s'): %s", self.path, " ".join(command_line)) @@ -63,12 +63,19 @@ class LocalGitRepo: try: return subprocess.check_output( command_line, cwd=self.path, env=env, stderr=subprocess.STDOUT - ).decode("utf-8") + ) except subprocess.CalledProcessError as exception: logging.warning("Process execution failed with output:\n%s", - exception.output.decode("utf-8")) + exception.output.decode("utf-8", errors="surrogateescape")) raise exception + def run(self, *args, env: dict = {}): + return ( + self + .run_without_encoding(*args, env=env) + .decode("utf-8", errors="surrogateescape") + ) + @dataclasses.dataclass() class SyncRun: diff --git a/etc/ci/upstream-wpt-changes/wptupstreamer/step.py b/etc/ci/upstream-wpt-changes/wptupstreamer/step.py index 8c9d5c9a93b..9781353ce15 100644 --- a/etc/ci/upstream-wpt-changes/wptupstreamer/step.py +++ b/etc/ci/upstream-wpt-changes/wptupstreamer/step.py @@ -114,7 +114,9 @@ class CreateOrUpdateBranchForPRStep(Step): # TODO: If we could cleverly parse and manipulate the full commit diff # we could avoid cloning the servo repository altogether and only # have to fetch the commit diffs from GitHub. - diff = local_servo_repo.run( + # NB: The output of git show might include binary files or non-UTF8 text, + # so store the content of the diff as a `bytes`. + diff = local_servo_repo.run_without_encoding( "show", "--binary", "--format=%b", sha, "--", UPSTREAMABLE_PATH ) @@ -140,7 +142,7 @@ class CreateOrUpdateBranchForPRStep(Step): strip_count = UPSTREAMABLE_PATH.count("/") + 1 try: - with open(patch_path, "w", encoding="utf-8") as file: + with open(patch_path, "wb") as file: file.write(commit["diff"]) run.sync.local_wpt_repo.run( "apply", PATCH_FILE_NAME, "-p", str(strip_count) |