aboutsummaryrefslogtreecommitdiffstats
path: root/includes
diff options
context:
space:
mode:
Diffstat (limited to 'includes')
-rw-r--r--includes/composer/PhpUnitSplitter/SuiteSplittingException.php12
-rw-r--r--includes/composer/PhpUnitSplitter/TestSuiteBuilder.php269
2 files changed, 218 insertions, 63 deletions
diff --git a/includes/composer/PhpUnitSplitter/SuiteSplittingException.php b/includes/composer/PhpUnitSplitter/SuiteSplittingException.php
new file mode 100644
index 000000000000..91182dd483b6
--- /dev/null
+++ b/includes/composer/PhpUnitSplitter/SuiteSplittingException.php
@@ -0,0 +1,12 @@
+<?php
+
+declare( strict_types = 1 );
+
+namespace MediaWiki\Composer\PhpUnitSplitter;
+
+/**
+ * @license GPL-2.0-or-later
+ */
+class SuiteSplittingException extends \RuntimeException {
+
+}
diff --git a/includes/composer/PhpUnitSplitter/TestSuiteBuilder.php b/includes/composer/PhpUnitSplitter/TestSuiteBuilder.php
index b9da8c651cab..a59673916a96 100644
--- a/includes/composer/PhpUnitSplitter/TestSuiteBuilder.php
+++ b/includes/composer/PhpUnitSplitter/TestSuiteBuilder.php
@@ -14,7 +14,7 @@ class TestSuiteBuilder {
}
/**
- * Try to build balanced groups (split_groups / buckets) of tests. We have a couple of
+ * Try to build balanced groups (split_groups) of tests. We have a couple of
* objectives here:
* - the groups should contain a stable ordering of tests so that we reduce the amount
* of random test failures due to test re-ordering
@@ -24,80 +24,223 @@ class TestSuiteBuilder {
* - the groups should have a similar test execution time
*
* Information about test duration may be completely absent (if no test cache information is
- * supplied), or partially absent (if the test has not been seen before). Since we neither
- * want to ignore the duration information nor rely on it, we compromise by filling the buckets
- * until we have reached a maximum by test count *or* by duration. This has the consequence
- * that tests with a duration of zero will be treated somewhat like tests with an average
- * duration.
+ * supplied), or partially absent (if the test has not been seen before). We attempt to create
+ * similar-duration split-groups using the information we have available, and if anything goes
+ * wrong we fall back to just creating split-groups with the same number of tests in them.
*
* @param array $testDescriptors the list of tests that we want to sort into split_groups
* @param int $groups the number of split_groups we are targetting
+ * @param ?int $chunkSize optionally override the size of the 'chunks' into which tests
+ * are grouped. If not supplied, the chunk size will depend on the total number
+ * of tests.
* @return array a structured array of the resulting split_groups
*/
- public function buildSuites( array $testDescriptors, int $groups ): array {
+ public function buildSuites( array $testDescriptors, int $groups, ?int $chunkSize = null ): array {
$suites = array_fill( 0, $groups, [ "list" => [], "time" => 0 ] );
+ // If there are no test descriptors, we just return empty suites
+ if ( $testDescriptors === [] ) {
+ return $suites;
+ }
- // Sort the tests alphabetically so that tests in the same extension (folder) stay
- // together in the same split_group
+ // Sort the tests by name so that we run tests of the same extension together and in a predictable order
usort( $testDescriptors, [ self::class, "sortByNameAscending" ] );
- // Count the total number of tests (with valid filenames) and set the max number
- // of tests per bucket
- $testCount = array_reduce(
- $testDescriptors,
- static fn ( $acc, $descriptor ) => ( $descriptor->getFilename() ? $acc + 1 : $acc ),
- 0
- );
- $bucketTestCount = ceil( $testCount / $groups );
-
- // Count the total duration of tests (with duration information) and set the max
- // duration per bucket
- $totalDuration = array_reduce(
- $testDescriptors,
- static fn ( $acc, $descriptor ) => $acc + $descriptor->getDuration(),
- 0
- );
- $maxBucketDuration = ceil( $totalDuration / $groups );
-
- // Counters for current bucket and cumulative counters for total progress
- $currentTestIndex = 0;
- $currentBucketDuration = 0;
- $currentBucketIndex = 0;
- $cumulativeTestCount = 0;
- $cumulativeDuration = 0;
- foreach ( $testDescriptors as $testDescriptor ) {
- if ( !$testDescriptor->getFilename() ) {
- // We didn't resolve a matching file for this test, so we skip it
- // from the suite here. This only happens for "known" missing test
- // classes (see PhpUnitXmlManager::EXPECTED_MISSING_CLASSES) - in
- // all other cases a missing test file will throw an exception during
- // suite building.
- continue;
- }
- $suites[$currentBucketIndex]["list"][] = $testDescriptor->getFilename();
- $suites[$currentBucketIndex]["time"] += $testDescriptor->getDuration();
- $currentTestIndex += 1;
- $cumulativeTestCount += 1;
- $currentBucketDuration += $testDescriptor->getDuration();
- $cumulativeDuration += $testDescriptor->getDuration();
-
- // Advance to the next bucket if we either have reached the limit in number of tests or the
- // limit in test duration
- if ( $currentTestIndex >= $bucketTestCount || $currentBucketDuration > $maxBucketDuration ) {
- // Don't advance past the last bucket. If we reached the last bucket, just dump
- // everything in there.
- if ( $currentBucketIndex < $groups - 1 ) {
- $currentBucketIndex++;
- }
- $currentTestIndex = 0;
- $currentBucketDuration = 0;
+ $descriptorCount = count( $testDescriptors );
+ if ( $chunkSize === null ) {
+ // The algorithm is CPU intensive - make sure we run with at most 200 'chunks' of tests to group
+ $chunkSize = intval( ceil( $descriptorCount / 200 ) );
+ }
+
+ // Skip over any leading zero-time tests, and add them back to the first group at the end
+ // Without this adjustment, the dynamic-sizing algorithm can end up with a zero-size split-group
+ // which would cause PHPUnit to error.
+ $startIndex = 0;
+ while ( $startIndex < $descriptorCount && $testDescriptors[$startIndex]->getDuration() == 0 ) {
+ $startIndex++;
+ }
+
+ if ( $startIndex === 0 ) {
+ $testDescriptorsWithoutLeadingZeros = $testDescriptors;
+ $leadingZeros = [];
+ } elseif ( $startIndex < $descriptorCount ) {
+ $leadingZeros = array_map(
+ static fn ( $testDescriptor ) => $testDescriptor->getFilename(),
+ array_slice( $testDescriptors, 0, $startIndex )
+ );
+ $testDescriptorsWithoutLeadingZeros = array_slice( $testDescriptors, $startIndex );
+ } else {
+ // if we never encounter a test with duration information, fall back to splitting
+ // tests into split-groups with the same number of test classes.
+ return $this->buildSuitesNoDurationInformation( $testDescriptors, $groups );
+ }
+
+ try {
+ $this->buildSuitesWithDurationInformationWithoutLeadingEmptyTests(
+ $testDescriptorsWithoutLeadingZeros, $suites, $groups, $chunkSize
+ );
+ } catch ( SuiteSplittingException $se ) {
+ return $this->buildSuitesNoDurationInformation( $testDescriptors, $groups );
+ }
- // Rebalance the bucket targets - $remainingBuckets will be at least 1
- $remainingBuckets = $groups - $currentBucketIndex;
- $bucketTestCount = ceil( ( $testCount - $cumulativeTestCount ) / $remainingBuckets );
- $maxBucketDuration = ceil( ( $totalDuration - $cumulativeDuration ) / $remainingBuckets );
+ // Add any zero-length tests that we sliced away before splitting back to the first group
+ if ( $leadingZeros !== [] ) {
+ $suites[0]["list"] = array_merge( $leadingZeros, $suites[0]["list"] );
+ }
+
+ return $suites;
+ }
+
+ /**
+ * @throws SuiteSplittingException
+ */
+ private function buildSuitesWithDurationInformationWithoutLeadingEmptyTests(
+ array $testDescriptorsWithoutLeadingZeros,
+ array &$suites,
+ int $numGroups,
+ int $chunkSize
+ ): void {
+ $n = count( $testDescriptorsWithoutLeadingZeros );
+ if ( $n == 0 ) {
+ return;
+ }
+
+ $chunks = $this->createChunksOfTests( $n, $testDescriptorsWithoutLeadingZeros, $chunkSize );
+
+ $numChunks = count( $chunks );
+ $durations = array_column( $chunks, "time" );
+
+ // Build an array of cumulative test duration (or 'prefix sum') - sum(0..i){x.duration}
+ $prefixSum = $this->calculatePrefixSum( $numChunks, $durations );
+
+ // Generate backtracking table
+ $backtrack = $this->generateBacktrackingTable( $numChunks, $numGroups, $prefixSum );
+
+ $this->constructSplitGroups( $numGroups, $numChunks, $chunks, $backtrack, $suites );
+ }
+
+ /**
+ * Called as a fallback for the case where not duration information is available.
+ * Simply split the tests into $groups equally-sized split-groups.
+ *
+ * @param TestDescriptor[] $testDescriptors
+ * @param int $groups
+ * @return array
+ */
+ private function buildSuitesNoDurationInformation( array $testDescriptors, int $groups ): array {
+ $suites = array_fill( 0, $groups, [ "list" => [], "time" => 0 ] );
+ $testCount = count( $testDescriptors );
+ $splitGroupSize = ceil( $testCount / $groups );
+ $bucketIndex = 0;
+ $testIndex = 0;
+ for ( $currentGroup = 0; $currentGroup < $groups, $testIndex < $testCount; $testIndex++, $bucketIndex++ ) {
+ if ( $bucketIndex >= $splitGroupSize ) {
+ $bucketIndex = 0;
+ $currentGroup++;
+ if ( $currentGroup === $groups ) {
+ break;
+ }
}
+ $suites[$currentGroup]["list"][] = $testDescriptors[$testIndex]->getFilename();
+ $suites[$currentGroup]["time"] += $testDescriptors[$testIndex]->getDuration();
}
+
return $suites;
}
+
+ private function createChunksOfTests( int $n, array $testDescriptors, int $chunkSize ): array {
+ $chunks = [];
+ for ( $i = 0; $i < $n; $i += $chunkSize ) {
+ $chunk = array_slice( $testDescriptors, $i, min( $chunkSize, $n - $i ) );
+ $chunks[] = [
+ "list" => $chunk,
+ "time" => array_reduce( $chunk, static fn ( $sum, $test ) => $sum + $test->getDuration(), 0 )
+ ];
+ }
+ return $chunks;
+ }
+
+ private function calculatePrefixSum( int $numChunks, array $durations ): array {
+ $prefixSum = array_fill( 0, $numChunks + 1, 0 );
+ for ( $i = 1; $i <= $numChunks; $i++ ) {
+ $prefixSum[$i] = $prefixSum[$i - 1] + $durations[$i - 1];
+ }
+ return $prefixSum;
+ }
+
+ /**
+ * Construct the split groups from the backtracking table.
+ * @throws SuiteSplittingException
+ */
+ private function constructSplitGroups(
+ int $numGroups,
+ int $numChunks,
+ array $chunks,
+ array $backtrack,
+ array &$suites
+ ) {
+ for ( $splitGroupId = $numGroups, $i = $numChunks; $splitGroupId > 0; --$splitGroupId ) {
+ $startIndex = $backtrack[$i][$splitGroupId];
+ if ( $startIndex === -1 ) {
+ throw new SuiteSplittingException( "Invalid backtracking index building group " . $splitGroupId );
+ }
+ $suites[$splitGroupId - 1]["list"] = array_merge(
+ ...array_map( static fn ( $chunk ) => array_map(
+ static fn ( $test ) => $test->getFilename(),
+ $chunk["list"]
+ ),
+ array_slice( $chunks, $startIndex, $i - $startIndex )
+ )
+ );
+ $suites[$splitGroupId - 1]["time"] = array_reduce(
+ array_slice( $chunks, $startIndex, $i - $startIndex ),
+ static fn ( $acc, $chunk ) => $acc + $chunk["time"],
+ 0
+ );
+ $i = $startIndex;
+ }
+ }
+
+ /**
+ * Find the distribution of group split points that minimises the duration of the largest split_group
+ * and thereby minimises the duration of the CI job.
+ */
+ private function generateBacktrackingTable( int $numChunks, int $numGroups, array $prefixSum ): array {
+ // $minimumLargestBucket[x][y] is the minimum possible largest split_group duration when splitting
+ // the first x chunks into y groups
+ $minimumLargestBucket = array_fill( 0, $numChunks + 1, array_fill( 0, $numGroups + 1, PHP_INT_MAX ) );
+ // The backtracking table keeps track of the end point of the last group of the optimal distribution
+ $backtrack = array_fill( 0, $numChunks + 1, array_fill( 0, $numGroups + 1, -1 ) );
+
+ $minimumLargestBucket[0][0] = 0;
+
+ // We work inductively, starting with distributing 1 chunk into 1 split_group
+ // and progressively distributing more tests until all chunks are in a split_group
+ for ( $i = 1; $i <= $numChunks; $i++ ) {
+ // For $i chunks, split them into up to $numGroups groups by identifying the optimal split points
+ for ( $j = 1; $j <= min( $numGroups, $i ); $j++ ) {
+ // For each split group $j find a split point $k, somewhere before the current chunk
+ for ( $k = 0; $k < $i; $k++ ) {
+ // the difference of the prefix sums is the combined duration of chunks $k through $i
+ $currentSplitGroupDuration = $prefixSum[$i] - $prefixSum[$k];
+ // Compare the duration of the proposed split group with the minimum duration we found so far
+ // for splitting $k tests into $j-1 groups.
+ $newBestCaseMinimumLargestBucket = max(
+ $minimumLargestBucket[$k][$j - 1], $currentSplitGroupDuration
+ );
+ // If our current duration is smaller, we know that we can split $k tests into $j groups without
+ // increasing the minimum duration. If it is greater, we know that putting a split point at $k would
+ // make the minimum duration of any group at least $currentSplitGroupDuration.
+ if ( $newBestCaseMinimumLargestBucket < $minimumLargestBucket[$i][$j] ) {
+ // If the new duration is less than the existing minimum for splitting $i tests into $j groups,
+ // we update the minimum duration.
+ $minimumLargestBucket[$i][$j] = $newBestCaseMinimumLargestBucket;
+ // Set the backtrack reference so that we know where the split point was for this minimal value.
+ $backtrack[$i][$j] = $k;
+ }
+ }
+ }
+ }
+ return $backtrack;
+ }
+
}