diff options
author | Timo Tijhof <krinkle@fastmail.com> | 2022-10-15 23:12:43 +0100 |
---|---|---|
committer | Timo Tijhof <krinkle@fastmail.com> | 2022-10-15 23:16:10 +0100 |
commit | 77dbe2d19c1a0e88a53175862cc8dbfdcc33be16 (patch) | |
tree | 6aff6a4b07a5b203d63e393d1142c474b8e4f7d3 /includes/Maintenance | |
parent | 59fd86d553b0c90b5673e4eff6d8c95a3d253a54 (diff) | |
download | mediawikicore-77dbe2d19c1a0e88a53175862cc8dbfdcc33be16.tar.gz mediawikicore-77dbe2d19c1a0e88a53175862cc8dbfdcc33be16.zip |
Maintenance: Move OrderedStreamingForkController to PSR-4 namespace
* Improve class docs while at it.
* Fix the odd file-level docblock that was combined with the
class block in a way virtually no other file in core does.
Bug: T166010
Change-Id: Iefcece6da487166f04ccf8be94a24749c9ef97e2
Diffstat (limited to 'includes/Maintenance')
-rw-r--r-- | includes/Maintenance/ForkController.php | 9 | ||||
-rw-r--r-- | includes/Maintenance/OrderedStreamingForkController.php | 234 |
2 files changed, 238 insertions, 5 deletions
diff --git a/includes/Maintenance/ForkController.php b/includes/Maintenance/ForkController.php index ccc8b68267b3..ac72ed1826dd 100644 --- a/includes/Maintenance/ForkController.php +++ b/includes/Maintenance/ForkController.php @@ -1,7 +1,5 @@ <?php /** - * Class for managing forking command line scripts. - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -28,9 +26,10 @@ use ObjectCache; use RedisConnectionPool; /** - * Class for managing forking command line scripts. - * Currently just does forking and process control, but it could easily be extended - * to provide IPC and job dispatch. + * Manage forking inside CLI maintenance scripts. + * + * Only handles forking and process control. In the future, this could + * be extended to provide IPC and job dispatch. * * This class requires the posix and pcntl extensions. * diff --git a/includes/Maintenance/OrderedStreamingForkController.php b/includes/Maintenance/OrderedStreamingForkController.php new file mode 100644 index 000000000000..c6af57f4cc0c --- /dev/null +++ b/includes/Maintenance/OrderedStreamingForkController.php @@ -0,0 +1,234 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Maintenance; + +/** + * Apply a transformation to values via a pool of sub processes. + * + * The controller reads lines from a given input stream, where each line + * describes work to be done. This work is then farmed out to multiple + * child streams that correspond to child procesess. Each child has exactly + * one piece of work in-flight at a given moment. The result of each work + * is written to an output stream. + * + * If numProcs is zero, the fallback is to perform work in-process instead. + * + * This class guarantees that the output is produced in the same exact order + * as input values were. + * + * Currently used by CirrusSearch extension to implement CLI search script. + * + * @ingroup Maintenance + * @since 1.30 + */ +class OrderedStreamingForkController extends ForkController { + /** @var callable */ + protected $workCallback; + /** @var resource */ + protected $input; + /** @var resource */ + protected $output; + /** @var int */ + protected $nextOutputId; + /** @var string[] Int key indicates order, value is data */ + protected $delayedOutputData = []; + + /** + * @param int $numProcs The number of worker processes to fork + * @param callable $workCallback A callback to call in the child process + * once for each line of work to process. + * @param resource $input A socket to read work lines from + * @param resource $output A socket to write the result of work to. + */ + public function __construct( $numProcs, $workCallback, $input, $output ) { + parent::__construct( $numProcs ); + $this->workCallback = $workCallback; + $this->input = $input; + $this->output = $output; + } + + /** + * @inheritDoc + */ + public function start() { + if ( $this->procsToStart > 0 ) { + $status = parent::start(); + if ( $status === 'child' ) { + $this->consume(); + } + } else { + $status = 'parent'; + $this->consumeNoFork(); + } + return $status; + } + + /** + * @param int $numProcs + * @return string + */ + protected function forkWorkers( $numProcs ) { + $this->prepareEnvironment(); + + $childSockets = []; + // Create the child processes + for ( $i = 0; $i < $numProcs; $i++ ) { + $sockets = stream_socket_pair( STREAM_PF_UNIX, STREAM_SOCK_STREAM, STREAM_IPPROTO_IP ); + // Do the fork + $pid = pcntl_fork(); + if ( $pid === -1 || $pid === false ) { + echo "Error creating child processes\n"; + exit( 1 ); + } + + if ( !$pid ) { + $this->initChild(); + $this->childNumber = $i; + $this->input = $sockets[0]; + $this->output = $sockets[0]; + fclose( $sockets[1] ); + return 'child'; + } else { + // This is the parent process + $this->children[$pid] = true; + fclose( $sockets[0] ); + $childSockets[] = $sockets[1]; + } + } + $this->feedChildren( $childSockets ); + foreach ( $childSockets as $socket ) { + fclose( $socket ); + } + return 'parent'; + } + + /** + * Child worker process. Reads work from $this->input and writes the + * result of that work to $this->output when completed. + */ + protected function consume() { + while ( !feof( $this->input ) ) { + $line = trim( fgets( $this->input ) ); + if ( $line ) { + list( $id, $data ) = json_decode( $line ); + $result = call_user_func( $this->workCallback, $data ); + fwrite( $this->output, json_encode( [ $id, $result ] ) . "\n" ); + } + } + } + + /** + * Special cased version of self::consume() when no forking occurs + */ + protected function consumeNoFork() { + while ( !feof( $this->input ) ) { + $data = fgets( $this->input ); + if ( substr( $data, -1 ) === "\n" ) { + // Strip any final new line used to delimit lines of input. + // The last line of input might not have it, though. + $data = substr( $data, 0, -1 ); + } + if ( $data === '' ) { + continue; + } + $result = call_user_func( $this->workCallback, $data ); + fwrite( $this->output, "$result\n" ); + } + } + + /** + * Reads lines of work from $this->input and farms them out to + * the provided socket. + * + * @param resource[] $sockets + */ + protected function feedChildren( array $sockets ) { + $used = []; + $id = 0; + $this->nextOutputId = 0; + + while ( !feof( $this->input ) ) { + $data = fgets( $this->input ); + if ( $used ) { + do { + $this->updateAvailableSockets( $sockets, $used, $sockets ? 0 : 5 ); + } while ( !$sockets ); + } + if ( substr( $data, -1 ) === "\n" ) { + // Strip any final new line used to delimit lines of input. + // The last line of input might not have it, though. + $data = substr( $data, 0, -1 ); + } + if ( $data === '' ) { + continue; + } + $socket = array_pop( $sockets ); + fwrite( $socket, json_encode( [ $id++, $data ] ) . "\n" ); + $used[] = $socket; + } + while ( $used ) { + $this->updateAvailableSockets( $sockets, $used, 5 ); + } + } + + /** + * Moves sockets from $used to $sockets when they are available + * for more work + * + * @param resource[] &$sockets List of sockets that are waiting for work + * @param resource[] &$used List of sockets currently performing work + * @param int $timeout The number of seconds to block waiting. 0 for + * non-blocking operation. + */ + protected function updateAvailableSockets( &$sockets, &$used, $timeout ) { + $read = $used; + $write = $except = []; + stream_select( $read, $write, $except, $timeout ); + foreach ( $read as $socket ) { + $line = fgets( $socket ); + list( $id, $data ) = json_decode( trim( $line ) ); + $this->receive( (int)$id, $data ); + $sockets[] = $socket; + $idx = array_search( $socket, $used ); + unset( $used[$idx] ); + } + } + + /** + * @param int $id + * @param string $data + */ + protected function receive( $id, $data ) { + if ( $id !== $this->nextOutputId ) { + $this->delayedOutputData[$id] = $data; + return; + } + fwrite( $this->output, $data . "\n" ); + $this->nextOutputId = $id + 1; + while ( isset( $this->delayedOutputData[$this->nextOutputId] ) ) { + fwrite( $this->output, $this->delayedOutputData[$this->nextOutputId] . "\n" ); + unset( $this->delayedOutputData[$this->nextOutputId] ); + $this->nextOutputId++; + } + } +} + +class_alias( OrderedStreamingForkController::class, 'OrderedStreamingForkController' ); |