diff options
author | Timo Tijhof <krinkle@fastmail.com> | 2023-08-25 00:21:19 +0100 |
---|---|---|
committer | Timo Tijhof <krinkle@fastmail.com> | 2023-08-25 01:16:17 +0100 |
commit | aca3c8203a51a5d6a91130e1a558501c014a9200 (patch) | |
tree | 03afc7387a996bb8fb98ab67e928318ad2080e3a /includes/deferred | |
parent | 487815c1f5273b888f14b62c175e11e44dcc5f38 (diff) | |
download | mediawikicore-aca3c8203a51a5d6a91130e1a558501c014a9200.tar.gz mediawikicore-aca3c8203a51a5d6a91130e1a558501c014a9200.zip |
deferred: Make DeferredUpdates docs more accessible
In prep for the next decoupling commit:
* Rephrase introduction to state what it's actually for in practice.
* Remove mentions of implementation details in favour of inline
comments, especially stuff relating to databases.
Except for 1 thing, every integration with databases is already
fully decoupled.
Automatic cancellation on rollback works by having the caller pass
on an IDatabase object to addCallableUpdate() for that specific
update, which works even if it came from an unknown LBFactory
or unknown service container.
Opportunistic execution is triggered by service wiring, where
MWLBFactory takes responsibility for having LBFactory notify the
DeferredUpdates singleton; not the other way around.
Bug: T265749
Change-Id: I048d22ffe2fa3838d9a5f4aa4128c756185a6b2e
Diffstat (limited to 'includes/deferred')
-rw-r--r-- | includes/deferred/AtomicSectionUpdate.php | 4 | ||||
-rw-r--r-- | includes/deferred/AutoCommitUpdate.php | 3 | ||||
-rw-r--r-- | includes/deferred/DeferredUpdates.php | 155 | ||||
-rw-r--r-- | includes/deferred/MWCallableUpdate.php | 8 |
4 files changed, 102 insertions, 68 deletions
diff --git a/includes/deferred/AtomicSectionUpdate.php b/includes/deferred/AtomicSectionUpdate.php index 7042ed726df9..07517c7ef251 100644 --- a/includes/deferred/AtomicSectionUpdate.php +++ b/includes/deferred/AtomicSectionUpdate.php @@ -15,11 +15,11 @@ class AtomicSectionUpdate implements DeferrableUpdate, DeferrableCallback { private $callback; /** + * @see IDatabase::doAtomicSection() * @param IDatabase $dbw DB handle; update aborts if a transaction now this rolls back * @param string $fname Caller name (usually __METHOD__) * @param callable $callback - * @param IDatabase[] $conns Abort if a transaction now on one of these rolls back [optional] - * @see IDatabase::doAtomicSection() + * @param IDatabase[] $conns Cancel the update if a DB transaction is rolled back [optional] */ public function __construct( IDatabase $dbw, $fname, callable $callback, array $conns = [] ) { $this->dbw = $dbw; diff --git a/includes/deferred/AutoCommitUpdate.php b/includes/deferred/AutoCommitUpdate.php index 773bbb2daa4e..c1da75cff7fb 100644 --- a/includes/deferred/AutoCommitUpdate.php +++ b/includes/deferred/AutoCommitUpdate.php @@ -18,7 +18,8 @@ class AutoCommitUpdate implements DeferrableUpdate, DeferrableCallback { * @param IDatabase $dbw DB handle; update aborts if a transaction now this rolls back * @param string $fname Caller name (usually __METHOD__) * @param callable $callback Callback that takes (IDatabase, method name string) - * @param IDatabase[] $conns Abort if a transaction now on one of these rolls back [optional] + * @param IDatabase[] $conns Cancel the update if a transaction on these + * connections is rolled back [optional] */ public function __construct( IDatabase $dbw, $fname, callable $callback, array $conns = [] ) { $this->dbw = $dbw; diff --git a/includes/deferred/DeferredUpdates.php b/includes/deferred/DeferredUpdates.php index 4c13744233ed..2d8ef23915eb 100644 --- a/includes/deferred/DeferredUpdates.php +++ b/includes/deferred/DeferredUpdates.php @@ -1,7 
+1,5 @@ <?php /** - * Interface and manager for deferred updates. - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -28,51 +26,70 @@ use Wikimedia\Rdbms\ILBFactory; use Wikimedia\ScopedCallback; /** - * Class for managing the deferral of updates within the scope of a PHP script invocation + * Defer callable updates to run later in the PHP process + * + * This is a performance feature that enables MediaWiki to produce faster web responses. + * It allows you to postpone non-blocking work (e.g. work that does not change the web + * response) to after the HTTP response has been sent to the client (i.e. web browser). + * + * Once the response is finalized and sent to the browser, the webserver process stays + * for a little while longer (detached from the web request) to run your POSTSEND tasks. + * + * There is also a PRESEND option, which runs your task right before the finalized response + * is sent to the browser. This is for critical tasks that does need to block the response, + * but where you'd like to benefit from other DeferredUpdates features. Such as: + * + * - MergeableUpdate: batch updates from different components without coupling + * or awareness of each other. + * - Automatic cancellation: pass a IDatabase object (for any wiki or database) to + * DeferredUpdates::addCallableUpdate or AtomicSectionUpdate. + * - Reducing lock contention: if the response is likely to take several seconds + * (e.g. uploading a large file to FileBackend, or saving an edit to a large article) + * much of that work may overlap with a database transaction that is staying open for + * the entire duration. By moving contentious writes out to a PRESEND update, these + * get their own transaction (after the main one is committed), which give up some + * atomicity for improved throughput. 
+ * + * ## Expectation and comparison to job queue + * + * When scheduling a POSTSEND via the DeferredUpdates system you can generally expect + * it to complete well before the client makes their next request. Updates runs directly after + * the web response is sent, from the same process on the same server. This unlike the JobQueue, + * where jobs may need to wait in line for some minutes or hours. + * + * If your update fails, this failure is not known to the client and gets no retry. For updates + * that need re-tries for system consistency or data integrity, it is recommended to implement + * it as a job instead and use JobQueueGroup::lazyPush. This has the caveat of being delayed + * by default, the same as any other job. + * + * A hybrid solution is available via the EnqueueableDataUpdate interface. By implementing + * this interface, you can queue your update via the DeferredUpdates first, and if it fails, + * the system will automatically catch this and queue it as a job instead. + * + * ## How it works during web requests * - * In web request mode, deferred updates run at the end of request execution, after the main - * database transaction round ends, and either before (PRESEND) or after (POSTSEND) the HTTP - * response has been sent. If an update runs after the HTTP response is sent, it will not block - * clients. Otherwise, the client will not see the response until the update finishes. Use the - * PRESEND and POSTSEND class constants to specify when an update should run. POSTSEND is the - * default for DeferredUpdates::addUpdate() and DeferredUpdates::addCallableUpdate(). An update - * that might need to alter the HTTP response output must use PRESEND. 
The control flow with - * regard to deferred updates during a typical state changing web request is as follows: - * - 1) Main transaction round starts - * - 2) Various writes to RBMS/file/blob stores and deferred updates enqueued - * - 3) Main transaction round ends - * - 4) PRESEND pending update queue is B1...BN - * - 5) B1 runs, resulting PRESEND updates iteratively run in FIFO order; likewise for B2..BN - * - 6) The web response is sent out to the client - * - 7) POSTSEND pending update queue is A1...AM - * - 8) A1 runs, resulting updates iteratively run in FIFO order; likewise for A2..AM + * 1. Your request route is executed (e.g. Action or SpecialPage class, or API). + * 2. Output is finalized and main database transaction is committed. + * 3. PRESEND updates run via DeferredUpdates::doUpdates. + * 5. The web response is sent to the browser. + * 6. POSTSEND updates run via DeferredUpdates::doUpdates. * - * @see MediaWiki::restInPeace() + * @see MediaWiki::preOutputCommit + * @see MediaWiki::restInPeace * - * In CLI mode, no distinction is made between PRESEND and POSTSEND deferred updates and all of - * them will run during the following occasions: - * - a) During DeferredUpdates::addUpdate() if no LBFactory DB handles have writes pending - * - b) On commit of an LBFactory DB handle if no other such handles have writes pending - * - c) During an LBFactory::waitForReplication call if no LBFactory DBs have writes pending - * - d) When the queue is large and an LBFactory DB handle commits (EnqueueableDataUpdate only) - * - e) Upon the completion of Maintenance::execute() via Maintenance::shutdown() + * ## How it works for Maintenance scripts * - * @see MWLBFactory::applyGlobalState() + * In CLI mode, no distinction is made between PRESEND and POSTSEND deferred updates, + * and the queue is periodically executed throughout the process. 
* - * If DeferredUpdates::doUpdates() is currently running a deferred update, then the public - * DeferredUpdates interface operates on the PRESEND/POSTSEND "sub"-queues that correspond to - * the innermost in-progress deferred update. Otherwise, the public interface operates on the - * PRESEND/POSTSEND "top"-queues. Affected methods include: - * - DeferredUpdates::addUpdate() - * - DeferredUpdates::addCallableUpdate() - * - DeferredUpdates::doUpdates() - * - DeferredUpdates::tryOpportunisticExecute() - * - DeferredUpdates::pendingUpdatesCount() - * - DeferredUpdates::getPendingUpdates() - * - DeferredUpdates::clearPendingUpdates() + * @see DeferredUpdates::tryOpportunisticExecute * - * Updates that work through this system will be more likely to complete by the time the - * client makes their next request after this request than with the JobQueue system. + * ## How it works internally + * + * Each update is added via DeferredUpdates::addUpdate and stored in either the PRESEND or + * POSTSEND queue. If an update gets queued while another update is already running, then + * we store in a "sub"-queue associated with the current update. This allows nested updates + * to be completed before other updates, which improves ordering for process caching. * * @since 1.19 */ @@ -133,11 +150,6 @@ class DeferredUpdates { global $wgCommandLineMode; self::getScopeStack()->current()->addUpdate( $update, $stage ); - // If CLI mode is active and no RDBMs transaction round is in the way, then run all - // the pending updates now. This is needed for scripts that never, or rarely, use the - // RDBMs layer, but that do modify systems via deferred updates. This logic avoids - // excessive pending update queue sizes when long-running scripts never trigger the - // basic RDBMs hooks for running pending updates. 
if ( $wgCommandLineMode ) { self::tryOpportunisticExecute(); } @@ -146,12 +158,10 @@ class DeferredUpdates { /** * Add an update to the pending update queue that invokes the specified callback when run * - * @see DeferredUpdates::addUpdate() - * @see MWCallableUpdate::__construct() - * * @param callable $callable * @param int $stage One of (DeferredUpdates::PRESEND, DeferredUpdates::POSTSEND) - * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] + * @param IDatabase|IDatabase[]|null $dbw Cancel the update if a DB transaction + * is rolled back [optional] * @since 1.27 Added $stage parameter * @since 1.28 Added the $dbw parameter */ @@ -342,16 +352,35 @@ class DeferredUpdates { } /** - * Consume and execute all pending updates unless an update is already - * in progress or the ILBFactory service instance has "busy" DB handles + * Consume and execute pending updates now if possible, instead of waiting. + * + * In web requests, updates are always deferred until the end of the request. + * + * In CLI mode, updates run earlier and more often. This is important for long-running + * Maintenance scripts that would otherwise grow an excessively large queue, which increases + * memory use, and risks losing all updates if the script ends early or crashes. + * + * The folllowing conditions are required for updates to run early in CLI mode: + * + * - No update is already in progress (ensure linear flow, recursion guard). + * - LBFactory indicates that we don't have any "busy" database connections, i.e. + * there are no pending writes or otherwise active and uncommitted transactions, + * except if the transaction is empty and merely used for primary DB read queries, + * in which case the transaction (and its repeatable-read snapshot) can be safely flushed. + * + * How this works: + * + * - When a maintenance script commits a change or waits for replication, such as + * via. 
IConnectionProvider::commitAndWaitForReplication, then ILBFactory calls + * tryOpportunisticExecute(). This is injected via MWLBFactory::applyGlobalState. + * + * - For maintenance scripts that don't do much with the database, we also call + * tryOpportunisticExecute() after every addUpdate() call. * - * A DB handle is considered "busy" if it has an unfinished transaction that cannot safely - * be flushed or the parent ILBFactory instance has an unfinished transaction round that - * cannot safely be flushed. If the number of pending updates reaches BIG_QUEUE_SIZE and - * there are still busy DB handles, then EnqueueableDataUpdate updates might be enqueued - * as jobs. This avoids excessive memory use and risk of losing updates due to failures. + * - Upon the completion of Maintenance::execute() via Maintenance::shutdown(), + * any remaining updates are run. * - * Note that this method operates on updates from all stages and thus should not be called + * Note that this method runs both PRESEND and POSTSEND updates and thus should not be called * during web requests. It is only intended for long-running Maintenance scripts. * * @internal For use by Maintenance @@ -458,11 +487,13 @@ class DeferredUpdates { } /** - * Attempt to run an update with the appropriate transaction round state it expects + * Attempt to run an update with the appropriate transaction round state if needed * - * DeferredUpdate classes that wrap the execution of bundles of other DeferredUpdate - * instances can use this method to run the updates. Any such wrapper class should - * always use TRX_ROUND_ABSENT itself. + * It is allowed for a DeferredUpdate to directly execute one or more other DeferredUpdate + * instances without queueing them by calling this method. In that case, the outer update + * must use TransactionRoundAwareUpdate::TRX_ROUND_ABSENT, e.g. by extending + * TransactionRoundDefiningUpdate, so that this method can give each update its own + * transaction round. 
* * @param DeferrableUpdate $update * @param ILBFactory $lbFactory diff --git a/includes/deferred/MWCallableUpdate.php b/includes/deferred/MWCallableUpdate.php index 91fb53db4eba..905f7ca4a714 100644 --- a/includes/deferred/MWCallableUpdate.php +++ b/includes/deferred/MWCallableUpdate.php @@ -3,7 +3,9 @@ use Wikimedia\Rdbms\IDatabase; /** - * Deferrable Update for closure/callback + * DeferrableUpdate for closure/callable + * + * @internal Use DeferredUpdates::addCallableUpdate instead */ class MWCallableUpdate implements DeferrableUpdate, DeferrableCallback, TransactionRoundAwareUpdate @@ -18,8 +20,8 @@ class MWCallableUpdate /** * @param callable $callback * @param string $fname Calling method - * @param IDatabase|IDatabase[]|null $dbws Abort if any of the specified DB handles have - * a currently pending transaction which later gets rolled back [optional] (since 1.28) + * @param IDatabase|IDatabase[]|null $dbws Cancel the update if a DB transaction + * is rolled back [optional] (since 1.28) */ public function __construct( callable $callback, $fname = 'unknown', $dbws = [] ) { $this->callback = $callback; |