diff options
author | Bill Pirkle <bpirkle@wikimedia.org> | 2018-10-31 18:02:02 -0500 |
---|---|---|
committer | Bill Pirkle <bpirkle@wikimedia.org> | 2018-10-31 18:02:02 -0500 |
commit | 94ec06e0bfead0173e2aae609d6d7326d0dd804c (patch) | |
tree | 1d0024140ccd52e747c0fcd34db90e21b4df0324 /includes/export/WikiExporter.php | |
parent | 47c9437835fe68e69b723f454d682064688bdad9 (diff) | |
download | mediawikicore-94ec06e0bfead0173e2aae609d6d7326d0dd804c.tar.gz mediawikicore-94ec06e0bfead0173e2aae609d6d7326d0dd804c.zip |
Fix for missing end tag </page> on some exports
T203424 replaced streaming mode with batched queries.
However, it did not properly handle some values of
the $wgExportMaxHistory config variable, and emitted
broken XML. This change fixes that issue.
Bug: T207974
Change-Id: Iade3fc603e513da51b7a970c16275516c02ede49
Diffstat (limited to 'includes/export/WikiExporter.php')
-rw-r--r-- | includes/export/WikiExporter.php | 78 |
1 file changed, 42 insertions, 36 deletions
diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php
index 32f75198bb27..e6b9719edd36 100644
--- a/includes/export/WikiExporter.php
+++ b/includes/export/WikiExporter.php
@@ -418,7 +418,7 @@ class WikiExporter {
 				$queryConds[] = 'rev_page>' . intval( $revPage ) . ' OR (rev_page=' .
 					intval( $revPage ) . ' AND rev_id' . $op . intval( $revId ) . ')';
 
-				# Do the query!
+				# Do the query and process any results, remembering max ids for the next iteration.
 				$result = $this->db->select(
 					$tables,
 					$fields,
@@ -427,15 +427,18 @@ class WikiExporter {
 					$opts,
 					$join
 				);
-				# Output dump results, get new max ids.
-				$lastRow = $this->outputPageStream( $result, $lastRow );
-
-				if ( !$result->numRows() || !$lastRow ) {
-					$done = true;
-				} else {
+				if ( $result->numRows() > 0 ) {
+					$lastRow = $this->outputPageStreamBatch( $result, $lastRow );
 					$rowCount += $result->numRows();
 					$revPage = $lastRow->rev_page;
 					$revId = $lastRow->rev_id;
+				} else {
+					$done = true;
+				}
+
+				// If we are finished, close off final page element (if any).
+				if ( $done && $lastRow ) {
+					$this->finishPageStreamOutput( $lastRow );
 				}
 			}
 		}
@@ -445,47 +448,50 @@ class WikiExporter {
 	 * The result set should be sorted/grouped by page to avoid duplicate
 	 * page records in the output.
 	 *
-	 * @param ResultWrapper $resultset
+	 * @param ResultWrapper $results
 	 * @param object $lastRow the last row output from the previous call (or null if none)
 	 * @return object the last row processed
 	 */
-	protected function outputPageStream( $resultset, $lastRow ) {
-		if ( $resultset->numRows() ) {
-			foreach ( $resultset as $row ) {
-				if ( $lastRow === null ||
-					$lastRow->page_namespace != $row->page_namespace ||
-					$lastRow->page_title != $row->page_title ) {
-					if ( $lastRow !== null ) {
-						$output = '';
-						if ( $this->dumpUploads ) {
-							$output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
-						}
-						$output .= $this->writer->closePage();
-						$this->sink->writeClosePage( $output );
+	protected function outputPageStreamBatch( $results, $lastRow ) {
+		foreach ( $results as $row ) {
+			if ( $lastRow === null ||
+				$lastRow->page_namespace != $row->page_namespace ||
+				$lastRow->page_title != $row->page_title ) {
+				if ( $lastRow !== null ) {
+					$output = '';
+					if ( $this->dumpUploads ) {
+						$output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
 					}
-					$output = $this->writer->openPage( $row );
-					$this->sink->writeOpenPage( $row, $output );
+					$output .= $this->writer->closePage();
+					$this->sink->writeClosePage( $output );
 				}
-				$output = $this->writer->writeRevision( $row );
-				$this->sink->writeRevision( $row, $output );
-				$lastRow = $row;
+				$output = $this->writer->openPage( $row );
+				$this->sink->writeOpenPage( $row, $output );
 			}
-		} elseif ( $lastRow !== null ) {
-			// Empty resultset means done with all batches Close off final page element (if any).
-			$output = '';
-			if ( $this->dumpUploads ) {
-				$output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
-			}
-			$output .= $this->author_list;
-			$output .= $this->writer->closePage();
-			$this->sink->writeClosePage( $output );
-			$lastRow = null;
+			$output = $this->writer->writeRevision( $row );
+			$this->sink->writeRevision( $row, $output );
+			$lastRow = $row;
 		}
 
 		return $lastRow;
 	}
 
 	/**
+	 * Final page stream output, after all batches are complete
+	 *
+	 * @param object $lastRow the last row output from the last batch (or null if none)
+	 */
+	protected function finishPageStreamOutput( $lastRow ) {
+		$output = '';
+		if ( $this->dumpUploads ) {
+			$output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
+		}
+		$output .= $this->author_list;
+		$output .= $this->writer->closePage();
+		$this->sink->writeClosePage( $output );
+	}
+
+	/**
 	 * @param ResultWrapper $resultset
 	 * @return int the log_id value of the last item output, or null if none
 	 */