about | summary | refs | log | tree | commit | diff | stats
path: root/includes/export/WikiExporter.php
diff options
context:
space:
mode:
author Bill Pirkle <bpirkle@wikimedia.org> 2018-10-31 18:02:02 -0500
committer Bill Pirkle <bpirkle@wikimedia.org> 2018-10-31 18:02:02 -0500
commit 94ec06e0bfead0173e2aae609d6d7326d0dd804c (patch)
tree 1d0024140ccd52e747c0fcd34db90e21b4df0324 /includes/export/WikiExporter.php
parent 47c9437835fe68e69b723f454d682064688bdad9 (diff)
download mediawikicore-94ec06e0bfead0173e2aae609d6d7326d0dd804c.tar.gz
download mediawikicore-94ec06e0bfead0173e2aae609d6d7326d0dd804c.zip
Fix for missing end tag </page> on some exports
T203424 replaced streaming mode with batched queries. However, it did not properly handle some values of the $wgExportMaxHistory config variable, and emitted broken XML. This change fixes that issue.

Bug: T207974
Change-Id: Iade3fc603e513da51b7a970c16275516c02ede49
Diffstat (limited to 'includes/export/WikiExporter.php')
-rw-r--r-- includes/export/WikiExporter.php 78
1 files changed, 42 insertions, 36 deletions
diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php
index 32f75198bb27..e6b9719edd36 100644
--- a/includes/export/WikiExporter.php
+++ b/includes/export/WikiExporter.php
@@ -418,7 +418,7 @@ class WikiExporter {
$queryConds[] = 'rev_page>' . intval( $revPage ) . ' OR (rev_page=' .
intval( $revPage ) . ' AND rev_id' . $op . intval( $revId ) . ')';
- # Do the query!
+ # Do the query and process any results, remembering max ids for the next iteration.
$result = $this->db->select(
$tables,
$fields,
@@ -427,15 +427,18 @@ class WikiExporter {
$opts,
$join
);
- # Output dump results, get new max ids.
- $lastRow = $this->outputPageStream( $result, $lastRow );
-
- if ( !$result->numRows() || !$lastRow ) {
- $done = true;
- } else {
+ if ( $result->numRows() > 0 ) {
+ $lastRow = $this->outputPageStreamBatch( $result, $lastRow );
$rowCount += $result->numRows();
$revPage = $lastRow->rev_page;
$revId = $lastRow->rev_id;
+ } else {
+ $done = true;
+ }
+
+ // If we are finished, close off final page element (if any).
+ if ( $done && $lastRow ) {
+ $this->finishPageStreamOutput( $lastRow );
}
}
}
@@ -445,47 +448,50 @@ class WikiExporter {
* The result set should be sorted/grouped by page to avoid duplicate
* page records in the output.
*
- * @param ResultWrapper $resultset
+ * @param ResultWrapper $results
* @param object $lastRow the last row output from the previous call (or null if none)
* @return object the last row processed
*/
- protected function outputPageStream( $resultset, $lastRow ) {
- if ( $resultset->numRows() ) {
- foreach ( $resultset as $row ) {
- if ( $lastRow === null ||
- $lastRow->page_namespace != $row->page_namespace ||
- $lastRow->page_title != $row->page_title ) {
- if ( $lastRow !== null ) {
- $output = '';
- if ( $this->dumpUploads ) {
- $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
- }
- $output .= $this->writer->closePage();
- $this->sink->writeClosePage( $output );
+ protected function outputPageStreamBatch( $results, $lastRow ) {
+ foreach ( $results as $row ) {
+ if ( $lastRow === null ||
+ $lastRow->page_namespace != $row->page_namespace ||
+ $lastRow->page_title != $row->page_title ) {
+ if ( $lastRow !== null ) {
+ $output = '';
+ if ( $this->dumpUploads ) {
+ $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
}
- $output = $this->writer->openPage( $row );
- $this->sink->writeOpenPage( $row, $output );
+ $output .= $this->writer->closePage();
+ $this->sink->writeClosePage( $output );
}
- $output = $this->writer->writeRevision( $row );
- $this->sink->writeRevision( $row, $output );
- $lastRow = $row;
+ $output = $this->writer->openPage( $row );
+ $this->sink->writeOpenPage( $row, $output );
}
- } elseif ( $lastRow !== null ) {
- // Empty resultset means done with all batches Close off final page element (if any).
- $output = '';
- if ( $this->dumpUploads ) {
- $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
- }
- $output .= $this->author_list;
- $output .= $this->writer->closePage();
- $this->sink->writeClosePage( $output );
- $lastRow = null;
+ $output = $this->writer->writeRevision( $row );
+ $this->sink->writeRevision( $row, $output );
+ $lastRow = $row;
}
return $lastRow;
}
/**
+ * Final page stream output, after all batches are complete
+ *
+ * @param object $lastRow the last row output from the last batch (or null if none)
+ */
+ protected function finishPageStreamOutput( $lastRow ) {
+ $output = '';
+ if ( $this->dumpUploads ) {
+ $output .= $this->writer->writeUploads( $lastRow, $this->dumpUploadFileContents );
+ }
+ $output .= $this->author_list;
+ $output .= $this->writer->closePage();
+ $this->sink->writeClosePage( $output );
+ }
+
+ /**
* @param ResultWrapper $resultset
* @return int the log_id value of the last item output, or null if none
*/