aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMax Semenik <maxsem.wiki@gmail.com>2016-12-16 14:45:20 -0800
committerKunal Mehta <legoktm@member.fsf.org>2016-12-18 04:46:39 -0800
commite7caa2f2982060a0884a3e752d50a58b0408aea1 (patch)
tree9db8a78c4c0afc9f13fcd7c967d86d8b2e6cb0ef
parent8a112ec84e5977074beb031bc9f32d0ad56d8ed5 (diff)
downloadmediawikicore-e7caa2f2982060a0884a3e752d50a58b0408aea1.tar.gz
mediawikicore-e7caa2f2982060a0884a3e752d50a58b0408aea1.zip
Rewrite importImages.php to use Maintenance infrastructure
I even tested it! Change-Id: I999f5842625c752f01c1eb4b012431c4a24ce7ce
-rw-r--r--autoload.php1
-rw-r--r--maintenance/importImages.inc137
-rw-r--r--maintenance/importImages.php736
3 files changed, 428 insertions, 446 deletions
diff --git a/autoload.php b/autoload.php
index 6dbcc1d6bd31..00c29914873e 100644
--- a/autoload.php
+++ b/autoload.php
@@ -616,6 +616,7 @@ $wgAutoloadLocalClasses = [
'ImageListPager' => __DIR__ . '/includes/specials/pagers/ImageListPager.php',
'ImagePage' => __DIR__ . '/includes/page/ImagePage.php',
'ImageQueryPage' => __DIR__ . '/includes/specialpage/ImageQueryPage.php',
+ 'ImportImages' => __DIR__ . '/maintenance/importImages.php',
'ImportLogFormatter' => __DIR__ . '/includes/logging/ImportLogFormatter.php',
'ImportReporter' => __DIR__ . '/includes/specials/SpecialImport.php',
'ImportSiteScripts' => __DIR__ . '/maintenance/importSiteScripts.php',
diff --git a/maintenance/importImages.inc b/maintenance/importImages.inc
deleted file mode 100644
index fc9428d7aa05..000000000000
--- a/maintenance/importImages.inc
+++ /dev/null
@@ -1,137 +0,0 @@
-<?php
-/**
- * Support functions for the importImages.php script
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- * @author Rob Church <robchur@gmail.com>
- * @author Mij <mij@bitchx.it>
- */
-
-/**
- * Search a directory for files with one of a set of extensions
- *
- * @param string $dir Path to directory to search
- * @param array $exts Array of extensions to search for
- * @param bool $recurse Search subdirectories recursively
- * @return array|bool Array of filenames on success, or false on failure
- */
-function findFiles( $dir, $exts, $recurse = false ) {
- if ( is_dir( $dir ) ) {
- $dhl = opendir( $dir );
- if ( $dhl ) {
- $files = [];
- while ( ( $file = readdir( $dhl ) ) !== false ) {
- if ( is_file( $dir . '/' . $file ) ) {
- list( /* $name */, $ext ) = splitFilename( $dir . '/' . $file );
- if ( array_search( strtolower( $ext ), $exts ) !== false ) {
- $files[] = $dir . '/' . $file;
- }
- } elseif ( $recurse && is_dir( $dir . '/' . $file ) && $file !== '..' && $file !== '.' ) {
- $files = array_merge( $files, findFiles( $dir . '/' . $file, $exts, true ) );
- }
- }
-
- return $files;
- } else {
- return [];
- }
- } else {
- return [];
- }
-}
-
-/**
- * Split a filename into filename and extension
- *
- * @param string $filename Filename
- * @return array
- */
-function splitFilename( $filename ) {
- $parts = explode( '.', $filename );
- $ext = $parts[count( $parts ) - 1];
- unset( $parts[count( $parts ) - 1] );
- $fname = implode( '.', $parts );
-
- return [ $fname, $ext ];
-}
-
-/**
- * Find an auxilliary file with the given extension, matching
- * the give base file path. $maxStrip determines how many extensions
- * may be stripped from the original file name before appending the
- * new extension. For example, with $maxStrip = 1 (the default),
- * file files acme.foo.bar.txt and acme.foo.txt would be auxilliary
- * files for acme.foo.bar and the extension ".txt". With $maxStrip = 2,
- * acme.txt would also be acceptable.
- *
- * @param string $file Base path
- * @param string $auxExtension The extension to be appended to the base path
- * @param int $maxStrip The maximum number of extensions to strip from the base path (default: 1)
- * @return string|bool
- */
-function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
- if ( strpos( $auxExtension, '.' ) !== 0 ) {
- $auxExtension = '.' . $auxExtension;
- }
-
- $d = dirname( $file );
- $n = basename( $file );
-
- while ( $maxStrip >= 0 ) {
- $f = $d . '/' . $n . $auxExtension;
-
- if ( file_exists( $f ) ) {
- return $f;
- }
-
- $idx = strrpos( $n, '.' );
- if ( !$idx ) {
- break;
- }
-
- $n = substr( $n, 0, $idx );
- $maxStrip -= 1;
- }
-
- return false;
-}
-
-# @todo FIXME: Access the api in a saner way and performing just one query
-# (preferably batching files too).
-function getFileCommentFromSourceWiki( $wiki_host, $file ) {
- $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
- . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=comment';
- $body = Http::get( $url, [], __METHOD__ );
- if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) == 0 ) {
- return false;
- }
-
- return html_entity_decode( $matches[1] );
-}
-
-function getFileUserFromSourceWiki( $wiki_host, $file ) {
- $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
- . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=user';
- $body = Http::get( $url, [], __METHOD__ );
- if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) == 0 ) {
- return false;
- }
-
- return html_entity_decode( $matches[1] );
-}
diff --git a/maintenance/importImages.php b/maintenance/importImages.php
index 5a4ab394032d..7f2a9e1de0ee 100644
--- a/maintenance/importImages.php
+++ b/maintenance/importImages.php
@@ -32,373 +32,491 @@
* @author Mij <mij@bitchx.it>
*/
-$optionsWithArgs = [
- 'extensions', 'comment', 'comment-file', 'comment-ext', 'summary', 'user',
- 'license', 'sleep', 'limit', 'from', 'source-wiki-url', 'timestamp',
-];
+require_once __DIR__ . '/Maintenance.php';
+
+class ImportImages extends Maintenance {
+
+ public function __construct() {
+ parent::__construct();
+
+ $this->addDescription( 'Imports images and other media files into the wiki' );
+ $this->addArg( 'dir', 'Path to the directory containing images to be imported' );
+
+ $this->addOption( 'extensions',
+ 'Comma-separated list of allowable extensions, defaults to $wgFileExtensions',
+ false,
+ true
+ );
+ $this->addOption( 'overwrite',
+ 'Overwrite existing images with the same name (default is to skip them)' );
+ $this->addOption( 'limit',
+ 'Limit the number of images to process. Ignored or skipped images are not counted',
+ false,
+ true
+ );
+ $this->addOption( 'from',
+ "Ignore all files until the one with the given name. Useful for resuming aborted "
+ . "imports. The name should be the file's canonical database form.",
+ false,
+ true
+ );
+ $this->addOption( 'skip-dupes',
+ 'Skip images that were already uploaded under a different name (check SHA1)' );
+ $this->addOption( 'search-recursively', 'Search recursively for files in subdirectories' );
+ $this->addOption( 'sleep',
+ 'Sleep between files. Useful mostly for debugging',
+ false,
+ true
+ );
+ $this->addOption( 'user',
+ "Set username of uploader, default 'Maintenance script'",
+ false,
+ true
+ );
+ // This parameter can optionally have an argument. If none specified, getOption()
+ // returns 1 which is precisely what we need.
+ $this->addOption( 'check-userblock', 'Check if the user got blocked during import' );
+ $this->addOption( 'comment',
+ "Set file description, default 'Importing file'",
+ false,
+ true
+ );
+ $this->addOption( 'comment-file',
+ 'Set description to the content of this file',
+ false,
+ true
+ );
+ $this->addOption( 'comment-ext',
+ 'Causes the description for each file to be loaded from a file with the same name, but '
+ . 'the extension provided. If a global description is also given, it is appended.',
+ false,
+ true
+ );
+ $this->addOption( 'summary',
+ 'Upload summary, description will be used if not provided',
+ false,
+ true
+ );
+ $this->addOption( 'license',
+ 'Use an optional license template',
+ false,
+ true
+ );
+ $this->addOption( 'timestamp',
+ 'Override upload time/date, all MediaWiki timestamp formats are accepted',
+ false,
+ true
+ );
+ $this->addOption( 'protect',
+ 'Specify the protect value (autoconfirmed,sysop)',
+ false,
+ true
+ );
+ $this->addOption( 'unprotect', 'Unprotects all uploaded images' );
+ $this->addOption( 'source-wiki-url',
+ 'If specified, take User and Comment data for each imported file from this URL. '
+ . 'For example, --source-wiki-url="http://en.wikipedia.org/"',
+ false,
+ true
+ );
+ $this->addOption( 'dry', "Dry run, don't import anything" );
+ }
-$optionsWithoutArgs = [
- 'protect', 'unprotect', 'search-recursively', 'check-userblock', 'overwrite',
- 'skip-dupes', 'dry'
-];
+ public function execute() {
+ global $wgFileExtensions, $wgUser, $wgRestrictionLevels;
-require_once __DIR__ . '/commandLine.inc';
-require_once __DIR__ . '/importImages.inc';
-$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
+ $processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
-echo "Import Images\n\n";
+ $this->output( "Import Images\n\n" );
-# Need a path
-if ( count( $args ) == 0 ) {
- showUsage();
-}
+ $dir = $this->getArg( 0 );
-$dir = $args[0];
+ # Check Protection
+ if ( $this->hasOption( 'protect' ) && $this->hasOption( 'unprotect' ) ) {
+ $this->error( "Cannot specify both protect and unprotect. Only 1 is allowed.\n", 1 );
+ }
-# Check Protection
-if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) {
- die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
-}
+ if ( $this->hasOption( 'protect' ) && trim( $this->getOption( 'protect' ) ) ) {
+ $this->error( "You must specify a protection option.\n", 1 );
+ }
-if ( isset( $options['protect'] ) && $options['protect'] == 1 ) {
- die( "You must specify a protection option.\n" );
-}
+ # Prepare the list of allowed extensions
+ $extensions = $this->hasOption( 'extensions' )
+ ? explode( ',', strtolower( $this->getOption( 'extensions' ) ) )
+ : $wgFileExtensions;
-# Prepare the list of allowed extensions
-global $wgFileExtensions;
-$extensions = isset( $options['extensions'] )
- ? explode( ',', strtolower( $options['extensions'] ) )
- : $wgFileExtensions;
-
-# Search the path provided for candidates for import
-$files = findFiles( $dir, $extensions, isset( $options['search-recursively'] ) );
-
-# Initialise the user for this operation
-$user = isset( $options['user'] )
- ? User::newFromName( $options['user'] )
- : User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
-if ( !$user instanceof User ) {
- $user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
-}
-$wgUser = $user;
-
-# Get block check. If a value is given, this specified how often the check is performed
-if ( isset( $options['check-userblock'] ) ) {
- if ( !$options['check-userblock'] ) {
- $checkUserBlock = 1;
- } else {
- $checkUserBlock = (int)$options['check-userblock'];
- }
-} else {
- $checkUserBlock = false;
-}
+ # Search the path provided for candidates for import
+ $files = $this->findFiles( $dir, $extensions, $this->hasOption( 'search-recursively' ) );
-# Get --from
-MediaWiki\suppressWarnings();
-$from = $options['from'];
-MediaWiki\restoreWarnings();
+ # Initialise the user for this operation
+ $user = $this->hasOption( 'user' )
+ ? User::newFromName( $this->getOption( 'user' ) )
+ : User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
+ if ( !$user instanceof User ) {
+ $user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
+ }
+ $wgUser = $user;
+
+ # Get block check. If a value is given, this specifies how often the check is performed
+ $checkUserBlock = (int)$this->getOption( 'check-userblock' );
+
+ $from = $this->getOption( 'from' );
+ $sleep = (int)$this->getOption( 'sleep' );
+ $limit = (int)$this->getOption( 'limit' );
+ $timestamp = $this->getOption( 'timestamp', false );
+
+ # Get the upload comment. Provide a default one in case there's no comment given.
+ $commentFile = $this->getOption( 'comment-file' );
+ if ( $commentFile !== null ) {
+ $comment = file_get_contents( $commentFile );
+ if ( $comment === false || $comment === null ) {
+ $this->error( "failed to read comment file: {$commentFile}\n", 1 );
+ }
+ } else {
+ $comment = $this->getOption( 'comment', 'Importing file' );
+ }
+ $commentExt = $this->getOption( 'comment-ext' );
+ $summary = $this->getOption( 'summary', '' );
-# Get sleep time.
-MediaWiki\suppressWarnings();
-$sleep = $options['sleep'];
-MediaWiki\restoreWarnings();
+ $license = $this->getOption( 'license', '' );
-if ( $sleep ) {
- $sleep = (int)$sleep;
-}
+ $sourceWikiUrl = $this->getOption( 'source-wiki-url' );
-# Get limit number
-MediaWiki\suppressWarnings();
-$limit = $options['limit'];
-MediaWiki\restoreWarnings();
+ # Batch "upload" operation
+ $count = count( $files );
+ if ( $count > 0 ) {
-if ( $limit ) {
- $limit = (int)$limit;
-}
+ foreach ( $files as $file ) {
+ $base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );
-$timestamp = isset( $options['timestamp'] ) ? $options['timestamp'] : false;
+ # Validate a title
+ $title = Title::makeTitleSafe( NS_FILE, $base );
+ if ( !is_object( $title ) ) {
+ $this->output(
+ "{$base} could not be imported; a valid title cannot be produced\n" );
+ continue;
+ }
-# Get the upload comment. Provide a default one in case there's no comment given.
-$comment = 'Importing file';
+ if ( $from ) {
+ if ( $from == $title->getDBkey() ) {
+ $from = null;
+ } else {
+ $ignored++;
+ continue;
+ }
+ }
-if ( isset( $options['comment-file'] ) ) {
- $comment = file_get_contents( $options['comment-file'] );
- if ( $comment === false || $comment === null ) {
- die( "failed to read comment file: {$options['comment-file']}\n" );
- }
-} elseif ( isset( $options['comment'] ) ) {
- $comment = $options['comment'];
-}
+ if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
+ $user->clearInstanceCache( 'name' ); // reload from DB!
+ if ( $user->isBlocked() ) {
+ $this->output( $user->getName() . " was blocked! Aborting.\n" );
+ break;
+ }
+ }
-$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;
+ # Check existence
+ $image = wfLocalFile( $title );
+ if ( $image->exists() ) {
+ if ( $this->hasOption( 'overwrite' ) ) {
+ $this->output( "{$base} exists, overwriting..." );
+ $svar = 'overwritten';
+ } else {
+ $this->output( "{$base} exists, skipping\n" );
+ $skipped++;
+ continue;
+ }
+ } else {
+ if ( $this->hasOption( 'skip-dupes' ) ) {
+ $repo = $image->getRepo();
+ # XXX: we end up calculating this again when actually uploading. that sucks.
+ $sha1 = FSFile::getSha1Base36FromPath( $file );
+
+ $dupes = $repo->findBySha1( $sha1 );
+
+ if ( $dupes ) {
+ $this->output(
+ "{$base} already exists as {$dupes[0]->getName()}, skipping\n" );
+ $skipped++;
+ continue;
+ }
+ }
-$summary = isset( $options['summary'] ) ? $options['summary'] : '';
+ $this->output( "Importing {$base}..." );
+ $svar = 'added';
+ }
-# Get the license specifier
-$license = isset( $options['license'] ) ? $options['license'] : '';
+ if ( $sourceWikiUrl ) {
+ /* find comment text directly from source wiki, through MW's API */
+ $real_comment = $this->getFileCommentFromSourceWiki( $sourceWikiUrl, $base );
+ if ( $real_comment === false ) {
+ $commentText = $comment;
+ } else {
+ $commentText = $real_comment;
+ }
-# Batch "upload" operation
-$count = count( $files );
-if ( $count > 0 ) {
+ /* find user directly from source wiki, through MW's API */
+ $real_user = $this->getFileUserFromSourceWiki( $sourceWikiUrl, $base );
+ if ( $real_user === false ) {
+ $wgUser = $user;
+ } else {
+ $wgUser = User::newFromName( $real_user );
+ if ( $wgUser === false ) {
+ # user does not exist in target wiki
+ $this->output(
+ "failed: user '$real_user' does not exist in target wiki." );
+ continue;
+ }
+ }
+ } else {
+ # Find comment text
+ $commentText = false;
+
+ if ( $commentExt ) {
+ $f = $this->findAuxFile( $file, $commentExt );
+ if ( !$f ) {
+ $this->output( " No comment file with extension {$commentExt} found "
+ . "for {$file}, using default comment. " );
+ } else {
+ $commentText = file_get_contents( $f );
+ if ( !$commentText ) {
+ $this->output(
+ " Failed to load comment file {$f}, using default comment. " );
+ }
+ }
+ }
- foreach ( $files as $file ) {
- $base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );
+ if ( !$commentText ) {
+ $commentText = $comment;
+ }
+ }
- # Validate a title
- $title = Title::makeTitleSafe( NS_FILE, $base );
- if ( !is_object( $title ) ) {
- echo "{$base} could not be imported; a valid title cannot be produced\n";
- continue;
- }
+ # Import the file
+ if ( $this->hasOption( 'dry' ) ) {
+ $this->output(
+ " publishing {$file} by '{$wgUser->getName()}', comment '$commentText'... "
+ );
+ } else {
+ $mwProps = new MWFileProps( MimeMagic::singleton() );
+ $props = $mwProps->getPropsFromPath( $file, true );
+ $flags = 0;
+ $publishOptions = [];
+ $handler = MediaHandler::getHandler( $props['mime'] );
+ if ( $handler ) {
+ $publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] );
+ } else {
+ $publishOptions['headers'] = [];
+ }
+ $archive = $image->publish( $file, $flags, $publishOptions );
+ if ( !$archive->isGood() ) {
+ $this->output( "failed. (" .
+ $archive->getWikiText( false, false, 'en' ) .
+ ")\n" );
+ $failed++;
+ continue;
+ }
+ }
- if ( $from ) {
- if ( $from == $title->getDBkey() ) {
- $from = null;
- } else {
- $ignored++;
- continue;
- }
- }
+ $commentText = SpecialUpload::getInitialPageText( $commentText, $license );
+ if ( !$this->hasOption( 'summary' ) ) {
+ $summary = $commentText;
+ }
- if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
- $user->clearInstanceCache( 'name' ); // reload from DB!
- if ( $user->isBlocked() ) {
- echo $user->getName() . " was blocked! Aborting.\n";
- break;
- }
- }
+ if ( $this->hasOption( 'dry' ) ) {
+ $this->output( "done.\n" );
+ } elseif ( $image->recordUpload2(
+ $archive->value,
+ $summary,
+ $commentText,
+ $props,
+ $timestamp
+ ) ) {
+ # We're done!
+ $this->output( "done.\n" );
- # Check existence
- $image = wfLocalFile( $title );
- if ( $image->exists() ) {
- if ( isset( $options['overwrite'] ) ) {
- echo "{$base} exists, overwriting...";
- $svar = 'overwritten';
- } else {
- echo "{$base} exists, skipping\n";
- $skipped++;
- continue;
- }
- } else {
- if ( isset( $options['skip-dupes'] ) ) {
- $repo = $image->getRepo();
- # XXX: we end up calculating this again when actually uploading. that sucks.
- $sha1 = FSFile::getSha1Base36FromPath( $file );
+ $doProtect = false;
- $dupes = $repo->findBySha1( $sha1 );
+ $protectLevel = $this->getOption( 'protect' );
- if ( $dupes ) {
- echo "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n";
- $skipped++;
- continue;
+ if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
+ $doProtect = true;
+ }
+ if ( $this->hasOption( 'unprotect' ) ) {
+ $protectLevel = '';
+ $doProtect = true;
+ }
+
+ if ( $doProtect ) {
+ # Protect the file
+ $this->output( "\nWaiting for replica DBs...\n" );
+ // Wait for replica DBs.
+ sleep( 2.0 ); # Why this sleep?
+ wfWaitForSlaves();
+
+ $this->output( "\nSetting image restrictions ... " );
+
+ $cascade = false;
+ $restrictions = [];
+ foreach ( $title->getRestrictionTypes() as $type ) {
+ $restrictions[$type] = $protectLevel;
+ }
+
+ $page = WikiPage::factory( $title );
+ $status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
+ $this->output( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
+ }
+ } else {
+ $this->output( "failed. (at recordUpload stage)\n" );
+ $svar = 'failed';
}
- }
- echo "Importing {$base}...";
- $svar = 'added';
- }
+ $$svar++;
+ $processed++;
- if ( isset( $options['source-wiki-url'] ) ) {
- /* find comment text directly from source wiki, through MW's API */
- $real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
- if ( $real_comment === false ) {
- $commentText = $comment;
- } else {
- $commentText = $real_comment;
+ if ( $limit && $processed >= $limit ) {
+ break;
+ }
+
+ if ( $sleep ) {
+ sleep( $sleep );
+ }
}
- /* find user directly from source wiki, through MW's API */
- $real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
- if ( $real_user === false ) {
- $wgUser = $user;
- } else {
- $wgUser = User::newFromName( $real_user );
- if ( $wgUser === false ) {
- # user does not exist in target wiki
- echo "failed: user '$real_user' does not exist in target wiki.";
- continue;
+ # Print out some statistics
+ $this->output( "\n" );
+ foreach (
+ [
+ 'count' => 'Found',
+ 'limit' => 'Limit',
+ 'ignored' => 'Ignored',
+ 'added' => 'Added',
+ 'skipped' => 'Skipped',
+ 'overwritten' => 'Overwritten',
+ 'failed' => 'Failed'
+ ] as $var => $desc
+ ) {
+ if ( $$var > 0 ) {
+ $this->output( "{$desc}: {$$var}\n" );
}
}
} else {
- # Find comment text
- $commentText = false;
-
- if ( $commentExt ) {
- $f = findAuxFile( $file, $commentExt );
- if ( !$f ) {
- echo " No comment file with extension {$commentExt} found "
- . "for {$file}, using default comment. ";
- } else {
- $commentText = file_get_contents( $f );
- if ( !$commentText ) {
- echo " Failed to load comment file {$f}, using default comment. ";
+ $this->output( "No suitable files could be found for import.\n" );
+ }
+ }
+
+ /**
+ * Search a directory for files with one of a set of extensions
+ *
+ * @param string $dir Path to directory to search
+ * @param array $exts Array of extensions to search for
+ * @param bool $recurse Search subdirectories recursively
+ * @return array|bool Array of filenames on success, or false on failure
+ */
+ private function findFiles( $dir, $exts, $recurse = false ) {
+ if ( is_dir( $dir ) ) {
+ $dhl = opendir( $dir );
+ if ( $dhl ) {
+ $files = [];
+ while ( ( $file = readdir( $dhl ) ) !== false ) {
+ if ( is_file( $dir . '/' . $file ) ) {
+ list( /* $name */, $ext ) = $this->splitFilename( $dir . '/' . $file );
+ if ( array_search( strtolower( $ext ), $exts ) !== false ) {
+ $files[] = $dir . '/' . $file;
+ }
+ } elseif ( $recurse && is_dir( $dir . '/' . $file ) && $file !== '..' && $file !== '.' ) {
+ $files = array_merge( $files, $this->findFiles( $dir . '/' . $file, $exts, true ) );
}
}
- }
-
- if ( !$commentText ) {
- $commentText = $comment;
- }
- }
- # Import the file
- if ( isset( $options['dry'] ) ) {
- echo " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... ";
- } else {
- $mwProps = new MWFileProps( MimeMagic::singleton() );
- $props = $mwProps->getPropsFromPath( $file, true );
- $flags = 0;
- $publishOptions = [];
- $handler = MediaHandler::getHandler( $props['mime'] );
- if ( $handler ) {
- $publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] );
+ return $files;
} else {
- $publishOptions['headers'] = [];
+ return [];
}
- $archive = $image->publish( $file, $flags, $publishOptions );
- if ( !$archive->isGood() ) {
- echo "failed. (" .
- $archive->getWikiText( false, false, 'en' ) .
- ")\n";
- $failed++;
- continue;
- }
- }
-
- $commentText = SpecialUpload::getInitialPageText( $commentText, $license );
- if ( !isset( $options['summary'] ) ) {
- $summary = $commentText;
+ } else {
+ return [];
}
+ }
- if ( isset( $options['dry'] ) ) {
- echo "done.\n";
- } elseif ( $image->recordUpload2(
- $archive->value,
- $summary,
- $commentText,
- $props,
- $timestamp
- ) ) {
- # We're done!
- echo "done.\n";
+ /**
+ * Split a filename into filename and extension
+ *
+ * @param string $filename Filename
+ * @return array
+ */
+ private function splitFilename( $filename ) {
+ $parts = explode( '.', $filename );
+ $ext = $parts[count( $parts ) - 1];
+ unset( $parts[count( $parts ) - 1] );
+ $fname = implode( '.', $parts );
+
+ return [ $fname, $ext ];
+ }
- $doProtect = false;
+ /**
+ * Find an auxiliary file with the given extension, matching
+ * the given base file path. $maxStrip determines how many extensions
+ * may be stripped from the original file name before appending the
+ * new extension. For example, with $maxStrip = 1 (the default),
+ * the files acme.foo.bar.txt and acme.foo.txt would be auxiliary
+ * files for acme.foo.bar and the extension ".txt". With $maxStrip = 2,
+ * acme.txt would also be acceptable.
+ *
+ * @param string $file Base path
+ * @param string $auxExtension The extension to be appended to the base path
+ * @param int $maxStrip The maximum number of extensions to strip from the base path (default: 1)
+ * @return string|bool
+ */
+ private function findAuxFile( $file, $auxExtension, $maxStrip = 1 ) {
+ if ( strpos( $auxExtension, '.' ) !== 0 ) {
+ $auxExtension = '.' . $auxExtension;
+ }
- global $wgRestrictionLevels;
+ $d = dirname( $file );
+ $n = basename( $file );
- $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;
+ while ( $maxStrip >= 0 ) {
+ $f = $d . '/' . $n . $auxExtension;
- if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
- $doProtect = true;
- }
- if ( isset( $options['unprotect'] ) ) {
- $protectLevel = '';
- $doProtect = true;
+ if ( file_exists( $f ) ) {
+ return $f;
}
- if ( $doProtect ) {
- # Protect the file
- echo "\nWaiting for replica DBs...\n";
- // Wait for replica DBs.
- sleep( 2.0 ); # Why this sleep?
- wfWaitForSlaves();
-
- echo "\nSetting image restrictions ... ";
-
- $cascade = false;
- $restrictions = [];
- foreach ( $title->getRestrictionTypes() as $type ) {
- $restrictions[$type] = $protectLevel;
- }
-
- $page = WikiPage::factory( $title );
- $status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
- echo ( $status->isOK() ? 'done' : 'failed' ) . "\n";
+ $idx = strrpos( $n, '.' );
+ if ( !$idx ) {
+ break;
}
- } else {
- echo "failed. (at recordUpload stage)\n";
- $svar = 'failed';
- }
- $$svar++;
- $processed++;
-
- if ( $limit && $processed >= $limit ) {
- break;
+ $n = substr( $n, 0, $idx );
+ $maxStrip -= 1;
}
- if ( $sleep ) {
- sleep( $sleep );
- }
+ return false;
}
- # Print out some statistics
- echo "\n";
- foreach (
- [
- 'count' => 'Found',
- 'limit' => 'Limit',
- 'ignored' => 'Ignored',
- 'added' => 'Added',
- 'skipped' => 'Skipped',
- 'overwritten' => 'Overwritten',
- 'failed' => 'Failed'
- ] as $var => $desc
- ) {
- if ( $$var > 0 ) {
- echo "{$desc}: {$$var}\n";
+ # @todo FIXME: Access the api in a saner way and performing just one query
+ # (preferably batching files too).
+ private function getFileCommentFromSourceWiki( $wiki_host, $file ) {
+ $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
+ . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=comment';
+ $body = Http::get( $url, [], __METHOD__ );
+ if ( preg_match( '#<ii comment="([^"]*)" />#', $body, $matches ) == 0 ) {
+ return false;
}
+
+ return html_entity_decode( $matches[1] );
}
-} else {
- echo "No suitable files could be found for import.\n";
-}
-exit( 0 );
+ private function getFileUserFromSourceWiki( $wiki_host, $file ) {
+ $url = $wiki_host . '/api.php?action=query&format=xml&titles=File:'
+ . rawurlencode( $file ) . '&prop=imageinfo&&iiprop=user';
+ $body = Http::get( $url, [], __METHOD__ );
+ if ( preg_match( '#<ii user="([^"]*)" />#', $body, $matches ) == 0 ) {
+ return false;
+ }
-function showUsage( $reason = false ) {
- if ( $reason ) {
- echo $reason . "\n";
+ return html_entity_decode( $matches[1] );
}
- echo <<<TEXT
-Imports images and other media files into the wiki
-USAGE: php importImages.php [options] <dir>
-
-<dir> : Path to the directory containing images to be imported
-
-Options:
---extensions=<exts> Comma-separated list of allowable extensions, defaults
- to \$wgFileExtensions.
---overwrite Overwrite existing images with the same name (default
- is to skip them).
---limit=<num> Limit the number of images to process. Ignored or
- skipped images are not counted.
---from=<name> Ignore all files until the one with the given name.
- Useful for resuming aborted imports. <name> should be
- the file's canonical database form.
---skip-dupes Skip images that were already uploaded under a different
- name (check SHA1).
---search-recursively Search recursively for files in subdirectories.
---sleep=<sec> Sleep between files. Useful mostly for debugging.
---user=<username> Set username of uploader, default 'Maintenance script'.
---check-userblock Check if the user got blocked during import.
---comment=<text> Set file description, default 'Importing file'.
---comment-file=<file> Set description to the content of <file>.
---comment-ext=<ext> Causes the description for each file to be loaded from a
- file with the same name, but the extension <ext>. If a
- global description is also given, it is appended.
---license=<code> Use an optional license template.
---dry Dry run, don't import anything.
---protect=<protect> Specify the protect value (autoconfirmed,sysop).
---summary=<summary> Upload summary, description will be used if not
- provided.
---timestamp=<timestamp> Override upload time/date, all MediaWiki timestamp
- formats are accepted.
---unprotect Unprotects all uploaded images.
---source-wiki-url If specified, take User and Comment data for each
- imported file from this URL. For example,
- --source-wiki-url="http://en.wikipedia.org/."
-
-TEXT;
- exit( 1 );
}
+
+$maintClass = 'ImportImages';
+require_once RUN_MAINTENANCE_IF_MAIN;