	 */
	protected static $allowedUrls = [];

	/**
	 * Checks if the user is allowed to use the upload-by-URL feature. If the
	 * user is not allowed, return the name of the user right as a string. If
	 * the user is allowed, have the parent do further permissions checking.
	 *
	 * @param Authority $performer
	 *
	 * @return bool|string
	 */
	public static function isAllowed( Authority $performer ) {
		if ( !$performer->isAllowed( 'upload_by_url' ) ) {
			return 'upload_by_url';
		}

		return parent::isAllowed( $performer );
	}

	/**
	 * Checks if the upload-from-URL feature is enabled
	 * @return bool
	 */
	public static function isEnabled() {
		$allowCopyUploads = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::AllowCopyUploads );

		return $allowCopyUploads && parent::isEnabled();
	}

	/**
	 * Checks whether the URL is for an allowed host.
	 * The domains in the allowlist can include wildcard characters (*) in place
	 * of any of the domain levels, e.g. '*.flickr.com' or 'upload.*.gov.uk'.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedHost( $url ) {
		$domains = self::getAllowedHosts();
		if ( !count( $domains ) ) {
			return true;
		}

		$parsedUrl = wfGetUrlUtils()->parse( $url );
		if ( !$parsedUrl ) {
			return false;
		}

		$valid = false;
		foreach ( $domains as $domain ) {
			// See if the domain for the upload matches this allowed domain
			$domainPieces = explode( '.', $domain );
			$uploadDomainPieces = explode( '.', $parsedUrl['host'] );
			if ( count( $domainPieces ) === count( $uploadDomainPieces ) ) {
				$valid = true;
				// See if all the pieces match or not (excluding wildcards)
				foreach ( $domainPieces as $index => $piece ) {
					if ( $piece !== '*' && $piece !== $uploadDomainPieces[$index] ) {
						$valid = false;
					}
				}
				if ( $valid ) {
					// We found a match, so quit comparing against the list
					break;
				}
			}
			/* Non-wildcard test
			if ( $parsedUrl['host'] === $domain ) {
				$valid = true;
				break;
			}
			*/
		}

		return $valid;
	}

	/**
	 * Provides a caching key for an upload-from-URL set of parameters.
	 * Used to set the status of an async job in UploadFromUrlJob
	 * and retrieve it in frontend clients like ApiUpload. Will return the
	 * empty string if not all parameters are present.
	 *
	 * @param array $params
	 * @return string
	 */
	public static function getCacheKey( $params ) {
		if ( !isset( $params['filename'] ) || !isset( $params['url'] ) ) {
			return "";
		} else {
			// We use sha1 here to ensure we have a fixed-length string of printable
			// characters. There is no cryptography involved, so we just need a
			// relatively fast function.
			return sha1( sprintf( "%s|||%s", $params['filename'], $params['url'] ) );
		}
	}

	/**
	 * Get the caching key from a web request
	 * @param WebRequest &$request
	 *
	 * @return string
	 */
	public static function getCacheKeyFromRequest( &$request ) {
		$uploadCacheKey = $request->getText( 'wpCacheKey', $request->getText( 'key', '' ) );
		if ( $uploadCacheKey !== '' ) {
			return $uploadCacheKey;
		}

		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}

		return self::getCacheKey(
			[
				'filename' => $desiredDestName,
				'url' => trim( $request->getVal( 'wpUploadFileURL' ) )
			]
		);
	}

	/**
	 * @return string[]
	 */
	private static function getAllowedHosts(): array {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$domains = $config->get( MainConfigNames::CopyUploadsDomains );
		if ( $config->get( MainConfigNames::CopyUploadAllowOnWikiDomainConfig ) ) {
			$page = wfMessage( 'copyupload-allowed-domains' )->inContentLanguage()->plain();
			foreach ( explode( "\n", $page ) as $line ) {
				// Strip comments
				$line = preg_replace( "/^\\s*([^#]*)\\s*((.*)?)$/", "\\1", $line );
				// Trim whitespace
				$line = trim( $line );
				if ( $line !== '' ) {
					$domains[] = $line;
				}
			}
		}

		return $domains;
	}

	/**
	 * Checks whether uploading from the given URL is allowed, by running the
	 * IsUploadAllowedFromUrl hook. Results are cached per URL.
	 *
	 * @param string $url
	 * @return bool
	 */
	public static function isAllowedUrl( $url ) {
		if ( !isset( self::$allowedUrls[$url] ) ) {
			$allowed = true;
			( new HookRunner( MediaWikiServices::getInstance()->getHookContainer() ) )
				->onIsUploadAllowedFromUrl( $url, $allowed );
			self::$allowedUrls[$url] = $allowed;
		}

		return self::$allowedUrls[$url];
	}

	/**
	 * Get the URL of the file to be uploaded
	 * @return string
	 */
	public function getUrl() {
		return $this->mUrl;
	}

	/**
	 * Entry point for API upload
	 *
	 * @param string $name
	 * @param string $url
	 */
	public function initialize( $name, $url ) {
		$this->mUrl = $url;

		$tempPath = $this->makeTemporaryFile();
		# File size and removeTempFile will be filled in later
		$this->initializePathInfo( $name, $tempPath, 0, false );
	}

	/**
	 * Entry point for SpecialUpload
	 * @param WebRequest &$request
	 */
	public function initializeFromRequest( &$request ) {
		$desiredDestName = $request->getText( 'wpDestFile' );
		if ( !$desiredDestName ) {
			$desiredDestName = $request->getText( 'wpUploadFileURL' );
		}

		$this->initialize(
			$desiredDestName,
			trim( $request->getVal( 'wpUploadFileURL' ) )
		);
	}

	/**
	 * @param WebRequest $request
	 * @return bool
	 */
	public static function isValidRequest( $request ) {
		$user = RequestContext::getMain()->getUser();

		$url = $request->getVal( 'wpUploadFileURL' );

		return $url && MediaWikiServices::getInstance()
			->getPermissionManager()
			->userHasRight( $user, 'upload_by_url' );
	}

	/**
	 * @return string
	 */
	public function getSourceType() {
		return 'url';
	}

	/**
	 * Download the file
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest.
	 *   This could be used to override the timeout on the http request.
	 * @return Status
	 */
	public function fetchFile( $httpOptions = [] ) {
		$status = $this->canFetchFile();
		if ( !$status->isGood() ) {
			return $status;
		}

		return $this->reallyFetchFile( $httpOptions );
	}

	/**
	 * Verify that we can actually download the file
	 *
	 * @return Status
	 */
	public function canFetchFile() {
		if ( !MWHttpRequest::isValidURI( $this->mUrl ) ) {
			return Status::newFatal( 'http-invalid-url', $this->mUrl );
		}

		if ( !self::isAllowedHost( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-domain' );
		}
		if ( !self::isAllowedUrl( $this->mUrl ) ) {
			return Status::newFatal( 'upload-copy-upload-invalid-url' );
		}
		return Status::newGood();
	}

	/**
	 * Create a new temporary file in the URL subdirectory of wfTempDir().
	 *
	 * @return string Path to the file
	 */
	protected function makeTemporaryFile() {
		$tmpFile = MediaWikiServices::getInstance()->getTempFSFileFactory()
			->newTempFSFile( 'URL', 'urlupload_' );
		$tmpFile->bind( $this );

		return $tmpFile->getPath();
	}

	/**
	 * Callback: save a chunk of the result of an HTTP request to the temporary file
	 *
	 * @param mixed $req
	 * @param string $buffer
	 * @return int Number of bytes handled
	 */
	public function saveTempFileChunk( $req, $buffer ) {
		wfDebugLog( 'fileupload', 'Received chunk of ' . strlen( $buffer ) . ' bytes' );
		$nbytes = fwrite( $this->mTmpHandle, $buffer );

		if ( $nbytes == strlen( $buffer ) ) {
			$this->mFileSize += $nbytes;
		} else {
			// Well... that's not good!
			wfDebugLog(
				'fileupload',
				'Short write ' . $nbytes . '/' . strlen( $buffer ) .
					' bytes, aborting with ' . $this->mFileSize . ' uploaded so far'
			);
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = false;
		}

		return $nbytes;
	}

	/**
	 * Download the file, save it to the temporary file, update the file
	 * size, and set $mRemoveTempFile to true.
	 *
	 * @param array $httpOptions Array of options for MWHttpRequest
	 * @return Status
	 */
	protected function reallyFetchFile( $httpOptions = [] ) {
		$copyUploadProxy = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::CopyUploadProxy );
		$copyUploadTimeout = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::CopyUploadTimeout );
		// Note the temporary file should already be created by makeTemporaryFile()
		$this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
		if ( !$this->mTmpHandle ) {
			return Status::newFatal( 'tmp-create-error' );
		}
		wfDebugLog( 'fileupload', 'Temporary file created "' . $this->mTempPath . '"' );

		$this->mRemoveTempFile = true;
		$this->mFileSize = 0;

		$options = $httpOptions + [ 'followRedirects' => false ];

		if ( $copyUploadProxy !== false ) {
			$options['proxy'] = $copyUploadProxy;
		}

		if ( $copyUploadTimeout && !isset( $options['timeout'] ) ) {
			$options['timeout'] = $copyUploadTimeout;
		}
		wfDebugLog(
			'fileupload',
			'Starting download from "' . $this->mUrl . '" ' .
				'<' . implode( ',', array_keys( array_filter( $options ) ) ) . '>'
		);

		// Manually follow any redirects up to the limit, and reset the output file
		// before each new request, to prevent capturing a redirect response as
		// part of the file.
		$attemptsLeft = $options['maxRedirects'] ?? 5;
		$targetUrl = $this->mUrl;
		$requestFactory = MediaWikiServices::getInstance()->getHttpRequestFactory();
		while ( $attemptsLeft > 0 ) {
			$req = $requestFactory->create( $targetUrl, $options, __METHOD__ );
			$req->setCallback( [ $this, 'saveTempFileChunk' ] );
			$status = $req->execute();
			if ( !$req->isRedirect() ) {
				break;
			}
			$targetUrl = $req->getFinalUrl();
			// Remove redirect response content from file.
			ftruncate( $this->mTmpHandle, 0 );
			rewind( $this->mTmpHandle );
			$attemptsLeft--;
		}

		if ( $attemptsLeft == 0 ) {
			return Status::newFatal( 'upload-too-many-redirects' );
		}

		if ( $this->mTmpHandle ) {
			// File got written ok...
			fclose( $this->mTmpHandle );
			$this->mTmpHandle = null;
		} else {
			// We encountered a write error during the download...
			return Status::newFatal( 'tmp-write-error' );
		}

		// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
		if ( $status->isOK() ) {
			wfDebugLog( 'fileupload', 'Download by URL completed successfully.' );
		} else {
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
			wfDebugLog( 'fileupload', $status->getWikiText( false, false, 'en' ) );
			wfDebugLog(
				'fileupload',
				// @phan-suppress-next-line PhanPossiblyUndeclaredVariable Always set after loop
				'Download by URL completed with HTTP status ' . $req->getStatus()
			);
		}

		// @phan-suppress-next-line PhanTypeMismatchReturnNullable,PhanPossiblyUndeclaredVariable Always set after loop
		return $status;
	}
}
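
/*
 * Illustrative sketch (not part of the class above): how a caller might use
 * these copy-upload helpers end to end. This assumes the class is MediaWiki
 * core's UploadFromUrl and that $wgAllowCopyUploads / $wgCopyUploadsDomains
 * are the site configuration settings behind AllowCopyUploads and
 * CopyUploadsDomains; the URL and target filename are made up for the example.
 *
 *   // LocalSettings.php
 *   $wgAllowCopyUploads = true;
 *   $wgCopyUploadsDomains = [ '*.flickr.com', 'upload.*.gov.uk' ];
 *
 *   // In an upload context (e.g. an API module or maintenance script):
 *   $url = 'https://farm1.flickr.com/123/example.jpg';
 *   if ( UploadFromUrl::isEnabled() && UploadFromUrl::isAllowedHost( $url ) ) {
 *       $upload = new UploadFromUrl();
 *       $upload->initialize( 'Example.jpg', $url );
 *       $status = $upload->fetchFile();
 *
 *       // Async job status for this upload is tracked under
 *       // sha1( 'Example.jpg|||https://farm1.flickr.com/123/example.jpg' ):
 *       $key = UploadFromUrl::getCacheKey( [ 'filename' => 'Example.jpg', 'url' => $url ] );
 *   }
 */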