aboutsummaryrefslogtreecommitdiffstats
path: root/includes/BadFileLookup.php
blob: e1ffbc5a7a10d4bd3f25ca68126184062b907b51 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php

namespace MediaWiki;

use BagOStuff;
use Hooks;
use MalformedTitleException;
use MediaWiki\Linker\LinkTarget;
use RepoGroup;
use TitleParser;

/**
 * Answers whether a file is listed on the wiki's "bad image list".
 *
 * The list is obtained as wikitext from a callback, parsed into a lookup map
 * and kept both in the injected BagOStuff cache (keyed by the SHA-1 of the
 * wikitext) and in this object for the remainder of its lifetime.
 */
class BadFileLookup {
	/** @var callable Returns contents of blacklist (see comment for isBadFile()) */
	private $blacklistCallback;

	/** @var BagOStuff Cache of parsed bad image list */
	private $cache;

	/** @var RepoGroup Used to resolve the file name (including redirects) to a title */
	private $repoGroup;

	/** @var TitleParser Used to parse the link targets found in the blacklist */
	private $titleParser;

	/**
	 * @var array|null Parsed blacklist, or null if it has not been loaded yet.
	 *   Shape: [ file DB key => [ namespace => [ page DB key => true ] ] ], where the
	 *   inner map lists the pages on which the file is allowed despite being listed.
	 */
	private $badFiles;

	/**
	 * Do not call directly. Use MediaWikiServices.
	 *
	 * @param callable $blacklistCallback Callback that returns wikitext of a file blacklist
	 * @param BagOStuff $cache For caching parsed versions of the blacklist
	 * @param RepoGroup $repoGroup
	 * @param TitleParser $titleParser
	 */
	public function __construct(
		callable $blacklistCallback,
		BagOStuff $cache,
		RepoGroup $repoGroup,
		TitleParser $titleParser
	) {
		$this->blacklistCallback = $blacklistCallback;
		$this->cache = $cache;
		$this->repoGroup = $repoGroup;
		$this->titleParser = $titleParser;
	}

	/**
	 * Determine if a file exists on the 'bad image list'.
	 *
	 * The format of MediaWiki:Bad_image_list is as follows:
	 *    * Only list items (lines starting with "*") are considered
	 *    * The first link on a line must be a link to a bad file
	 *    * Any subsequent links on the same line are considered to be exceptions,
	 *      i.e. articles where the file may occur inline.
	 *
	 * The 'BadImage' hook is run first; if a handler aborts it, the value the
	 * handler stored in $bad is returned and the list is not consulted at all.
	 *
	 * @param string $name The file name to check
	 * @param LinkTarget|null $contextTitle The page on which the file occurs, if known
	 * @return bool True if the file is listed and $contextTitle (if given) is not one
	 *   of the exceptions recorded for it
	 */
	public function isBadFile( $name, LinkTarget $contextTitle = null ) {
		// Handle redirects; callers almost always hit RepoGroup::findFile() anyway,
		// so just use that method because it has a fast process cache.
		$file = $this->repoGroup->findFile( $name );
		// XXX If we don't find the file we also don't replace spaces by underscores or otherwise
		// validate or normalize the title, is this right?
		if ( $file ) {
			$name = $file->getTitle()->getDBkey();
		}

		// Run the extension hook
		$bad = false;
		if ( !Hooks::run( 'BadImage', [ $name, &$bad ] ) ) {
			return (bool)$bad;
		}

		if ( $this->badFiles === null ) {
			// Not used before in this request, try the cache
			$blacklist = ( $this->blacklistCallback )();
			$key = $this->cache->makeKey( 'bad-image-list', sha1( $blacklist ) );
			$cached = $this->cache->get( $key );
			// Accept any cached array, including an empty one: an empty parsed list is a
			// perfectly valid cache hit. A truthiness test here would treat it as a miss
			// and re-parse and re-store the list on every request.
			if ( is_array( $cached ) ) {
				$this->badFiles = $cached;
			} else {
				// Cache miss, build the list now
				$this->badFiles = $this->parseBlacklist( $blacklist );
				$this->cache->set( $key, $this->badFiles, 24 * 60 * 60 );
			}
		}

		return isset( $this->badFiles[$name] ) && ( !$contextTitle ||
			!isset( $this->badFiles[$name][$contextTitle->getNamespace()]
				[$contextTitle->getDBkey()] ) );
	}

	/**
	 * Parse blacklist wikitext into the lookup map stored in $this->badFiles.
	 *
	 * @param string $blacklist Wikitext of the blacklist
	 * @return array [ file DB key => [ namespace => [ page DB key => true ] ] ]
	 */
	private function parseBlacklist( $blacklist ) {
		$badFiles = [];
		foreach ( explode( "\n", $blacklist ) as $line ) {
			// List items only
			if ( substr( $line, 0, 1 ) !== '*' ) {
				continue;
			}

			// Find all links
			$m = [];
			// XXX What is the ':?' doing in the regex? Why not let the TitleParser strip it?
			if ( !preg_match_all( '/\[\[:?(.*?)\]\]/', $line, $m ) ) {
				continue;
			}

			$fileDBkey = null;
			$exceptions = [];
			foreach ( $m[1] as $i => $titleText ) {
				try {
					$title = $this->titleParser->parseTitle( $titleText );
				} catch ( MalformedTitleException $e ) {
					// Unparseable links are skipped. If this was the first link, the
					// line ends up without a file and is dropped below.
					continue;
				}
				if ( $i == 0 ) {
					// The first link on the line names the bad file itself
					$fileDBkey = $title->getDBkey();
				} else {
					// Every later link is a page where the file is still allowed
					$exceptions[$title->getNamespace()][$title->getDBkey()] = true;
				}
			}

			if ( $fileDBkey !== null ) {
				$badFiles[$fileDBkey] = $exceptions;
			}
		}

		return $badFiles;
	}
}