aboutsummaryrefslogtreecommitdiffstats
path: root/maintenance/benchmarks/bench_utf8_title_check.php
blob: 2e4b90292d81a332342e88a2458d7c9dea3f7ece (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php
/**
 * @file
 * @ingroup Benchmark
 */

require_once( dirname( __FILE__ ) . '/Benchmarker.php' );

/**
 * Benchmark comparing several ways of checking that a string is UTF-8.
 *
 * Times the regexp used in Language->checkTitleEncoding(), plus a non-capturing
 * and a once-only (atomic) group variant of that regexp, against
 * mb_check_encoding(). Nothing is benchmarked when mb_check_encoding() is
 * not available.
 */
class bench_utf8_title_check extends Benchmarker {

	/** Whether mb_check_encoding() exists; execute() does nothing when this is false. */
	private $canRun;

	/** URL-encoded sample strings; each is passed through rawurldecode() before being benchmarked. */
	private $data;

	/** Return value of the most recently executed check. */
	private $isutf8;

	/**
	 * Detects whether mb_check_encoding() is available, sets the script
	 * description accordingly and prepares the sample strings.
	 */
	public function __construct() {
		parent::__construct();

		$this->canRun = function_exists( 'mb_check_encoding' );
		if ( !$this->canRun ) {
			$this->mDescription = "CANNOT RUN benchmark using mb_check_encoding: function not available.";
		} else {
			$this->mDescription = "Benchmark for using a regexp vs. mb_check_encoding to check for UTF-8 encoding.";
			mb_internal_encoding( 'UTF-8' );
		}

		// This comes from bug 36839
		$longTitleList = implode( '', array(
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn%7C",
			"Catherine%20Willows%7CDavid%20Hodges%7CDavid%20Phillips%7CGil%20Grissom%7CGreg%20Sanders%7CHodges%7C",
			"Internet%20Movie%20Database%7CJim%20Brass%7CLady%20Heather%7C",
			"Les%20Experts%20(s%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e)%7CLes%20Experts%20:%20Manhattan%7C",
			"Les%20Experts%20:%20Miami%7CListe%20des%20personnages%20des%20Experts%7C",
			"Liste%20des%20%C3%A9pisodes%20des%20Experts%7CMod%C3%A8le%20discussion:Palette%20Les%20Experts%7C",
			"Nick%20Stokes%7CPersonnage%20de%20fiction%7CPersonnage%20fictif%7CPersonnage%20de%20fiction%7C",
			"Personnages%20r%C3%A9currents%20dans%20Les%20Experts%7CRaymond%20Langston%7CRiley%20Adams%7C",
			"Saison%201%20des%20Experts%7CSaison%2010%20des%20Experts%7CSaison%2011%20des%20Experts%7C",
			"Saison%2012%20des%20Experts%7CSaison%202%20des%20Experts%7CSaison%203%20des%20Experts%7C",
			"Saison%204%20des%20Experts%7CSaison%205%20des%20Experts%7CSaison%206%20des%20Experts%7C",
			"Saison%207%20des%20Experts%7CSaison%208%20des%20Experts%7CSaison%209%20des%20Experts%7C",
			"Sara%20Sidle%7CSofia%20Curtis%7CS%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e%7CWallace%20Langham%7C",
			"Warrick%20Brown%7CWendy%20Simms%7C%C3%89tats-Unis",
		) );

		$this->data = array(
			"",
			"United States of America", // 7bit ASCII
			"S%C3%A9rie%20t%C3%A9l%C3%A9vis%C3%A9e",
			"Acteur%7CAlbert%20Robbins%7CAnglais%7CAnn%20Donahue%7CAnthony%20E.%20Zuiker%7CCarol%20Mendelsohn",
			$longTitleList,
		);
	}

	/**
	 * Queues every check method once per sample string, runs the queue
	 * through bench() and prints the formatted results.
	 *
	 * Returns without doing anything when mb_check_encoding() is unavailable.
	 */
	public function execute() {
		if ( !$this->canRun ) {
			return;
		}

		// Order matters: for each sample the four checks are queued in this sequence.
		$checkMethods = array(
			'use_regexp',
			'use_regexp_non_capturing',
			'use_regexp_once_only',
			'use_mb_check_encoding',
		);

		$queue = array();
		foreach ( $this->data as $encoded ) {
			// Every check for a given sample receives the same decoded string.
			$decoded = rawurldecode( $encoded );
			foreach ( $checkMethods as $method ) {
				$queue[] = array(
					'function' => array( $this, $method ),
					'args' => array( $decoded ),
				);
			}
		}

		$this->bench( $queue );
		print $this->getFormattedResults();
	}

	/**
	 * Matches $s against the regexp using a capturing group and stores the
	 * preg_match() result in $this->isutf8.
	 *
	 * @param $s String to check
	 */
	public function use_regexp( $s ) {
		$pattern = '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Same check as use_regexp(), but the group is non-capturing: (?: ... ).
	 *
	 * @param $s String to check
	 */
	public function use_regexp_non_capturing( $s ) {
		$pattern = '/^(?:[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Same check as use_regexp(), but the group is once-only: (?> ... ).
	 *
	 * @param $s String to check
	 */
	public function use_regexp_once_only( $s ) {
		$pattern = '/^(?>[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/';
		$this->isutf8 = preg_match( $pattern, $s );
	}

	/**
	 * Checks $s with mb_check_encoding() against 'UTF-8' and stores the
	 * result in $this->isutf8.
	 *
	 * @param $s String to check
	 */
	public function use_mb_check_encoding( $s ) {
		$this->isutf8 = mb_check_encoding( $s, 'UTF-8' );
	}

}

// Name of the benchmark class declared in this file.
// NOTE(review): presumably read by the script loaded through RUN_MAINTENANCE_IF_MAIN — confirm.
$maintClass = 'bench_utf8_title_check';

require_once RUN_MAINTENANCE_IF_MAIN;