aboutsummaryrefslogtreecommitdiffstats
path: root/includes/Html.php
diff options
context:
space:
mode:
author: Tim Starling <tstarling@wikimedia.org> 2021-07-29 21:19:14 +1000
committer: Krinkle <krinklemail@gmail.com> 2021-08-02 18:10:59 +0000
commit: 5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c (patch)
tree: a8333e22bdc3fb7ddb3d49ffb02b6a3c67c7dc24 /includes/Html.php
parent: e82c5e52d50a9afd67045f984dc3fb84e2daef44 (diff)
download: mediawikicore-5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c.tar.gz
download: mediawikicore-5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c.zip
Html::openElement() micro-optimisations
* Use isset() instead of in_array()

In dropDefaults():
* Remove strtolower(), since the performance cost of running
  strtolower() on all attribute names far outweighs the performance
  benefit of dropping default values for attributes with non-canonical
  capitalisation. Attribute names with non-canonical case are extremely
  rare in the MediaWiki ecosystem. The change is harmless in the sense
  that a default attribute is semantically equivalent to an omitted
  attribute.
* Handle common cases first.
* Avoid value normalization when the attribute name is unknown.
* For class attributes, check if the value is empty using strict
  comparison rather than normalization.

In expandAttributes():
* If the value is an array, explode each element and add the parts to
  an array rather than doing implode() then explode().
* If the value is an array, check for spaces in the loop rather than
  calling array_diff().
* Replace $quote with a literal
* Skip the check for boolean attributes if we already know it is a
  space-separated attribute.
* Inline Sanitizer::encodeAttribute

Measuring instruction count per iteration with perf stat, averaged over
10M iterations, PS1. Test case:
Html::openElement('a', [ 'class' => [ 'foo', 'bar' ] ] )

* Baseline: 11160.7265433
* in_array(): 10390.3837233
* dropDefaults() changes: 9674.1248824
* expandAttributes() misc: 9248.1947500
* implode/explode and space check: 8318.9800417
* Sanitizer inline: 8021.7371794

Change-Id: I7680b8939da50430316d77d42ebc1987752e99dc
Diffstat (limited to 'includes/Html.php')
-rw-r--r-- includes/Html.php 244
1 files changed, 135 insertions, 109 deletions
diff --git a/includes/Html.php b/includes/Html.php
index 3ffea80dcff8..8b19456debfc 100644
--- a/includes/Html.php
+++ b/includes/Html.php
@@ -47,59 +47,57 @@ use MediaWiki\MediaWikiServices;
* @since 1.16
*/
class Html {
- /** @var string[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */
+ /** @var bool[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */
private static $voidElements = [
- 'area',
- 'base',
- 'br',
- 'col',
- 'embed',
- 'hr',
- 'img',
- 'input',
- 'keygen',
- 'link',
- 'meta',
- 'param',
- 'source',
- 'track',
- 'wbr',
+ 'area' => true,
+ 'base' => true,
+ 'br' => true,
+ 'col' => true,
+ 'embed' => true,
+ 'hr' => true,
+ 'img' => true,
+ 'input' => true,
+ 'keygen' => true,
+ 'link' => true,
+ 'meta' => true,
+ 'param' => true,
+ 'source' => true,
+ 'track' => true,
+ 'wbr' => true,
];
/**
* Boolean attributes, which may have the value omitted entirely. Manually
* collected from the HTML5 spec as of 2011-08-12.
- * @var string[]
+ * @var bool[]
*/
private static $boolAttribs = [
- 'async',
- 'autofocus',
- 'autoplay',
- 'checked',
- 'controls',
- 'default',
- 'defer',
- 'disabled',
- 'formnovalidate',
- 'hidden',
- 'ismap',
- 'itemscope',
- 'loop',
- 'multiple',
- 'muted',
- 'novalidate',
- 'open',
- 'pubdate',
- 'readonly',
- 'required',
- 'reversed',
- 'scoped',
- 'seamless',
- 'selected',
- 'truespeed',
- 'typemustmatch',
- // HTML5 Microdata
- 'itemscope',
+ 'async' => true,
+ 'autofocus' => true,
+ 'autoplay' => true,
+ 'checked' => true,
+ 'controls' => true,
+ 'default' => true,
+ 'defer' => true,
+ 'disabled' => true,
+ 'formnovalidate' => true,
+ 'hidden' => true,
+ 'ismap' => true,
+ 'itemscope' => true,
+ 'loop' => true,
+ 'multiple' => true,
+ 'muted' => true,
+ 'novalidate' => true,
+ 'open' => true,
+ 'pubdate' => true,
+ 'readonly' => true,
+ 'required' => true,
+ 'reversed' => true,
+ 'scoped' => true,
+ 'seamless' => true,
+ 'selected' => true,
+ 'truespeed' => true,
+ 'typemustmatch' => true,
];
/**
@@ -211,7 +209,7 @@ class Html {
*/
public static function rawElement( $element, $attribs = [], $contents = '' ) {
$start = self::openElement( $element, $attribs );
- if ( in_array( $element, self::$voidElements ) ) {
+ if ( isset( self::$voidElements[$element] ) ) {
// Silly XML.
return substr( $start, 0, -1 ) . '/>';
} else {
@@ -266,33 +264,33 @@ class Html {
// Remove invalid input types
if ( $element == 'input' ) {
$validTypes = [
- 'hidden',
- 'text',
- 'password',
- 'checkbox',
- 'radio',
- 'file',
- 'submit',
- 'image',
- 'reset',
- 'button',
+ 'hidden' => true,
+ 'text' => true,
+ 'password' => true,
+ 'checkbox' => true,
+ 'radio' => true,
+ 'file' => true,
+ 'submit' => true,
+ 'image' => true,
+ 'reset' => true,
+ 'button' => true,
// HTML input types
- 'datetime',
- 'datetime-local',
- 'date',
- 'month',
- 'time',
- 'week',
- 'number',
- 'range',
- 'email',
- 'url',
- 'search',
- 'tel',
- 'color',
+ 'datetime' => true,
+ 'datetime-local' => true,
+ 'date' => true,
+ 'month' => true,
+ 'time' => true,
+ 'week' => true,
+ 'number' => true,
+ 'range' => true,
+ 'email' => true,
+ 'url' => true,
+ 'search' => true,
+ 'tel' => true,
+ 'color' => true,
];
- if ( isset( $attribs['type'] ) && !in_array( $attribs['type'], $validTypes ) ) {
+ if ( isset( $attribs['type'] ) && !isset( $validTypes[$attribs['type']] ) ) {
unset( $attribs['type'] );
}
}
@@ -371,25 +369,20 @@ class Html {
'textarea' => [ 'wrap' => 'soft' ],
];
- $element = strtolower( $element );
-
foreach ( $attribs as $attrib => $value ) {
- $lcattrib = strtolower( $attrib );
- if ( is_array( $value ) ) {
- $value = implode( ' ', $value );
- } else {
- $value = strval( $value );
- }
-
- // Simple checks using $attribDefaults
- if ( isset( $attribDefaults[$element][$lcattrib] )
- && $attribDefaults[$element][$lcattrib] == $value
- ) {
- unset( $attribs[$attrib] );
- }
-
- if ( $lcattrib == 'class' && $value == '' ) {
- unset( $attribs[$attrib] );
+ if ( $attrib === 'class' ) {
+ if ( $value === '' || $value === [] || $value === [ '' ] ) {
+ unset( $attribs[$attrib] );
+ }
+ } elseif ( isset( $attribDefaults[$element][$attrib] ) ) {
+ if ( is_array( $value ) ) {
+ $value = implode( ' ', $value );
+ } else {
+ $value = strval( $value );
+ }
+ if ( $attribDefaults[$element][$attrib] == $value ) {
+ unset( $attribs[$attrib] );
+ }
}
}
@@ -490,7 +483,7 @@ class Html {
// For boolean attributes, support [ 'foo' ] instead of
// requiring [ 'foo' => 'meaningless' ].
- if ( is_int( $key ) && in_array( strtolower( $value ), self::$boolAttribs ) ) {
+ if ( is_int( $key ) && isset( self::$boolAttribs[strtolower( $value )] ) ) {
$key = $value;
}
@@ -501,23 +494,23 @@ class Html {
// https://www.w3.org/TR/html401/index/attributes.html ("space-separated")
// https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated")
$spaceSeparatedListAttributes = [
- 'class', // html4, html5
- 'accesskey', // as of html5, multiple space-separated values allowed
+ 'class' => true, // html4, html5
+ 'accesskey' => true, // as of html5, multiple space-separated values allowed
// html4-spec doesn't document rel= as space-separated
// but has been used like that and is now documented as such
// in the html5-spec.
- 'rel',
+ 'rel' => true,
];
// Specific features for attributes that allow a list of space-separated values
- if ( in_array( $key, $spaceSeparatedListAttributes ) ) {
+ if ( isset( $spaceSeparatedListAttributes[$key] ) ) {
// Apply some normalization and remove duplicates
// Convert into correct array. Array can contain space-separated
// values. Implode/explode to get those into the main array as well.
if ( is_array( $value ) ) {
// If input wasn't an array, we can skip this step
- $newValue = [];
+ $arrayValue = [];
foreach ( $value as $k => $v ) {
if ( is_string( $v ) ) {
// String values should be normal `[ 'foo' ]`
@@ -526,34 +519,55 @@ class Html {
// As a special case don't set 'foo' if a
// separate 'foo' => true/false exists in the array
// keys should be authoritative
- $newValue[] = $v;
+ foreach ( explode( ' ', $v ) as $part ) {
+ // Normalize spacing by fixing up cases where people used
+ // more than 1 space and/or a trailing/leading space
+ if ( $part !== '' && $part !== ' ' ) {
+ $arrayValue[] = $part;
+ }
+ }
}
} elseif ( $v ) {
// If the value is truthy but not a string this is likely
// an [ 'foo' => true ], falsy values don't add strings
- $newValue[] = $k;
+ $arrayValue[] = $k;
}
}
- $value = implode( ' ', $newValue );
+ } else {
+ $arrayValue = explode( ' ', $value );
+ // Normalize spacing by fixing up cases where people used
+ // more than 1 space and/or a trailing/leading space
+ $arrayValue = array_diff( $arrayValue, [ '', ' ' ] );
}
- $value = explode( ' ', $value );
-
- // Normalize spacing by fixing up cases where people used
- // more than 1 space and/or a trailing/leading space
- $value = array_diff( $value, [ '', ' ' ] );
// Remove duplicates and create the string
- $value = implode( ' ', array_unique( $value ) );
+ $value = implode( ' ', array_unique( $arrayValue ) );
+
+ // Optimization: Skip below boolAttribs check and jump straight
+ // to its `else` block. The current $spaceSeparatedListAttributes
+ // block is mutually exclusive with $boolAttribs.
+ // phpcs:ignore Generic.PHP.DiscourageGoto
+ goto not_bool; // NOSONAR
} elseif ( is_array( $value ) ) {
throw new MWException( "HTML attribute $key can not contain a list of values" );
}
- $quote = '"';
-
- if ( in_array( $key, self::$boolAttribs ) ) {
+ if ( isset( self::$boolAttribs[$key] ) ) {
$ret .= " $key=\"\"";
} else {
- $ret .= " $key=$quote" . Sanitizer::encodeAttribute( $value ) . $quote;
+ // phpcs:ignore Generic.PHP.DiscourageGoto
+ not_bool:
+ // Inlined from Sanitizer::encodeAttribute() for improved performance
+ $encValue = htmlspecialchars( $value, ENT_QUOTES );
+ // Whitespace is normalized during attribute decoding,
+ // so if we've been passed non-spaces we must encode them
+ // ahead of time or they won't be preserved.
+ $encValue = strtr( $encValue, [
+ "\n" => '&#10;',
+ "\r" => '&#13;',
+ "\t" => '&#9;',
+ ] );
+ $ret .= " $key=\"$encValue\"";
}
}
return $ret;
@@ -671,10 +685,22 @@ class Html {
$attribs['type'] = $type;
$attribs['value'] = $value;
$attribs['name'] = $name;
- if ( in_array( $type, [ 'text', 'search', 'email', 'password', 'number' ] ) ) {
+ $textInputAttributes = [
+ 'text' => true,
+ 'search' => true,
+ 'email' => true,
+ 'password' => true,
+ 'number' => true
+ ];
+ if ( isset( $textInputAttributes[$type] ) ) {
$attribs = self::getTextInputAttributes( $attribs );
}
- if ( in_array( $type, [ 'button', 'reset', 'submit' ] ) ) {
+ $buttonAttributes = [
+ 'button' => true,
+ 'reset' => true,
+ 'submit' => true
+ ];
+ if ( isset( $buttonAttributes[$type] ) ) {
$attribs = self::buttonAttributes( $attribs );
}
return self::element( 'input', $attribs );