diff options
author | Tim Starling <tstarling@wikimedia.org> | 2021-07-29 21:19:14 +1000 |
---|---|---|
committer | Krinkle <krinklemail@gmail.com> | 2021-08-02 18:10:59 +0000 |
commit | 5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c (patch) | |
tree | a8333e22bdc3fb7ddb3d49ffb02b6a3c67c7dc24 /includes/Html.php | |
parent | e82c5e52d50a9afd67045f984dc3fb84e2daef44 (diff) | |
download | mediawikicore-5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c.tar.gz mediawikicore-5deb6a2a4546318d1fa94ad8c3fa54e9eb8fc67c.zip |
Html::openElement() micro-optimisations
* Use isset() instead of in_array()
In dropDefaults():
* Remove strtolower(), since the performance cost of running
strtolower() on all attribute names far outweighs the performance
benefit of dropping default values for attributes with non-canonical
capitalisation. Attribute names with non-canonical case are extremely
rare in the MediaWiki ecosystem. The change is harmless in the sense
that a default attribute is semantically equivalent to an omitted
attribute.
* Handle common cases first.
* Avoid value normalization when the attribute name is unknown.
* For class attributes, check if the value is empty using strict
comparison rather than normalization.
In expandAttributes():
* If the value is an array, explode each element and add the parts to
an array rather than doing implode() then explode().
* If the value is an array, check for spaces in the loop rather than
calling array_diff().
* Replace $quote with a literal
* Skip the check for boolean attributes if we already know it is a
space-separated attribute.
* Inline Sanitizer::encodeAttribute
Measuring instruction count per iteration with perf stat, averaged over
10M iterations, PS1. Test case:
Html::openElement('a', [ 'class' => [ 'foo', 'bar' ] ] )
* Baseline: 11160.7265433
* in_array(): 10390.3837233
* dropDefaults() changes: 9674.1248824
* expandAttributes() misc: 9248.1947500
* implode/explode and space check: 8318.9800417
* Sanitizer inline: 8021.7371794
Change-Id: I7680b8939da50430316d77d42ebc1987752e99dc
Diffstat (limited to 'includes/Html.php')
-rw-r--r-- | includes/Html.php | 244 |
1 files changed, 135 insertions, 109 deletions
diff --git a/includes/Html.php b/includes/Html.php index 3ffea80dcff8..8b19456debfc 100644 --- a/includes/Html.php +++ b/includes/Html.php @@ -47,59 +47,57 @@ use MediaWiki\MediaWikiServices; * @since 1.16 */ class Html { - /** @var string[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */ + /** @var bool[] List of void elements from HTML5, section 8.1.2 as of 2016-09-19 */ private static $voidElements = [ - 'area', - 'base', - 'br', - 'col', - 'embed', - 'hr', - 'img', - 'input', - 'keygen', - 'link', - 'meta', - 'param', - 'source', - 'track', - 'wbr', + 'area' => true, + 'base' => true, + 'br' => true, + 'col' => true, + 'embed' => true, + 'hr' => true, + 'img' => true, + 'input' => true, + 'keygen' => true, + 'link' => true, + 'meta' => true, + 'param' => true, + 'source' => true, + 'track' => true, + 'wbr' => true, ]; /** * Boolean attributes, which may have the value omitted entirely. Manually * collected from the HTML5 spec as of 2011-08-12. - * @var string[] + * @var bool[] */ private static $boolAttribs = [ - 'async', - 'autofocus', - 'autoplay', - 'checked', - 'controls', - 'default', - 'defer', - 'disabled', - 'formnovalidate', - 'hidden', - 'ismap', - 'itemscope', - 'loop', - 'multiple', - 'muted', - 'novalidate', - 'open', - 'pubdate', - 'readonly', - 'required', - 'reversed', - 'scoped', - 'seamless', - 'selected', - 'truespeed', - 'typemustmatch', - // HTML5 Microdata - 'itemscope', + 'async' => true, + 'autofocus' => true, + 'autoplay' => true, + 'checked' => true, + 'controls' => true, + 'default' => true, + 'defer' => true, + 'disabled' => true, + 'formnovalidate' => true, + 'hidden' => true, + 'ismap' => true, + 'itemscope' => true, + 'loop' => true, + 'multiple' => true, + 'muted' => true, + 'novalidate' => true, + 'open' => true, + 'pubdate' => true, + 'readonly' => true, + 'required' => true, + 'reversed' => true, + 'scoped' => true, + 'seamless' => true, + 'selected' => true, + 'truespeed' => true, + 'typemustmatch' => 
true, ]; /** @@ -211,7 +209,7 @@ class Html { */ public static function rawElement( $element, $attribs = [], $contents = '' ) { $start = self::openElement( $element, $attribs ); - if ( in_array( $element, self::$voidElements ) ) { + if ( isset( self::$voidElements[$element] ) ) { // Silly XML. return substr( $start, 0, -1 ) . '/>'; } else { @@ -266,33 +264,33 @@ class Html { // Remove invalid input types if ( $element == 'input' ) { $validTypes = [ - 'hidden', - 'text', - 'password', - 'checkbox', - 'radio', - 'file', - 'submit', - 'image', - 'reset', - 'button', + 'hidden' => true, + 'text' => true, + 'password' => true, + 'checkbox' => true, + 'radio' => true, + 'file' => true, + 'submit' => true, + 'image' => true, + 'reset' => true, + 'button' => true, // HTML input types - 'datetime', - 'datetime-local', - 'date', - 'month', - 'time', - 'week', - 'number', - 'range', - 'email', - 'url', - 'search', - 'tel', - 'color', + 'datetime' => true, + 'datetime-local' => true, + 'date' => true, + 'month' => true, + 'time' => true, + 'week' => true, + 'number' => true, + 'range' => true, + 'email' => true, + 'url' => true, + 'search' => true, + 'tel' => true, + 'color' => true, ]; - if ( isset( $attribs['type'] ) && !in_array( $attribs['type'], $validTypes ) ) { + if ( isset( $attribs['type'] ) && !isset( $validTypes[$attribs['type']] ) ) { unset( $attribs['type'] ); } } @@ -371,25 +369,20 @@ class Html { 'textarea' => [ 'wrap' => 'soft' ], ]; - $element = strtolower( $element ); - foreach ( $attribs as $attrib => $value ) { - $lcattrib = strtolower( $attrib ); - if ( is_array( $value ) ) { - $value = implode( ' ', $value ); - } else { - $value = strval( $value ); - } - - // Simple checks using $attribDefaults - if ( isset( $attribDefaults[$element][$lcattrib] ) - && $attribDefaults[$element][$lcattrib] == $value - ) { - unset( $attribs[$attrib] ); - } - - if ( $lcattrib == 'class' && $value == '' ) { - unset( $attribs[$attrib] ); + if ( $attrib === 'class' ) { + if ( 
$value === '' || $value === [] || $value === [ '' ] ) { + unset( $attribs[$attrib] ); + } + } elseif ( isset( $attribDefaults[$element][$attrib] ) ) { + if ( is_array( $value ) ) { + $value = implode( ' ', $value ); + } else { + $value = strval( $value ); + } + if ( $attribDefaults[$element][$attrib] == $value ) { + unset( $attribs[$attrib] ); + } } } @@ -490,7 +483,7 @@ class Html { // For boolean attributes, support [ 'foo' ] instead of // requiring [ 'foo' => 'meaningless' ]. - if ( is_int( $key ) && in_array( strtolower( $value ), self::$boolAttribs ) ) { + if ( is_int( $key ) && isset( self::$boolAttribs[strtolower( $value )] ) ) { $key = $value; } @@ -501,23 +494,23 @@ class Html { // https://www.w3.org/TR/html401/index/attributes.html ("space-separated") // https://www.w3.org/TR/html5/index.html#attributes-1 ("space-separated") $spaceSeparatedListAttributes = [ - 'class', // html4, html5 - 'accesskey', // as of html5, multiple space-separated values allowed + 'class' => true, // html4, html5 + 'accesskey' => true, // as of html5, multiple space-separated values allowed // html4-spec doesn't document rel= as space-separated // but has been used like that and is now documented as such // in the html5-spec. - 'rel', + 'rel' => true, ]; // Specific features for attributes that allow a list of space-separated values - if ( in_array( $key, $spaceSeparatedListAttributes ) ) { + if ( isset( $spaceSeparatedListAttributes[$key] ) ) { // Apply some normalization and remove duplicates // Convert into correct array. Array can contain space-separated // values. Implode/explode to get those into the main array as well. 
if ( is_array( $value ) ) { // If input wasn't an array, we can skip this step - $newValue = []; + $arrayValue = []; foreach ( $value as $k => $v ) { if ( is_string( $v ) ) { // String values should be normal `[ 'foo' ]` @@ -526,34 +519,55 @@ class Html { // As a special case don't set 'foo' if a // separate 'foo' => true/false exists in the array // keys should be authoritative - $newValue[] = $v; + foreach ( explode( ' ', $v ) as $part ) { + // Normalize spacing by fixing up cases where people used + // more than 1 space and/or a trailing/leading space + if ( $part !== '' && $part !== ' ' ) { + $arrayValue[] = $part; + } + } } } elseif ( $v ) { // If the value is truthy but not a string this is likely // an [ 'foo' => true ], falsy values don't add strings - $newValue[] = $k; + $arrayValue[] = $k; } } - $value = implode( ' ', $newValue ); + } else { + $arrayValue = explode( ' ', $value ); + // Normalize spacing by fixing up cases where people used + // more than 1 space and/or a trailing/leading space + $arrayValue = array_diff( $arrayValue, [ '', ' ' ] ); } - $value = explode( ' ', $value ); - - // Normalize spacing by fixing up cases where people used - // more than 1 space and/or a trailing/leading space - $value = array_diff( $value, [ '', ' ' ] ); // Remove duplicates and create the string - $value = implode( ' ', array_unique( $value ) ); + $value = implode( ' ', array_unique( $arrayValue ) ); + + // Optimization: Skip below boolAttribs check and jump straight + // to its `else` block. The current $spaceSeparatedListAttributes + // block is mutually exclusive with $boolAttribs. + // phpcs:ignore Generic.PHP.DiscourageGoto + goto not_bool; // NOSONAR } elseif ( is_array( $value ) ) { throw new MWException( "HTML attribute $key can not contain a list of values" ); } - $quote = '"'; - - if ( in_array( $key, self::$boolAttribs ) ) { + if ( isset( self::$boolAttribs[$key] ) ) { $ret .= " $key=\"\""; } else { - $ret .= " $key=$quote" . 
Sanitizer::encodeAttribute( $value ) . $quote; + // phpcs:ignore Generic.PHP.DiscourageGoto + not_bool: + // Inlined from Sanitizer::encodeAttribute() for improved performance + $encValue = htmlspecialchars( $value, ENT_QUOTES ); + // Whitespace is normalized during attribute decoding, + // so if we've been passed non-spaces we must encode them + // ahead of time or they won't be preserved. + $encValue = strtr( $encValue, [ + "\n" => '&#10;', + "\r" => '&#13;', + "\t" => '&#9;', + ] ); + $ret .= " $key=\"$encValue\""; } } return $ret; @@ -671,10 +685,22 @@ class Html { $attribs['type'] = $type; $attribs['value'] = $value; $attribs['name'] = $name; - if ( in_array( $type, [ 'text', 'search', 'email', 'password', 'number' ] ) ) { + $textInputAttributes = [ + 'text' => true, + 'search' => true, + 'email' => true, + 'password' => true, + 'number' => true + ]; + if ( isset( $textInputAttributes[$type] ) ) { $attribs = self::getTextInputAttributes( $attribs ); } - if ( in_array( $type, [ 'button', 'reset', 'submit' ] ) ) { + $buttonAttributes = [ + 'button' => true, + 'reset' => true, + 'submit' => true + ]; + if ( isset( $buttonAttributes[$type] ) ) { $attribs = self::buttonAttributes( $attribs ); } return self::element( 'input', $attribs ); |