diff options
author | Daniel Friesen <dantman@users.mediawiki.org> | 2012-02-20 22:32:18 +0000 |
---|---|---|
committer | Daniel Friesen <dantman@users.mediawiki.org> | 2012-02-20 22:32:18 +0000 |
commit | 73482615dd43e7985ba43c5cac3168361fbeec5b (patch) | |
tree | 2d3f41b9a2da4ae79945256ba226f869c71f194f /includes | |
parent | 4943b75b89099bc6c92a698920f865dec0b4dbb5 (diff) | |
download | mediawikicore-73482615dd43e7985ba43c5cac3168361fbeec5b.tar.gz mediawikicore-73482615dd43e7985ba43c5cac3168361fbeec5b.zip |
Revert Microdata improvements in r111891, r111898, r111899, r111901, r111903, and r111906 till after the git migration.
Notes:
http://mediawiki.org/wiki/Special:Code/MediaWiki/111973
Diffstat (limited to 'includes')
-rw-r--r-- | includes/Sanitizer.php | 103 | ||||
-rw-r--r-- | includes/parser/Tidy.php | 14 | ||||
-rw-r--r-- | includes/tidy.conf | 5 |
3 files changed, 15 insertions, 107 deletions
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php index eb8710866aad..196abd9f4354 100644 --- a/includes/Sanitizer.php +++ b/includes/Sanitizer.php @@ -364,17 +364,14 @@ class Sanitizer { * @return string */ static function removeHTMLtags( $text, $processCallback = null, $args = array(), $extratags = array(), $removetags = array() ) { - global $wgUseTidy, $wgHtml5, $wgAllowMicrodataAttributes, $wgAllowImageTag; + global $wgUseTidy; static $htmlpairsStatic, $htmlsingle, $htmlsingleonly, $htmlnest, $tabletags, $htmllist, $listtags, $htmlsingleallowed, $htmlelementsStatic, $staticInitialised; wfProfileIn( __METHOD__ ); - // Base our staticInitialised variable off of the global config state so that if the globals - // are changed (like in the secrewed up test system) we will re-initialise the settings. - $globalContext = implode( '-', compact( 'wgHtml5', 'wgAllowMicrodataAttributes', 'wgAllowImageTag' ) ); - if ( !$staticInitialised || $staticInitialised != $globalContext ) { + if ( !$staticInitialised ) { $htmlpairsStatic = array( # Tags that must be closed 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', @@ -384,19 +381,12 @@ class Sanitizer { 'ruby', 'rt' , 'rb' , 'rp', 'p', 'span', 'abbr', 'dfn', 'kbd', 'samp' ); - if ( $wgHtml5 ) { - $htmlpairsStatic = array_merge( $htmlpairsStatic, array( 'data', 'time' ) ); - } $htmlsingle = array( 'br', 'hr', 'li', 'dt', 'dd' ); $htmlsingleonly = array( # Elements that cannot have close tags 'br', 'hr' ); - if ( $wgHtml5 && $wgAllowMicrodataAttributes ) { - $htmlsingle[] = $htmlsingleonly[] = 'meta'; - $htmlsingle[] = $htmlsingleonly[] = 'link'; - } $htmlnest = array( # Tags that can be nested--?? 
'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', 'dl', 'font', 'big', 'small', 'sub', 'sup', 'span' @@ -411,6 +401,7 @@ class Sanitizer { 'li', ); + global $wgAllowImageTag; if ( $wgAllowImageTag ) { $htmlsingle[] = 'img'; $htmlsingleonly[] = 'img'; @@ -425,7 +416,7 @@ class Sanitizer { foreach ( $vars as $var ) { $$var = array_flip( $$var ); } - $staticInitialised = $globalContext; + $staticInitialised = true; } # Populate $htmlpairs and $htmlelements with the $extratags and $removetags arrays $extratags = array_flip( $extratags ); @@ -537,10 +528,6 @@ class Sanitizer { call_user_func_array( $processCallback, array( &$params, $args ) ); } - if ( !Sanitizer::validateTag( $params, $t ) ) { - $badtag = true; - } - # Strip non-approved attributes from the tag $newparams = Sanitizer::fixTagAttributes( $params, $t ); } @@ -564,24 +551,16 @@ class Sanitizer { preg_match( '/^(\\/?)(\\w+)([^>]*?)(\\/{0,1}>)([^<]*)$/', $x, $regs ); @list( /* $qbar */, $slash, $t, $params, $brace, $rest ) = $regs; - $badtag = false; if ( isset( $htmlelements[$t = strtolower( $t )] ) ) { if( is_callable( $processCallback ) ) { call_user_func_array( $processCallback, array( &$params, $args ) ); } - - if ( !Sanitizer::validateTag( $params, $t ) ) { - $badtag = true; - } - $newparams = Sanitizer::fixTagAttributes( $params, $t ); - if ( !$badtag ) { - $rest = str_replace( '>', '>', $rest ); - $text .= "<$slash$t$newparams$brace$rest"; - continue; - } + $rest = str_replace( '>', '>', $rest ); + $text .= "<$slash$t$newparams$brace$rest"; + } else { + $text .= '<' . str_replace( '>', '>', $x); } - $text .= '<' . str_replace( '>', '>', $x); } } wfProfileOut( __METHOD__ ); @@ -730,37 +709,6 @@ class Sanitizer { } /** - * Takes attribute names and values for a tag and the tah name and - * validates that the tag is allowed to be present. - * This DOES NOT validate the attributes, nor does it validate the - * tags themselves. 
This method only handles the special circumstances - * where we may want to allow a tag within content but ONLY when it has - * specific attributes set. - * - * @param $ - */ - static function validateTag( $params, $element ) { - $params = Sanitizer::decodeTagAttributes( $params ); - - if ( $element == 'meta' || $element == 'link' ) { - if ( !isset( $params['itemprop'] ) ) { - // <meta> and <link> must have an itemprop="" otherwise they are not valid or safe in content - return false; - } - if ( $element == 'meta' && !isset( $params['content'] ) ) { - // <meta> must have a content="" for the itemprop - return false; - } - if ( $element == 'link' && !isset( $params['href'] ) ) { - // <link> must have an associated href="" - return false; - } - } - - return true; - } - - /** * Take an array of attribute names and values and normalize or discard * illegal values for the given element type. * @@ -861,7 +809,7 @@ class Sanitizer { unset( $out['itemid'] ); unset( $out['itemref'] ); } - # TODO: Strip itemprop if we aren't descendants of an itemscope or pointed to by an itemref. + # TODO: Strip itemprop if we aren't descendants of an itemscope. } return $out; } @@ -1486,7 +1434,10 @@ class Sanitizer { * @return Array */ static function attributeWhitelist( $element ) { - $list = Sanitizer::setupAttributeWhitelist(); + static $list; + if( !isset( $list ) ) { + $list = Sanitizer::setupAttributeWhitelist(); + } return isset( $list[$element] ) ? 
$list[$element] : array(); @@ -1500,13 +1451,6 @@ class Sanitizer { static function setupAttributeWhitelist() { global $wgAllowRdfaAttributes, $wgHtml5, $wgAllowMicrodataAttributes; - static $whitelist, $staticInitialised; - $globalContext = implode( '-', compact( 'wgAllowRdfaAttributes', 'wgHtml5', 'wgAllowMicrodataAttributes' ) ); - - if ( isset( $whitelist ) && $staticInitialised == $globalContext ) { - return $whitelist; - } - $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' ); if ( $wgAllowRdfaAttributes ) { @@ -1539,7 +1483,7 @@ class Sanitizer { # Numbers refer to sections in HTML 4.01 standard describing the element. # See: http://www.w3.org/TR/html4/ - $whitelist = array( + $whitelist = array ( # 7.5.4 'div' => $block, 'center' => $common, # deprecated @@ -1667,26 +1611,7 @@ class Sanitizer { # 'title' may not be 100% valid here; it's XHTML # http://www.w3.org/TR/REC-MathML/ 'math' => array( 'class', 'style', 'id', 'title' ), - ); - - if ( $wgHtml5 ) { - # HTML5 elements, defined by: - # http://www.whatwg.org/specs/web-apps/current-work/multipage/ - $whitelist += array( - 'data' => array_merge( $common, array( 'value' ) ), - 'time' => array_merge( $common, array( 'datetime' ) ), - - // meta and link are only present when Microdata is allowed anyways - // so we don't bother adding another condition here - // meta and link are only valid for use as Microdata so we do not - // allow the common attributes here. - 'meta' => array( 'itemprop', 'content' ), - 'link' => array( 'itemprop', 'href' ), ); - } - - $staticInitialised = $globalContext; - return $whitelist; } diff --git a/includes/parser/Tidy.php b/includes/parser/Tidy.php index 2ae9e99dff5c..8bd80b2b736a 100644 --- a/includes/parser/Tidy.php +++ b/includes/parser/Tidy.php @@ -41,15 +41,9 @@ class MWTidyWrapper { dechex( mt_rand( 0, 0x7fffffff ) ) . 
dechex( mt_rand( 0, 0x7fffffff ) ); $this->mMarkerIndex = 0; - // Replace <mw:editsection> elements with placeholders $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX, array( &$this, 'replaceEditSectionLinksCallback' ), $text ); - // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so - // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config - $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext ); - - // Wrap the whole thing in a doctype and body for Tidy. $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>'. '<head><title>test</title></head><body>'.$wrappedtext.'</body></html>'; @@ -74,13 +68,7 @@ class MWTidyWrapper { * @return string */ public function postprocess( $text ) { - // Revert <html-{link,meta}> back to <{link,meta}> - $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text ); - - // Restore the contents of placeholder tokens - $text = $this->mTokens->replace( $text ); - - return $text; + return $this->mTokens->replace( $text ); } } diff --git a/includes/tidy.conf b/includes/tidy.conf index 6ae8d4515b61..09412f051739 100644 --- a/includes/tidy.conf +++ b/includes/tidy.conf @@ -16,8 +16,3 @@ quiet: yes quote-nbsp: yes fix-backslash: no fix-uri: no - -# Don't strip html5 elements we support -# html-{meta,link} is a hack we use to prevent Tidy from stripping <meta> and <link> used in the body for Microdata -new-empty-tags: html-meta, html-link -new-inline-tags: data, time |