<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[33380] branches/4.1: Shortcodes: Improve the reliability of shortcodes inside HTML tags.</title>
</head>
<body>

<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta" style="font-size: 105%">
<dt style="float: left; width: 6em; font-weight: bold">Revision</dt> <dd><a style="font-weight: bold" href="https://core.trac.wordpress.org/changeset/33380">33380</a><script type="application/ld+json">{"@context":"http://schema.org","@type":"EmailMessage","description":"Review this Commit","action":{"@type":"ViewAction","url":"https://core.trac.wordpress.org/changeset/33380","name":"Review Commit"}}</script></dd>
<dt style="float: left; width: 6em; font-weight: bold">Author</dt> <dd>pento</dd>
<dt style="float: left; width: 6em; font-weight: bold">Date</dt> <dd>2015-07-23 04:36:55 +0000 (Thu, 23 Jul 2015)</dd>
</dl>

<pre style='padding-left: 1em; margin: 2em 0; border-left: 2px solid #ccc; line-height: 1.25; font-size: 105%; font-family: sans-serif'>Shortcodes: Improve the reliability of shortcodes inside HTML tags.

Merge of <a href="https://core.trac.wordpress.org/changeset/33359">[33359]</a> to the 4.1 branch.

Props miqrogroove.

See <a href="https://core.trac.wordpress.org/ticket/15694">#15694</a>.</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#branches41srcwpincludesclasswpembedphp">branches/4.1/src/wp-includes/class-wp-embed.php</a></li>
<li><a href="#branches41srcwpincludesformattingphp">branches/4.1/src/wp-includes/formatting.php</a></li>
<li><a href="#branches41srcwpincludesksesphp">branches/4.1/src/wp-includes/kses.php</a></li>
<li><a href="#branches41srcwpincludesshortcodesphp">branches/4.1/src/wp-includes/shortcodes.php</a></li>
<li><a href="#branches41testsphpunittestsksesphp">branches/4.1/tests/phpunit/tests/kses.php</a></li>
<li><a href="#branches41testsphpunittestsshortcodephp">branches/4.1/tests/phpunit/tests/shortcode.php</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="branches41srcwpincludesclasswpembedphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/src/wp-includes/class-wp-embed.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/src/wp-includes/class-wp-embed.php   2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/src/wp-includes/class-wp-embed.php     2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -57,7 +57,7 @@
</span><span class="cx" style="display: block; padding: 0 10px">                add_shortcode( 'embed', array( $this, 'shortcode' ) );
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">                // Do the shortcode (only the [embed] one is registered)
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                $content = do_shortcode( $content );
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+         $content = do_shortcode( $content, true );
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">                // Put the original shortcodes back
</span><span class="cx" style="display: block; padding: 0 10px">                $shortcode_tags = $orig_shortcode_tags;
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -312,6 +312,10 @@
</span><span class="cx" style="display: block; padding: 0 10px">         * @return string Potentially modified $content.
</span><span class="cx" style="display: block; padding: 0 10px">         */
</span><span class="cx" style="display: block; padding: 0 10px">        public function autoembed( $content ) {
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+                // Strip newlines from all elements.
+               $content = wp_replace_in_html_tags( $content, array( "\n" => " " ) );
+
+               // Find URLs that are on their own line.
</ins><span class="cx" style="display: block; padding: 0 10px">                 return preg_replace_callback( '|^\s*(https?://[^\s"]+)\s*$|im', array( $this, 'autoembed_callback' ), $content );
</span><span class="cx" style="display: block; padding: 0 10px">        }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span></span></pre></div>
<a id="branches41srcwpincludesformattingphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/src/wp-includes/formatting.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/src/wp-includes/formatting.php       2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/src/wp-includes/formatting.php 2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -410,6 +410,9 @@
</span><span class="cx" style="display: block; padding: 0 10px">        $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
</span><span class="cx" style="display: block; padding: 0 10px">        $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        // Strip newlines from all elements.
+       $pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) );
+
</ins><span class="cx" style="display: block; padding: 0 10px">         if ( strpos( $pee, '<option' ) !== false ) {
</span><span class="cx" style="display: block; padding: 0 10px">                // no P/BR around option
</span><span class="cx" style="display: block; padding: 0 10px">                $pee = preg_replace( '|\s*<option|', '<option', $pee );
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -465,6 +468,74 @@
</span><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * Replace characters or phrases within HTML elements only.
+ *
+ * @since 4.2.3
+ *
+ * @param string $haystack The text which has to be formatted.
+ * @param array $replace_pairs In the form array('from' => 'to', ...).
+ * @return string The formatted text.
+ */
+function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
+       // Find all elements.
+       $comments =
+                 '!'           // Start of comment, after the <.
+               . '(?:'         // Unroll the loop: Consume everything until --> is found.
+               .     '-(?!->)' // Dash not followed by end of comment.
+               .     '[^\-]*+' // Consume non-dashes.
+               . ')*+'         // Loop possessively.
+               . '(?:-->)?';   // End of comment. If not found, match all input.
+
+       $regex =
+                 '/('              // Capture the entire match.
+               .     '<'           // Find start of element.
+               .     '(?(?=!--)'   // Is this a comment?
+               .         $comments // Find end of comment.
+               .     '|'
+               .         '[^>]*>?' // Find end of element. If not found, match all input.
+               .     ')'
+               . ')/s';
+
+       $textarr = preg_split( $regex, $haystack, -1, PREG_SPLIT_DELIM_CAPTURE );
+       $changed = false;
+
+       // Optimize when searching for one item.
+       if ( 1 === count( $replace_pairs ) ) {
+               // Extract $needle and $replace.
+               foreach ( $replace_pairs as $needle => $replace );
+
+               // Loop through delimeters (elements) only.
+               for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { 
+                       if ( false !== strpos( $textarr[$i], $needle ) ) {
+                               $textarr[$i] = str_replace( $needle, $replace, $textarr[$i] );
+                               $changed = true;
+                       }
+               }
+       } else {
+               // Extract all $needles.
+               $needles = array_keys( $replace_pairs );
+
+               // Loop through delimeters (elements) only.
+               for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { 
+                       foreach ( $needles as $needle ) {
+                               if ( false !== strpos( $textarr[$i], $needle ) ) {
+                                       $textarr[$i] = strtr( $textarr[$i], $replace_pairs );
+                                       $changed = true;
+                                       // After one strtr() break out of the foreach loop and look at next element.
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       if ( $changed ) {
+               $haystack = implode( $textarr );
+       }
+
+       return $haystack;
+}
+
+/**
</ins><span class="cx" style="display: block; padding: 0 10px">  * Newline preservation help function for wpautop
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * @since 3.1.0
</span></span></pre></div>
<a id="branches41srcwpincludesksesphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/src/wp-includes/kses.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/src/wp-includes/kses.php     2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/src/wp-includes/kses.php       2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -528,6 +528,82 @@
</span><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * Filters one attribute only and ensures its value is allowed.
+ *
+ * This function has the advantage of being more secure than esc_attr() and can
+ * escape data in some situations where wp_kses() must strip the whole attribute.
+ *
+ * @since 4.2.3
+ *
+ * @param string $string The 'whole' attribute, including name and value.
+ * @param string $element The element name to which the attribute belongs.
+ * @return string Filtered attribute.
+ */
+function wp_kses_one_attr( $string, $element ) {
+       $uris = array('xmlns', 'profile', 'href', 'src', 'cite', 'classid', 'codebase', 'data', 'usemap', 'longdesc', 'action');
+       $allowed_html = wp_kses_allowed_html( 'post' );
+       $allowed_protocols = wp_allowed_protocols();
+       $string = wp_kses_no_null( $string, array( 'slash_zero' => 'keep' ) );
+       $string = wp_kses_js_entities( $string );
+       $string = wp_kses_normalize_entities( $string );
+
+       // Preserve leading and trailing whitespace.
+       $matches = array();
+       preg_match('/^\s*/', $string, $matches);
+       $lead = $matches[0];
+       preg_match('/\s*$/', $string, $matches);
+       $trail = $matches[0];
+       if ( empty( $trail ) ) {
+               $string = substr( $string, strlen( $lead ) );
+       } else {
+               $string = substr( $string, strlen( $lead ), -strlen( $trail ) );
+       }
+       
+       // Parse attribute name and value from input.
+       $split = preg_split( '/\s*=\s*/', $string, 2 );
+       $name = $split[0];
+       if ( count( $split ) == 2 ) {
+               $value = $split[1];
+
+               // Remove quotes surrounding $value.
+               // Also guarantee correct quoting in $string for this one attribute.
+               if ( '' == $value ) {
+                       $quote = '';
+               } else {
+                       $quote = $value[0];
+               }
+               if ( '"' == $quote || "'" == $quote ) {
+                       if ( substr( $value, -1 ) != $quote ) {
+                               return '';
+                       }
+                       $value = substr( $value, 1, -1 );
+               } else {
+                       $quote = '"';
+               }
+
+               // Sanitize quotes and angle braces.
+               $value = htmlspecialchars( $value, ENT_QUOTES, null, false );
+
+               // Sanitize URI values.
+               if ( in_array( strtolower( $name ), $uris ) ) {
+                       $value = wp_kses_bad_protocol( $value, $allowed_protocols );
+               }
+
+               $string = "$name=$quote$value$quote";
+               $vless = 'n';
+       } else {
+               $value = '';
+               $vless = 'y';
+       }
+       
+       // Sanitize attribute by name.
+       wp_kses_attr_check( $name, $value, $string, $vless, $element, $allowed_html );
+
+       // Restore whitespace.
+       return $lead . $string . $trail;
+}
+
+/**
</ins><span class="cx" style="display: block; padding: 0 10px">  * Return a list of allowed tags and attributes for a given context.
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * @since 3.5.0
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -747,50 +823,63 @@
</span><span class="cx" style="display: block; padding: 0 10px">        # Go through $attrarr, and save the allowed attributes for this element
</span><span class="cx" style="display: block; padding: 0 10px">        # in $attr2
</span><span class="cx" style="display: block; padding: 0 10px">        $attr2 = '';
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        foreach ( $attrarr as $arreach ) {
+               if ( wp_kses_attr_check( $arreach['name'], $arreach['value'], $arreach['whole'], $arreach['vless'], $element, $allowed_html ) ) {
+                       $attr2 .= ' '.$arreach['whole'];
+               }
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-        $allowed_attr = $allowed_html[strtolower($element)];
-       foreach ($attrarr as $arreach) {
-               if ( ! isset( $allowed_attr[strtolower($arreach['name'])] ) )
-                       continue; # the attribute is not allowed
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ # Remove any "<" or ">" characters
+       $attr2 = preg_replace('/[<>]/', '', $attr2);
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                $current = $allowed_attr[strtolower($arreach['name'])];
-               if ( $current == '' )
-                       continue; # the attribute is not allowed
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ return "<$element$attr2$xhtml_slash>";
+}
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                if ( strtolower( $arreach['name'] ) == 'style' ) {
-                       $orig_value = $arreach['value'];
-                       $value = safecss_filter_attr( $orig_value );
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+/**
+ * Determine whether an attribute is allowed.
+ *
+ * @since 4.2.3
+ *
+ * @param string $name The attribute name. Returns empty string when not allowed.
+ * @param string $value The attribute value. Returns a filtered value.
+ * @param string $whole The name=value input. Returns filtered input.
+ * @param string $vless 'y' when attribute like "enabled", otherwise 'n'.
+ * @param string $element The name of the element to which this attribute belongs.
+ * @param array $allowed_html The full list of allowed elements and attributes.
+ * @return bool Is the attribute allowed?
+ */
+function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowed_html ) {
+       $allowed_attr = $allowed_html[strtolower( $element )];
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                        if ( empty( $value ) )
-                               continue;
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ $name_low = strtolower( $name );
+       if ( ! isset( $allowed_attr[$name_low] ) || '' == $allowed_attr[$name_low] ) {
+               $name = $value = $whole = '';
+               return false;
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                        $arreach['value'] = $value;
-                       $arreach['whole'] = str_replace( $orig_value, $value, $arreach['whole'] );
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ if ( 'style' == $name_low ) {
+               $new_value = safecss_filter_attr( $value );
+
+               if ( empty( $new_value ) ) {
+                       $name = $value = $whole = '';
+                       return false;
</ins><span class="cx" style="display: block; padding: 0 10px">                 }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                if ( ! is_array($current) ) {
-                       $attr2 .= ' '.$arreach['whole'];
-               # there are no checks
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+         $whole = str_replace( $value, $new_value, $whole );
+               $value = $new_value;
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                } else {
-                       # there are some checks
-                       $ok = true;
-                       foreach ($current as $currkey => $currval) {
-                               if ( ! wp_kses_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval) ) {
-                                       $ok = false;
-                                       break;
-                               }
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ if ( is_array( $allowed_attr[$name_low] ) ) {
+               // there are some checks
+               foreach ( $allowed_attr[$name_low] as $currkey => $currval ) {
+                       if ( ! wp_kses_check_attr_val( $value, $vless, $currkey, $currval ) ) {
+                               $name = $value = $whole = '';
+                               return false;
</ins><span class="cx" style="display: block; padding: 0 10px">                         }
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+                }
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                        if ( $ok )
-                               $attr2 .= ' '.$arreach['whole']; # it passed them
-               } # if !is_array($current)
-       } # foreach
-
-       # Remove any "<" or ">" characters
-       $attr2 = preg_replace('/[<>]/', '', $attr2);
-
-       return "<$element$attr2$xhtml_slash>";
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ return true;
</ins><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -923,6 +1012,109 @@
</span><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * Finds all attributes of an HTML element.
+ *
+ * Does not modify input.  May return "evil" output.
+ *
+ * Based on wp_kses_split2() and wp_kses_attr()
+ *
+ * @since 4.2.3
+ *
+ * @param string $element HTML element/tag
+ * @return array|bool List of attributes found in $element. Returns false on failure.
+ */
+function wp_kses_attr_parse( $element ) {
+       $valid = preg_match('%^(<\s*)(/\s*)?([a-zA-Z0-9]+\s*)([^>]*)(>?)$%', $element, $matches);
+       if ( 1 !== $valid ) {
+               return false;
+       }
+
+       $begin =  $matches[1];
+       $slash =  $matches[2];
+       $elname = $matches[3];
+       $attr =   $matches[4];
+       $end =    $matches[5];
+
+       if ( '' !== $slash ) {
+               // Closing elements do not get parsed.
+               return false;
+       }
+
+       // Is there a closing XHTML slash at the end of the attributes?
+       if ( 1 === preg_match( '%\s*/\s*$%', $attr, $matches ) ) {
+               $xhtml_slash = $matches[0];
+               $attr = substr( $attr, 0, -strlen( $xhtml_slash ) );
+       } else {
+               $xhtml_slash = '';
+       }
+       
+       // Split it
+       $attrarr = wp_kses_hair_parse( $attr );
+       if ( false === $attrarr ) {
+               return false;
+       }
+
+       // Make sure all input is returned by adding front and back matter.
+       array_unshift( $attrarr, $begin . $slash . $elname );
+       array_push( $attrarr, $xhtml_slash . $end );
+       
+       return $attrarr;
+}
+
+/**
+ * Builds an attribute list from string containing attributes.
+ *
+ * Does not modify input.  May return "evil" output.
+ * In case of unexpected input, returns false instead of stripping things.
+ *
+ * Based on wp_kses_hair() but does not return a multi-dimensional array.
+ *
+ * @since 4.2.3
+ *
+ * @param string $attr Attribute list from HTML element to closing HTML element tag
+ * @return array|bool List of attributes found in $attr. Returns false on failure.
+ */
+function wp_kses_hair_parse( $attr ) {
+       if ( '' === $attr ) {
+               return array();
+       }
+
+       $regex =
+         '(?:'
+       .     '[-a-zA-Z:]+'   // Attribute name.
+       . '|'
+       .     '\[\[?[^\[\]]+\]\]?' // Shortcode in the name position implies unfiltered_html.
+       . ')'
+       . '(?:'               // Attribute value.
+       .     '\s*=\s*'       // All values begin with '='
+       .     '(?:'
+       .         '"[^"]*"'   // Double-quoted
+       .     '|'
+       .         "'[^']*'"   // Single-quoted
+       .     '|'
+       .         '[^\s"\']+' // Non-quoted
+       .         '(?:\s|$)'  // Must have a space
+       .     ')'
+       . '|'
+       .     '(?:\s|$)'      // If attribute has no value, space is required.
+       . ')'
+       . '\s*';              // Trailing space is optional except as mentioned above.
+
+       // Although it is possible to reduce this procedure to a single regexp,
+       // we must run that regexp twice to get exactly the expected result.
+
+       $validation = "%^($regex)+$%";
+       $extraction = "%$regex%";
+
+       if ( 1 === preg_match( $validation, $attr ) ) {
+               preg_match_all( $extraction, $attr, $attrarr );
+               return $attrarr[0];
+       } else {
+               return false;
+       }
+}
+
+/**
</ins><span class="cx" style="display: block; padding: 0 10px">  * Performs different checks for attribute values.
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * The currently implemented checks are "maxlen", "minlen", "maxval", "minval"
</span></span></pre></div>
<a id="branches41srcwpincludesshortcodesphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/src/wp-includes/shortcodes.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/src/wp-includes/shortcodes.php       2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/src/wp-includes/shortcodes.php 2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -181,9 +181,10 @@
</span><span class="cx" style="display: block; padding: 0 10px">  * @uses $shortcode_tags
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * @param string $content Content to search for shortcodes
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * @param bool $ignore_html When true, shortcodes inside HTML elements will be skipped.
</ins><span class="cx" style="display: block; padding: 0 10px">  * @return string Content with shortcodes filtered out.
</span><span class="cx" style="display: block; padding: 0 10px">  */
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-function do_shortcode($content) {
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+function do_shortcode( $content, $ignore_html = false ) {
</ins><span class="cx" style="display: block; padding: 0 10px">         global $shortcode_tags;
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">        if ( false === strpos( $content, '[' ) ) {
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -193,8 +194,24 @@
</span><span class="cx" style="display: block; padding: 0 10px">        if (empty($shortcode_tags) || !is_array($shortcode_tags))
</span><span class="cx" style="display: block; padding: 0 10px">                return $content;
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        $tagnames = array_keys($shortcode_tags);
+       $tagregexp = join( '|', array_map('preg_quote', $tagnames) );
+       $pattern = "/\\[($tagregexp)/s";
+
+       if ( 1 !== preg_match( $pattern, $content ) ) {
+               // Avoids parsing HTML when there are no shortcodes or embeds anyway.
+               return $content;
+       }
+
+       $content = do_shortcodes_in_html_tags( $content, $ignore_html );
+
</ins><span class="cx" style="display: block; padding: 0 10px">         $pattern = get_shortcode_regex();
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-        return preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ $content = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $content );
+       
+       // Always restore square braces so we don't break things like <!--[if IE ]>
+       $content = unescape_invalid_shortcodes( $content );
+       
+       return $content;
</ins><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -288,6 +305,141 @@
</span><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * Search only inside HTML elements for shortcodes and process them.
+ *
+ * Any [ or ] characters remaining inside elements will be HTML encoded
+ * to prevent interference with shortcodes that are outside the elements.
+ * Assumes $content has already been processed by KSES.  Users with the unfiltered_html
+ * capability may get unexpected output if angle brackets are nested in tags.
+ *
+ * @since 4.2.3
+ *
+ * @param string $content Content to search for shortcodes
+ * @param bool $ignore_html When true, all square brackets inside elements will be encoded.
+ * @return string Content with shortcodes filtered out.
+ */
+function do_shortcodes_in_html_tags( $content, $ignore_html ) {
+       // Normalize entities in unfiltered HTML before adding placeholders.
+       $trans = array( '&#91;' => '&#091;', '&#93;' => '&#093;' );
+       $content = strtr( $content, $trans );
+       $trans = array( '[' => '&#91;', ']' => '&#93;' );
+       
+       $pattern = get_shortcode_regex();
+
+       $comment_regex =
+                 '!'           // Start of comment, after the <.
+               . '(?:'         // Unroll the loop: Consume everything until --> is found.
+               .     '-(?!->)' // Dash not followed by end of comment.
+               .     '[^\-]*+' // Consume non-dashes.
+               . ')*+'         // Loop possessively.
+               . '(?:-->)?';   // End of comment. If not found, match all input.
+
+       $regex =
+                 '/('                   // Capture the entire match.
+               .     '<'                // Find start of element.
+               .     '(?(?=!--)'        // Is this a comment?
+               .         $comment_regex // Find end of comment.
+               .     '|'
+               .         '[^>]*>?'      // Find end of element. If not found, match all input.
+               .     ')'
+               . ')/s';
+
+       $textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
+
+       foreach ( $textarr as &$element ) {
+               if ( '<' !== $element[0] ) {
+                       continue;
+               }
+
+               $noopen = false === strpos( $element, '[' );
+               $noclose = false === strpos( $element, ']' );
+               if ( $noopen || $noclose ) {
+                       // This element does not contain shortcodes.
+                       if ( $noopen xor $noclose ) {
+                               // Need to encode stray [ or ] chars.
+                               $element = strtr( $element, $trans );
+                       }
+                       continue;
+               }
+
+               if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
+                       // Encode all [ and ] chars.
+                       $element = strtr( $element, $trans );
+                       continue;
+               }
+
+               $attributes = wp_kses_attr_parse( $element );
+               if ( false === $attributes ) {
+                       // Looks like we found some crazy unfiltered HTML.  Skipping it for sanity.
+                       $element = strtr( $element, $trans );
+                       continue;
+               }
+               
+               // Get element name
+               $front = array_shift( $attributes );
+               $back = array_pop( $attributes );
+               $matches = array();
+               preg_match('%[a-zA-Z0-9]+%', $front, $matches);
+               $elname = $matches[0];
+               
+               // Look for shortcodes in each attribute separately.
+               foreach ( $attributes as &$attr ) {
+                       $open = strpos( $attr, '[' );
+                       $close = strpos( $attr, ']' );
+                       if ( false === $open || false === $close ) {
+                               continue; // Go to next attribute.  Square braces will be escaped at end of loop.
+                       }
+                       $double = strpos( $attr, '"' );
+                       $single = strpos( $attr, "'" );
+                       if ( ( false === $single || $open < $single ) && ( false === $double || $open < $double ) ) {
+                               // $attr like '[shortcode]' or 'name = [shortcode]' implies unfiltered_html.
+                               // In this specific situation we assume KSES did not run because the input
+                               // was written by an administrator, so we should avoid changing the output
+                               // and we do not need to run KSES here.
+                               $attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr );
+                       } else {
+                               // $attr like 'name = "[shortcode]"' or "name = '[shortcode]'"
+                               // We do not know if $content was unfiltered. Assume KSES ran before shortcodes.
+                               $count = 0;
+                               $new_attr = preg_replace_callback( "/$pattern/s", 'do_shortcode_tag', $attr, -1, $count );
+                               if ( $count > 0 ) {
+                                       // Sanitize the shortcode output using KSES.
+                                       $new_attr = wp_kses_one_attr( $new_attr, $elname );
+                                       if ( '' !== $new_attr ) {
+                                               // The shortcode is safe to use now.
+                                               $attr = $new_attr;
+                                       }
+                               }
+                       }
+               }
+               $element = $front . implode( '', $attributes ) . $back;
+               
+               // Now encode any remaining [ or ] chars.
+               $element = strtr( $element, $trans );
+       }
+       
+       $content = implode( '', $textarr );
+       
+       return $content;
+}
+
+/**
+ * Remove placeholders added by do_shortcodes_in_html_tags().
+ *
+ * @since 4.2.3
+ *
+ * @param string $content Content to search for placeholders.
+ * @return string Content with placeholders removed.
+ */
+function unescape_invalid_shortcodes( $content ) {
+        // Clean up entire string, avoids re-parsing HTML.
+        $trans = array( '&#91;' => '[', '&#93;' => ']' );
+        $content = strtr( $content, $trans );
+        
+        return $content;
+}
+
+/**
</ins><span class="cx" style="display: block; padding: 0 10px">  * Retrieve all attributes from the shortcodes tag.
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * The attributes list has the attribute name as the key and the value of the
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -386,9 +538,15 @@
</span><span class="cx" style="display: block; padding: 0 10px">        if (empty($shortcode_tags) || !is_array($shortcode_tags))
</span><span class="cx" style="display: block; padding: 0 10px">                return $content;
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        $content = do_shortcodes_in_html_tags( $content, true );
+
</ins><span class="cx" style="display: block; padding: 0 10px">         $pattern = get_shortcode_regex();
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        $content = preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-        return preg_replace_callback( "/$pattern/s", 'strip_shortcode_tag', $content );
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ // Always restore square braces so we don't break things like <!--[if IE ]>
+       $content = unescape_invalid_shortcodes( $content );
+       
+       return $content;
</ins><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px"> function strip_shortcode_tag( $m ) {
</span></span></pre></div>
<a id="branches41testsphpunittestsksesphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/tests/phpunit/tests/kses.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/tests/phpunit/tests/kses.php 2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/tests/phpunit/tests/kses.php   2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -411,4 +411,182 @@
</span><span class="cx" style="display: block; padding: 0 10px">                        ),
</span><span class="cx" style="display: block; padding: 0 10px">                );
</span><span class="cx" style="display: block; padding: 0 10px">        }
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+
+       /**
+        * Test new function wp_kses_hair_parse().
+        *
+        * @dataProvider data_hair_parse
+        */
+       function test_hair_parse( $input, $output ) {
+               return $this->assertEquals( $output, wp_kses_hair_parse( $input ) );
+       }
+
+       function data_hair_parse() {
+               return array(
+                       array(
+                               'title="hello" href="#" id="my_id" ',
+                               array( 'title="hello" ', 'href="#" ', 'id="my_id" ' ),
+                       ),
+                       array(
+                               '[shortcode attr="value"] href="http://www.google.com/"title="moo"disabled',
+                               array( '[shortcode attr="value"] ', 'href="http://www.google.com/"', 'title="moo"', 'disabled' ),
+                       ),
+                       array(
+                               '',
+                               array(),
+                       ),
+                       array(
+                               'a',
+                               array( 'a' ),
+                       ),
+                       array(
+                               'title="hello"disabled href=# id=\'my_id\'',
+                               array( 'title="hello"', 'disabled ', 'href=# ', "id='my_id'" ),
+                       ),
+                       array(
+                               '     ', // Calling function is expected to strip leading whitespace.
+                               false,
+                       ),
+                       array(
+                               'abcd=abcd"abcd"',
+                               false,
+                       ),
+                       array(
+                               "array[1]='z'z'z'z",
+                               false,
+                       ),
+               );
+       }
+
+       /**
+        * Test new function wp_kses_attr_parse().
+        *
+        * @dataProvider data_attr_parse
+        */
+       function test_attr_parse( $input, $output ) {
+               return $this->assertEquals( $output, wp_kses_attr_parse( $input ) );
+       }
+
+       function data_attr_parse() {
+               return array(
+                       array(
+                               '<a title="hello" href="#" id="my_id" >',
+                               array( '<a ', 'title="hello" ', 'href="#" ', 'id="my_id" ', '>' ),
+                       ),
+                       array(
+                               '<a [shortcode attr="value"] href="http://www.google.com/"title="moo"disabled>',
+                               array( '<a ', '[shortcode attr="value"] ', 'href="http://www.google.com/"', 'title="moo"', 'disabled', '>' ),
+                       ),
+                       array(
+                               '',
+                               false,
+                       ),
+                       array(
+                               'a',
+                               false,
+                       ),
+                       array(
+                               '<a>',
+                               array( '<a', '>' ),
+                       ),
+                       array(
+                               '<a%%&&**>',
+                               false,
+                       ),
+                       array(
+                               '<a title="hello"disabled href=# id=\'my_id\'>',
+                               array( '<a ', 'title="hello"', 'disabled ', 'href=# ', "id='my_id'", ">" ),
+                       ),
+                       array(
+                               '<a     >',
+                               array( '<a     ', '>' ),
+                       ),
+                       array(
+                               '<a abcd=abcd"abcd">',
+                               false,
+                       ),
+                       array(
+                               "<a array[1]='z'z'z'z>",
+                               false,
+                       ),
+                       array(
+                               '<img title="hello" src="#" id="my_id" />',
+                               array( '<img ', 'title="hello" ', 'src="#" ', 'id="my_id"', ' />' ),
+                       ),
+               );
+       }
+
+       /**
+        * Test new function wp_kses_one_attr().
+        *
+        * @dataProvider data_one_attr
+        */
+       function test_one_attr( $element, $input, $output ) {
+               return $this->assertEquals( $output, wp_kses_one_attr( $input, $element ) );
+       }
+
+       function data_one_attr() {
+               return array(
+                       array(
+                               'a',
+                               ' title="hello" ',
+                               ' title="hello" ',
+                       ),
+                       array(
+                               'a',
+                               'title  =  "hello"',
+                               'title="hello"',
+                       ),
+                       array(
+                               'a',
+                               "title='hello'",
+                               "title='hello'",
+                       ),
+                       array(
+                               'a',
+                               'title=hello',
+                               'title="hello"',
+                       ),
+                       array(
+                               'a',
+                               'href="javascript:alert(1)"',
+                               'href="alert(1)"',
+                       ),
+                       array(
+                               'a',
+                               'style ="style "',
+                               'style="style"',
+                       ),
+                       array(
+                               'a',
+                               'style="style "',
+                               'style="style"',
+                       ),
+                       array(
+                               'a',
+                               'style ="style ="',
+                               '',
+                       ),
+                       array(
+                               'img',
+                               'src="mypic.jpg"',
+                               'src="mypic.jpg"',
+                       ),
+                       array(
+                               'img',
+                               'onerror=alert(1)',
+                               '',
+                       ),
+                       array(
+                               'img',
+                               'title=>',
+                               'title="&gt;"',
+                       ),
+                       array(
+                               'img',
+                               'title="&garbage";"',
+                               'title="&amp;garbage&quot;;"',
+                       ),
+               );
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> }
</span></span></pre></div>
<a id="branches41testsphpunittestsshortcodephp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/4.1/tests/phpunit/tests/shortcode.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/4.1/tests/phpunit/tests/shortcode.php    2015-07-23 04:26:36 UTC (rev 33379)
+++ branches/4.1/tests/phpunit/tests/shortcode.php      2015-07-23 04:36:55 UTC (rev 33380)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -386,6 +386,82 @@
</span><span class="cx" style="display: block; padding: 0 10px">        }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">        /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+         * Check for bugginess using normal input with latest patches.
+        *
+        * @dataProvider data_escaping
+        */
+       function test_escaping( $input, $output ) {
+               return $this->assertEquals( $output, do_shortcode( $input ) );
+       }
+
+       function data_escaping() {
+               return array(
+                       array(
+                               '<!--[if lt IE 7]>',
+                               '<!--[if lt IE 7]>',
+                       ),
+                       array(
+                               '[gallery title="<div>hello</div>"]',
+                               '',
+                       ),
+                       array(
+                               '[caption caption="test" width="2"]<div>hello</div>[/caption]',
+                               '<div style="width: 12px" class="wp-caption alignnone"><div>hello</div><p class="wp-caption-text">test</p></div>',
+                       ),
+                       array(
+                               '<div [gallery]>',
+                               '<div >',
+                       ),
+                       array(
+                               '<div [[gallery]]>',
+                               '<div [gallery]>',
+                       ),
+                       array(
+                               '[gallery]<div>Hello</div>[/gallery]',
+                               '',
+                       ),
+               );
+       }
+
+       /**
+        * Check for bugginess using normal input with latest patches.
+        *
+        * @dataProvider data_escaping2
+        */
+       function test_escaping2( $input, $output ) {
+               return $this->assertEquals( $output, strip_shortcodes( $input ) );
+       }
+
+       function data_escaping2() {
+               return array(
+                       array(
+                               '<!--[if lt IE 7]>',
+                               '<!--[if lt IE 7]>',
+                       ),
+                       array(
+                               '[gallery title="<div>hello</div>"]',
+                               '',
+                       ),
+                       array(
+                               '[caption caption="test" width="2"]<div>hello</div>[/caption]',
+                               '',
+                       ),
+                       array(
+                               '<div [gallery]>', // Shortcodes will never be stripped inside elements.
+                               '<div [gallery]>',
+                       ),
+                       array(
+                               '<div [[gallery]]>', // Shortcodes will never be stripped inside elements.
+                               '<div [[gallery]]>',
+                       ),
+                       array(
+                               '[gallery]<div>Hello</div>[/gallery]',
+                               '',
+                       ),
+               );
+       }
+
+       /**
</ins><span class="cx" style="display: block; padding: 0 10px">          * @ticket 26343
</span><span class="cx" style="display: block; padding: 0 10px">         */
</span><span class="cx" style="display: block; padding: 0 10px">        function test_has_shortcode() {
</span></span></pre>
</div>
</div>

</body>
</html>