<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[56564] branches/6.3: HTML API: Skip over contents of RAWTEXT elements such as STYLE.</title>
</head>
<body>

<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { white-space: pre-line; overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta" style="font-size: 105%">
<dt style="float: left; width: 6em; font-weight: bold">Revision</dt> <dd><a style="font-weight: bold" href="https://core.trac.wordpress.org/changeset/56564">56564</a><script type="application/ld+json">{"@context":"http://schema.org","@type":"EmailMessage","description":"Review this Commit","action":{"@type":"ViewAction","url":"https://core.trac.wordpress.org/changeset/56564","name":"Review Commit"}}</script></dd>
<dt style="float: left; width: 6em; font-weight: bold">Author</dt> <dd>Bernhard Reiter</dd>
<dt style="float: left; width: 6em; font-weight: bold">Date</dt> <dd>2023-09-13 12:53:32 +0000 (Wed, 13 Sep 2023)</dd>
</dl>

<pre style='padding-left: 1em; margin: 2em 0; border-left: 2px solid #ccc; line-height: 1.25; font-size: 105%; font-family: sans-serif'>HTML API: Skip over contents of RAWTEXT elements such as STYLE.

When encountering elements that imply switching into the RAWTEXT parsing state,
the Tag Processor should skip processing until exiting the RAWTEXT state.

In this patch the Tag Processor does just that, except for the case of the
deprecated XMP element which implies further and more complicated rules.

There's an implicit assumption that the SCRIPT ENABLED flag in HTML parsing
is enabled so that the contents of NOSCRIPT can be skipped. Otherwise, it would
be required to parse the contents of that tag.

Props dmsnell.
Merges <a href="https://core.trac.wordpress.org/changeset/56563">[56563]</a> to the 6.3 branch.
Fixes <a href="https://core.trac.wordpress.org/ticket/59292">#59292</a>.</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#branches63srcwpincludeshtmlapiclasswphtmltagprocessorphp">branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php</a></li>
<li><a href="#branches63testsphpunittestshtmlapiwpHtmlTagProcessorphp">branches/6.3/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="branches63srcwpincludeshtmlapiclasswphtmltagprocessorphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php     2023-09-13 12:47:25 UTC (rev 56563)
+++ branches/6.3/src/wp-includes/html-api/class-wp-html-tag-processor.php       2023-09-13 12:53:32 UTC (rev 56564)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -242,6 +242,8 @@
</span><span class="cx" style="display: block; padding: 0 10px">  * unquoted values will appear in the output with double-quotes.
</span><span class="cx" style="display: block; padding: 0 10px">  *
</span><span class="cx" style="display: block; padding: 0 10px">  * @since 6.2.0
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
+ * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
</ins><span class="cx" style="display: block; padding: 0 10px">  */
</span><span class="cx" style="display: block; padding: 0 10px"> class WP_HTML_Tag_Processor {
</span><span class="cx" style="display: block; padding: 0 10px">        /**
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -568,7 +570,14 @@
</span><span class="cx" style="display: block; padding: 0 10px">                         * of the tag name as a pre-check avoids a string allocation when it's not needed.
</span><span class="cx" style="display: block; padding: 0 10px">                         */
</span><span class="cx" style="display: block; padding: 0 10px">                        $t = $this->html[ $this->tag_name_starts_at ];
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-                        if ( ! $this->is_closing_tag &amp;&amp; ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+                 if (
+                               ! $this->is_closing_tag &amp;&amp;
+                               (
+                                       'i' === $t || 'I' === $t ||
+                                       'n' === $t || 'N' === $t ||
+                                       's' === $t || 'S' === $t ||
+                                       't' === $t || 'T' === $t
+                               ) ) {
</ins><span class="cx" style="display: block; padding: 0 10px">                                 $tag_name = $this->get_tag();
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">                                if ( 'SCRIPT' === $tag_name &amp;&amp; ! $this->skip_script_data() ) {
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -580,6 +589,25 @@
</span><span class="cx" style="display: block; padding: 0 10px">                                ) {
</span><span class="cx" style="display: block; padding: 0 10px">                                        $this->bytes_already_parsed = strlen( $this->html );
</span><span class="cx" style="display: block; padding: 0 10px">                                        return false;
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+                                } elseif (
+                                       (
+                                               'IFRAME' === $tag_name ||
+                                               'NOEMBED' === $tag_name ||
+                                               'NOFRAMES' === $tag_name ||
+                                               'NOSCRIPT' === $tag_name ||
+                                               'STYLE' === $tag_name
+                                       ) &amp;&amp;
+                                       ! $this->skip_rawtext( $tag_name )
+                               ) {
+                                       /*
+                                        * "XMP" should be here too but its rules are more complicated and require the
+                                        * complexity of the HTML Processor (it needs to close out any open P element,
+                                        * meaning it can't be skipped here or else the HTML Processor will lose its
+                                        * place). For now, it can be ignored as it's a rare HTML tag in practice and
+                                        * any normative HTML should be using PRE instead.
+                                        */
+                                       $this->bytes_already_parsed = strlen( $this->html );
+                                       return false;
</ins><span class="cx" style="display: block; padding: 0 10px">                                 }
</span><span class="cx" style="display: block; padding: 0 10px">                        }
</span><span class="cx" style="display: block; padding: 0 10px">                } while ( $already_found &lt; $this->sought_match_offset );
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -710,15 +738,33 @@
</span><span class="cx" style="display: block; padding: 0 10px">                return true;
</span><span class="cx" style="display: block; padding: 0 10px">        }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+        /**
+        * Skips contents of generic rawtext elements.
+        *
+        * @since 6.3.2
+        *
+        * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
+        *
+        * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
+        * @return bool Whether an end to the RAWTEXT region was found before the end of the document.
+        */
+       private function skip_rawtext( $tag_name ) {
+               /*
+                * These two functions distinguish themselves on whether character references are
+                * decoded, and since functionality to read the inner markup isn't supported, it's
+                * not necessary to implement these two functions separately.
+                */
+               return $this->skip_rcdata( $tag_name );
+       }
</ins><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">        /**
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-         * Skips contents of title and textarea tags.
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+  * Skips contents of RCDATA elements, namely title and textarea tags.
</ins><span class="cx" style="display: block; padding: 0 10px">          *
</span><span class="cx" style="display: block; padding: 0 10px">         * @since 6.2.0
</span><span class="cx" style="display: block; padding: 0 10px">         *
</span><span class="cx" style="display: block; padding: 0 10px">         * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
</span><span class="cx" style="display: block; padding: 0 10px">         *
</span><del style="background-color: #fdd; text-decoration:none; display:block; padding: 0 10px">-         * @param string $tag_name The lowercase tag name which will close the RCDATA region.
</del><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+  * @param string $tag_name The uppercase tag name which will close the RCDATA region.
</ins><span class="cx" style="display: block; padding: 0 10px">          * @return bool Whether an end to the RCDATA region was found before the end of the document.
</span><span class="cx" style="display: block; padding: 0 10px">         */
</span><span class="cx" style="display: block; padding: 0 10px">        private function skip_rcdata( $tag_name ) {
</span></span></pre></div>
<a id="branches63testsphpunittestshtmlapiwpHtmlTagProcessorphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: branches/6.3/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- branches/6.3/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php  2023-09-13 12:47:25 UTC (rev 56563)
+++ branches/6.3/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php    2023-09-13 12:53:32 UTC (rev 56564)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -1872,6 +1872,43 @@
</span><span class="cx" style="display: block; padding: 0 10px">        }
</span><span class="cx" style="display: block; padding: 0 10px"> 
</span><span class="cx" style="display: block; padding: 0 10px">        /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+         * @ticket 59292
+        *
+        * @covers WP_HTML_Tag_Processor::next_tag
+        *
+        * @dataProvider data_next_tag_ignores_contents_of_rawtext_tags
+        *
+        * @param string $rawtext_element_then_target_node HTML starting with a RAWTEXT-specifying element such as STYLE,
+        *                                                 then an element afterward containing the "target" attribute.
+        */
+       public function test_next_tag_ignores_contents_of_rawtext_tags( $rawtext_element_then_target_node ) {
+               $processor = new WP_HTML_Tag_Processor( $rawtext_element_then_target_node );
+               $processor->next_tag();
+
+               $processor->next_tag();
+               $this->assertNotNull(
+                       $processor->get_attribute( 'target' ),
+                       "Expected to find element with target attribute but found {$processor->get_tag()} instead."
+               );
+       }
+
+       /**
+        * Data provider.
+        *
+        * @return array[].
+        */
+       public function data_next_tag_ignores_contents_of_rawtext_tags() {
+               return array(
+                       'IFRAME'           => array( '&lt;iframe&gt;&lt;section&gt;Inside&lt;/section&gt;&lt;/iframe&gt;&lt;section target&gt;' ),
+                       'NOEMBED'          => array( '&lt;noembed&gt;&lt;p&gt;&lt;/p&gt;&lt;/noembed&gt;&lt;div target&gt;' ),
+                       'NOFRAMES'         => array( '&lt;noframes&gt;&lt;p&gt;Check the rules here.&lt;/p&gt;&lt;/noframes&gt;&lt;div target&gt;' ),
+                       'NOSCRIPT'         => array( '&lt;noscript&gt;&lt;span&gt;This assumes that scripting mode is enabled.&lt;/span&gt;&lt;/noscript&gt;&lt;p target&gt;' ),
+                       'STYLE'            => array( '&lt;style&gt;* { margin: 0 }&lt;/style&gt;&lt;div target&gt;' ),
+                       'STYLE hiding DIV' => array( '&lt;style&gt;li::before { content: "&lt;div non-target&gt;" }&lt;/style&gt;&lt;div target&gt;' ),
+               );
+       }
+
+       /**
</ins><span class="cx" style="display: block; padding: 0 10px">          * Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
</span><span class="cx" style="display: block; padding: 0 10px">         *
</span><span class="cx" style="display: block; padding: 0 10px">         * @ticket 58007
</span></span></pre>
</div>
</div>

</body>
</html>