<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[56493] trunk: HTML API: Stop processing HTML when encountering unsupported markup.</title>
</head>
<body>
<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { white-space: pre-line; overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta" style="font-size: 105%">
<dt style="float: left; width: 6em; font-weight: bold">Revision</dt> <dd><a style="font-weight: bold" href="https://core.trac.wordpress.org/changeset/56493">56493</a><script type="application/ld+json">{"@context":"http://schema.org","@type":"EmailMessage","description":"Review this Commit","action":{"@type":"ViewAction","url":"https://core.trac.wordpress.org/changeset/56493","name":"Review Commit"}}</script></dd>
<dt style="float: left; width: 6em; font-weight: bold">Author</dt> <dd>Bernhard Reiter</dd>
<dt style="float: left; width: 6em; font-weight: bold">Date</dt> <dd>2023-08-30 15:37:33 +0000 (Wed, 30 Aug 2023)</dd>
</dl>
<pre style='padding-left: 1em; margin: 2em 0; border-left: 2px solid #ccc; line-height: 1.25; font-size: 105%; font-family: sans-serif'>HTML API: Stop processing HTML when encountering unsupported markup.
It was a design goal of the HTML Processor to abort processing its input document when encountering unsupported markup. Unfortunately there was no test for this and so-far, the HTML Processor has paused, but continued processing in these situations.
In this patch a new test ensures that the HTML Processor stops and refuses to move forward after encountering any unsupported markup. It also ensures that it doesn't report any current tag names since unsupported markup could imply that the read tag name is different than the parsed tag name.
Props dmsnell.
Fixes <a href="https://core.trac.wordpress.org/ticket/59167">#59167</a>.</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunksrcwpincludeshtmlapiclasswphtmlprocessorphp">trunk/src/wp-includes/html-api/class-wp-html-processor.php</a></li>
<li><a href="#trunktestsphpunittestshtmlapiwpHtmlProcessorphp">trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunksrcwpincludeshtmlapiclasswphtmlprocessorphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: trunk/src/wp-includes/html-api/class-wp-html-processor.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- trunk/src/wp-includes/html-api/class-wp-html-processor.php 2023-08-30 10:49:27 UTC (rev 56492)
+++ trunk/src/wp-includes/html-api/class-wp-html-processor.php 2023-08-30 15:37:33 UTC (rev 56493)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -432,6 +432,11 @@
</span><span class="cx" style="display: block; padding: 0 10px"> * @return bool Whether a tag was matched.
</span><span class="cx" style="display: block; padding: 0 10px"> */
</span><span class="cx" style="display: block; padding: 0 10px"> public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ // Refuse to proceed if there was a previous error.
+ if ( null !== $this->last_error ) {
+ return false;
+ }
+
</ins><span class="cx" style="display: block; padding: 0 10px"> if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
</span><span class="cx" style="display: block; padding: 0 10px"> $top_node = $this->state->stack_of_open_elements->current_node();
</span><span class="cx" style="display: block; padding: 0 10px"> if ( $top_node && self::is_void( $top_node->node_name ) ) {
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -744,6 +749,10 @@
</span><span class="cx" style="display: block; padding: 0 10px"> * @return string|null Name of currently matched tag in input HTML, or `null` if none found.
</span><span class="cx" style="display: block; padding: 0 10px"> */
</span><span class="cx" style="display: block; padding: 0 10px"> public function get_tag() {
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ if ( null !== $this->last_error ) {
+ return null;
+ }
+
</ins><span class="cx" style="display: block; padding: 0 10px"> $tag_name = parent::get_tag();
</span><span class="cx" style="display: block; padding: 0 10px">
</span><span class="cx" style="display: block; padding: 0 10px"> switch ( $tag_name ) {
</span></span></pre></div>
<a id="trunktestsphpunittestshtmlapiwpHtmlProcessorphp"></a>
<div class="modfile"><h4 style="background-color: #eee; color: inherit; margin: 1em 0; padding: 1.3em; font-size: 115%">Modified: trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php</h4>
<pre class="diff"><span>
<span class="info" style="display: block; padding: 0 10px; color: #888">--- trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php 2023-08-30 10:49:27 UTC (rev 56492)
+++ trunk/tests/phpunit/tests/html-api/wpHtmlProcessor.php 2023-08-30 15:37:33 UTC (rev 56493)
</span><span class="lines" style="display: block; padding: 0 10px; color: #888">@@ -43,6 +43,40 @@
</span><span class="cx" style="display: block; padding: 0 10px"> }
</span><span class="cx" style="display: block; padding: 0 10px">
</span><span class="cx" style="display: block; padding: 0 10px"> /**
</span><ins style="background-color: #dfd; text-decoration:none; display:block; padding: 0 10px">+ * Once stepping to the end of the document, WP_HTML_Processor::get_tag
+ * should no longer report a tag. It should report `null` because there
+ * is no tag matched or open.
+ *
+ * @ticket 59167
+ *
+ * @covers WP_HTML_Processor::get_tag
+ */
+ public function test_get_tag_is_null_once_document_is_finished() {
+ $p = WP_HTML_Processor::createFragment( '<div class="test">Test</div>' );
+ $p->next_tag();
+ $this->assertSame( 'DIV', $p->get_tag() );
+
+ $this->assertFalse( $p->next_tag() );
+ $this->assertNull( $p->get_tag() );
+ }
+
+ /**
+ * Ensures that if the HTML Processor encounters inputs that it can't properly handle,
+ * that it stops processing the rest of the document. This prevents data corruption.
+ *
+ * @ticket 59167
+ *
+ * @covers WP_HTML_Processor::next_tag
+ */
+ public function test_stops_processing_after_unsupported_elements() {
+ $p = WP_HTML_Processor::createFragment( '<p><x-not-supported></p><p></p>' );
+ $p->next_tag( 'P' );
+ $this->assertFalse( $p->next_tag(), 'Stepped into a tag after encountering X-NOT-SUPPORTED element when it should have aborted.' );
+ $this->assertNull( $p->get_tag(), "Should have aborted processing, but still reported tag {$p->get_tag()} after properly failing to step into tag." );
+ $this->assertFalse( $p->next_tag( 'P' ), 'Stepped into normal P element after X-NOT-SUPPORTED element when it should have aborted.' );
+ }
+
+ /**
</ins><span class="cx" style="display: block; padding: 0 10px"> * Ensures that the HTML Processor maintains its internal state through seek calls.
</span><span class="cx" style="display: block; padding: 0 10px"> *
</span><span class="cx" style="display: block; padding: 0 10px"> * Because the HTML Processor must track a stack of open elements and active formatting
</span></span></pre>
</div>
</div>
</body>
</html>