From 0c6657913e9cf78f0d03a7449d0a74b91226ad8b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 28 Feb 2024 20:19:03 -0700 Subject: [PATCH 1/2] HTML API: Assert text nodes trigger reconstruction. --- ...portRequiredActiveFormatReconstruction.php | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlSupportRequiredActiveFormatReconstruction.php diff --git a/tests/phpunit/tests/html-api/wpHtmlSupportRequiredActiveFormatReconstruction.php b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredActiveFormatReconstruction.php new file mode 100644 index 0000000000000..93f0ef611887e --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlSupportRequiredActiveFormatReconstruction.php @@ -0,0 +1,71 @@ +One

Two' ); + + // The SOURCE element doesn't trigger reconstruction, and this test asserts that. + $this->assertTrue( + $processor->next_tag( 'SOURCE' ), + 'Should have found the first custom element.' + ); + + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'SOURCE' ), + $processor->get_breadcrumbs(), + 'Should have closed formatting element at first P element.' + ); + + /* + * There are two ways this test could fail. One is to appropriately find the + * second text node but fail to reconstruct the implicitly-closed B element. + * The other way is to fail to abort when encountering the second text node + * because the kind of active format reconstruction isn't supported. + * + * At the time of writing this test, the HTML Processor bails whenever it + * needs to reconstruct active formats, unless there are no active formats. + * To ensure that this test properly works once that support is expanded, + * it's written to verify both circumstances. Once support is added, this + * can be simplified to only contain the first clause of the conditional. + * + * The use of the SOURCE element is important here because most elements + * will also trigger reconstruction, which would conflate the test results + * with the text node triggering reconstruction. The SOURCE element won't + * do this, making it neutral. Therefore, the implicitly-closed B element + * will only be reconstructed by the text node. + */ + + if ( $processor->next_tag( 'SOURCE' ) ) { + echo "\e[32mSOURCE\e[m\n"; + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'B', 'SOURCE' ), + $processor->get_breadcrumbs(), + 'Should have reconstructed the implicitly-closed B element.' + ); + } else { + $this->assertSame( + WP_HTML_Processor::ERROR_UNSUPPORTED, + $processor->get_last_error(), + 'Should have aborted for incomplete active format reconstruction when encountering the second text node.' 
+ ); + } + } +} From bbf6e741b1e3ebb90351d3588bba669babe012a0 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 28 Feb 2024 20:38:15 -0700 Subject: [PATCH 2/2] Abort when reaching text nodes if reconstruction would create elements. --- .../html-api/class-wp-html-processor.php | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 51b6a5679eb76..c74291a53004e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -430,8 +430,11 @@ public function next_tag( $query = null ) { public function next_token() { $found_a_token = parent::next_token(); - if ( '#tag' === $this->get_token_type() ) { - $this->step( self::PROCESS_CURRENT_NODE ); + switch ( $this->get_token_type() ) { + case '#tag': + case '#text': + $this->step( self::PROCESS_CURRENT_NODE ); + break; } return $found_a_token; @@ -536,6 +539,11 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { if ( self::PROCESS_NEXT_NODE === $node_to_process ) { while ( parent::next_token() && '#tag' !== $this->get_token_type() ) { + if ( '#text' === $this->get_token_type() && $this->has_active_formats_needing_reconstruction() ) { + $this->last_error = self::ERROR_UNSUPPORTED; + return false; + } + continue; } } @@ -1498,6 +1506,44 @@ private function reconstruct_active_formatting_elements() { throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); } + /** + * Indicates if there are active formatting elements needing reconstruction. + * + * @since 6.5.0 + * + * @return bool Whether reconstruction might create new elements; false if it definitely would not. 
+ */ + private function has_active_formats_needing_reconstruction() { + /* + * > If there are no entries in the list of active formatting elements, then there is nothing + * > to reconstruct; stop this algorithm. + */ + if ( 0 === $this->state->active_formatting_elements->count() ) { + return false; + } + + $last_entry = $this->state->active_formatting_elements->current_node(); + if ( + + /* + * > If the last (most recently added) entry in the list of active formatting elements is a marker; + * > stop this algorithm. + */ + 'marker' === $last_entry->node_name || + + /* + * > If the last (most recently added) entry in the list of active formatting elements is an + * > element that is in the stack of open elements, then there is nothing to reconstruct; + * > stop this algorithm. + */ + $this->state->stack_of_open_elements->contains_node( $last_entry ) + ) { + return false; + } + + return true; + } + /** * Runs the adoption agency algorithm. *