diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 2f51482eee052..2428f9a88e8f1 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -43,6 +43,22 @@ class WP_HTML_Active_Formatting_Elements { */ private $stack = array(); + /** + * Returns the node at the given 1-offset index in the list of active formatting elements. + * + * Do not use this method; it is meant to be used only by the HTML Processor. + * + * @since 6.7.0 + * + * @access private + * + * @param int $index Number of nodes from the top node to return. + * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null. + */ + public function at( $nth ) { + return $this->stack[ $nth - 1 ]; + } + /** * Reports if a specific node is in the stack of active formatting elements. * diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index cb913853f0ee9..55fc4e4178f8f 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -622,12 +622,23 @@ public function remove_node( WP_HTML_Token $token ): bool { * see WP_HTML_Open_Elements::walk_up(). * * @since 6.4.0 + * @since 6.7.0 Accepts $below_this_node to start traversal below a given node, if it exists. + * + * @param ?WP_HTML_Token $below_this_node Start traversing below this node, if provided and if the node exists. */ - public function walk_down() { - $count = count( $this->stack ); + public function walk_down( $below_this_node = null ) { + $has_found_node = null === $below_this_node; + $count = count( $this->stack ); for ( $i = 0; $i < $count; $i++ ) { - yield $this->stack[ $i ]; + $node = $this->stack[ $i ]; + + if ( ! $has_found_node ) { + $has_found_node = $node === $below_this_node; + continue; + } + + yield $node; } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cb581fac3988f..e768befb18c6d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2337,7 +2337,9 @@ private function step_in_body(): bool { break 2; case 'A': - $this->run_adoption_agency_algorithm(); + if ( 'any-other-end-tag' === $this->run_adoption_agency_algorithm() ) { + goto in_body_any_other_end_tag; + } $this->state->active_formatting_elements->remove_node( $item ); $this->state->stack_of_open_elements->remove_node( $item ); break 2; @@ -2378,7 +2380,9 @@ private function step_in_body(): bool { if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) { // Parse error. - $this->run_adoption_agency_algorithm(); + if ( 'any-other-end-tag' === $this->run_adoption_agency_algorithm() ) { + goto in_body_any_other_end_tag; + } $this->reconstruct_active_formatting_elements(); } @@ -2404,7 +2408,9 @@ private function step_in_body(): bool { case '-STRONG': case '-TT': case '-U': - $this->run_adoption_agency_algorithm(); + if ( 'any-other-end-tag' === $this->run_adoption_agency_algorithm() ) { + goto in_body_any_other_end_tag; + } return true; /* @@ -2740,6 +2746,7 @@ private function step_in_body(): bool { /* * > Any other end tag */ + in_body_any_other_end_tag: /* * Find the corresponding tag opener in the stack of open elements, if @@ -5356,6 +5363,7 @@ private function get_adjusted_current_node(): ?WP_HTML_Token { * > been explicitly closed. * * @since 6.4.0 + * @since 6.7.0 Added additional support. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -5364,34 +5372,89 @@ private function get_adjusted_current_node(): ?WP_HTML_Token { * @return bool Whether any formatting elements needed to be reconstructed. */ private function reconstruct_active_formatting_elements(): bool { + $count = $this->state->active_formatting_elements->count(); /* - * > If there are no entries in the list of active formatting elements, then there is nothing - * > to reconstruct; stop this algorithm. + * > 1. If there are no entries in the list of active formatting elements, + * > then there is nothing to reconstruct; stop this algorithm. */ - if ( 0 === $this->state->active_formatting_elements->count() ) { + if ( 0 === $count ) { return false; } - $last_entry = $this->state->active_formatting_elements->current_node(); + $currently_at = $count; + $last_entry = $this->state->active_formatting_elements->at( $currently_at ); + /* + * > 2. If the last (most recently added) entry in the list of active formatting + * > elements is a marker, or if it is an element that is in the stack of open + * > elements, then there is nothing to reconstruct; stop this algorithm. + */ if ( - - /* - * > If the last (most recently added) entry in the list of active formatting elements is a marker; - * > stop this algorithm. - */ 'marker' === $last_entry->node_name || - - /* - * > If the last (most recently added) entry in the list of active formatting elements is an - * > element that is in the stack of open elements, then there is nothing to reconstruct; - * > stop this algorithm. - */ $this->state->stack_of_open_elements->contains_node( $last_entry ) ) { return false; } - $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); + /* + * > 3. Let entry be the last (most recently added) element + * > in the list of active formatting elements. + */ + $entry = $last_entry; + + /* + * > 4. Rewind: If there are no entries before entry in the list of active + * > formatting elements, then jump to the step labeled create. + */ + rewind: + if ( 1 === $currently_at ) { + goto create; + } + + /* + * > 5. Let entry be the entry one earlier than entry + * > in the list of active formatting elements. + */ + $entry = $this->state->active_formatting_elements->at( --$currently_at ); + + /* + * > 6. If entry is neither a marker nor an element that is also in + * > the stack of open elements, go to the step labeled rewind. + */ + if ( + 'marker' !== $entry->node_name && + ! $this->state->stack_of_open_elements->contains_node( $entry ) + ) { + goto rewind; + } + + /* + * > 7. Advance: Let entry be the element one later than entry + * > in the list of active formatting elements. + */ + advance: + $entry = $this->state->active_formatting_elements->at( ++$currently_at ); + + /* + * > 8. Create: Insert an HTML element for the token for which the + * > element entry was created, to obtain new element. + */ + create: + $this->insert_html_element( $entry ); + + /* + * > 9. Replace the entry for entry in the list with an entry for new element. + * > This doesn't need to happen here since no DOM is being created. + */ + + /* + * > 10. If the entry for new element in the list of active formatting elements + * > is not the last entry in the list, return to the step labeled advance. + */ + if ( $count !== $currently_at ) { + goto advance; + } + + return true; } /** @@ -5595,32 +5658,31 @@ private function reset_insertion_mode_appropriately(): void { * * @see https://html.spec.whatwg.org/#adoption-agency-algorithm */ - private function run_adoption_agency_algorithm(): void { + private function run_adoption_agency_algorithm(): ?string { $budget = 1000; $subject = $this->get_tag(); $current_node = $this->state->stack_of_open_elements->current_node(); + /* + * > 2. If the current node is an HTML element whose tag name is subject, + * > and the current node is not in the list of active formatting elements, + * > then pop the current node off the stack of open elements and return. + */ if ( - // > If the current node is an HTML element whose tag name is subject - $current_node && $subject === $current_node->node_name && - // > the current node is not in the list of active formatting elements + 'html' === $current_node->namespace && + $subject === $current_node->node_name && ! $this->state->active_formatting_elements->contains_node( $current_node ) ) { $this->state->stack_of_open_elements->pop(); - return; + return null; } - $outer_loop_counter = 0; - while ( $budget-- > 0 ) { - if ( $outer_loop_counter++ >= 8 ) { - return; - } - + for ( $outer_loop_counter = 0; $outer_loop_counter < 8; ++$outer_loop_counter ) { /* - * > Let formatting element be the last element in the list of active formatting elements that: - * > - is between the end of the list and the last marker in the list, - * > if any, or the start of the list otherwise, - * > - and has the tag name subject. + * > 3. Let formatting element be the last element in the list of active formatting elements that: + * > - is between the end of the list and the last marker in the list, + * > if any, or the start of the list otherwise, + * > - and has the tag name subject. */ $formatting_element = null; foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { @@ -5634,38 +5696,41 @@ private function run_adoption_agency_algorithm(): void { } } - // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. + /* + * > If there is no such element, then return and instead act as + * > described in the "any other end tag" entry above. + */ if ( null === $formatting_element ) { - $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' ); + return 'any-other-end-tag'; } - // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. + /* + * > 4. If formatting element is not in the stack of open elements, then + * > this is a parse error; remove the element from the list, and return. + */ if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); - return; + return null; } - // > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return. + /* + * > 5. If formatting element is in the stack of open elements, but the element + * > is not in scope, then this is a parse error; return. + */ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) { - return; + return null; } /* - * > Let furthest block be the topmost node in the stack of open elements that is lower in the stack - * > than formatting element, and is an element in the special category. There might not be one. + * > 6. If formatting element is not the current node, this is a parse error. (But do not return.) */ - $is_above_formatting_element = true; - $furthest_block = null; - foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) { - if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) { - continue; - } - - if ( $is_above_formatting_element ) { - $is_above_formatting_element = false; - continue; - } + /* + * > 7. Let furthest block be the topmost node in the stack of open elements that is lower in the stack + * > than formatting element, and is an element in the special category. There might not be one. + */ + $furthest_block = null; + foreach ( $this->state->stack_of_open_elements->walk_down( $formatting_element ) as $item ) { if ( self::is_special( $item ) ) { $furthest_block = $item; break; @@ -5673,25 +5738,153 @@ private function run_adoption_agency_algorithm(): void { } /* - * > If there is no furthest block, then the UA must first pop all the nodes from the bottom of the - * > stack of open elements, from the current node up to and including formatting element, then - * > remove formatting element from the list of active formatting elements, and finally return. + * > 8. If there is no furthest block, then the UA must first pop all the nodes from the bottom of + * > the stack of open elements, from the current node up to and including formatting element, + * > then remove formatting element from the list of active formatting elements, and finally return. */ if ( null === $furthest_block ) { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { $this->state->stack_of_open_elements->pop(); - if ( $formatting_element->bookmark_name === $item->bookmark_name ) { + if ( $formatting_element === $item ) { $this->state->active_formatting_elements->remove_node( $formatting_element ); - return; + return null; } } } - $this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' ); - } + /* + * > 9. Let common ancestor be the element immediately above + * > formatting element in the stack of open elements. + */ + $common_ancestor = null; + foreach ( $this->state->stack_of_open_elements->walk_up( $formatting_element ) as $item ) { + $common_ancestor = $item; + break; + } - $this->bail( 'Cannot run adoption agency when looping required.' ); + /* + * > 10. Let a bookmark note the position of formatting element in the list of active + * > formatting elements relative to the elements on either side of it in the list. + */ + $formatting_element_index = 0; + foreach ( $this->state->active_formatting_elements->walk_down() as $item ) { + if ( $formatting_element === $item ) { + break; + } + + ++$formatting_element_index; + } + + /* + * > 11. Let node and last node be furthest block. + */ + $node = $furthest_block; + $last_node = $furthest_block; + + $inner_loop_counter = 0; + while ( $budget-- > 0 ) { + /* + * > 1. Increment innerLoopCounter by 1. + */ + ++$inner_loop_counter; + + /* + * > 2. Let node be the element immediately above node in the stack of open elements, + * > or if node is no longer in the stack of open elements (e.g. because it got + * > removed by this algorithm), the element that was immediately above node in + * > the stack of open elements before node was removed. + */ + if ( $this->state->stack_of_open_elements->contains_node( $node ) ) { + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { + $node = $item; + break; + } + } else { + $this->bail( 'Cannot adjust node pointer above removed node.' ); + } + + /* + * > 3. If node is formattingElement, the break. + */ + if ( $formatting_element === $node ) { + break; + } + + /* + * > 4. If innerLoopCounter is greater than 3 and node is in the list of active formatting + * > elements, then remove node from the list of active formatting elements. + */ + if ( $inner_loop_counter > 3 && $this->state->active_formatting_elements->contains_node( $node ) ) { + $this->state->active_formatting_elements->remove_node( $node ); + } + + /* + * > 5. If node is not in the list of active formatting elements, then remove node from + * > the stack of open elements and continue. + */ + if ( ! $this->state->active_formatting_elements->contains_node( $node ) ) { + $this->state->stack_of_open_elements->remove_node( $node ); + continue; + } + + /* + * > 6. Create an element for the token for which the element node was created, + * > in the HTML namespace, with common ancestor as the intended parent; + * > replace the entry for node in the list of active formatting elements + * > with an entry for the new element, replace the entry for node in the + * > stack of open elements with an entry for the new element, and let node + * > be the new element. + */ + $this->bail( 'Cannot create and reference new element for which no token exists.' ); + + /* + * > 7. If last node is furthestBlock, then move the aforementioned bookmark to + * > be immediately after the new node in the list of active formatting elements. + */ + + /* + * > 8. Append lastNode to node. + */ + + /* + * > 9. Set lastNode to node. + */ + $last_node = $node; + } + + /* + * > 14. Insert whatever last node ended up being in the previous step at the appropriate + * > place for inserting a node, but using common ancestor as the override target. + */ + $this->bail( 'Cannot create and reference new element for which no token exists.' ); + + /* + * > 15. Create an element for the token for which formattingElement was created, + * > in the HTML namespace, with furthestBlock as the intended parent. + */ + + /* + * > 16. Take all of the child nodes of furthestBlock and append them to the element + * > created in the last step. + */ + + /* + * > 17. Append that new element to furthestBlock. + */ + + /* + * > 18. Remove formattingElement from the list of active formatting elements, + * > and insert the new element into the list of active formatting elements + * > at the position of the aforementioned bookmark. + */ + + /* + * > 19. Remove formattingElement from the stack of open elements, and insert the + * > new element into the stack of open elements immediately below the position + * > of furthestBlock in that stack. + */ + } } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 17dd2ff7fbd68..a9c823fd2132e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -112,18 +112,23 @@ public function test_clear_to_navigate_after_seeking() { } /** - * Ensures that support is added for reconstructing active formatting elements - * before the HTML Processor handles situations with unclosed formats requiring it. + * Ensures that support is added for reconstructing active formatting elements. * * @ticket 58517 * * @covers WP_HTML_Processor::reconstruct_active_formatting_elements */ - public function test_fails_to_reconstruct_formatting_elements() { - $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

Four' ); + public function test_reconstructs_formatting_elements() { + $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

Four' ); $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find first EM.' ); - $this->assertFalse( $processor->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' ); + $this->assertSame( array( 'HTML', 'BODY', 'P', 'EM' ), $processor->get_breadcrumbs(), 'Found incorrect breadcrumbs for first EM.' ); + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Could not find test span.' ); + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' ), + $processor->get_breadcrumbs(), + 'Found incorrect breadcrumbs for test SPAN; should have created two EMs.' + ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 911fa8b910b37..072a1d08818ee 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -165,45 +165,55 @@ public static function data_single_tag_of_supported_elements() { } /** - * @ticket 58517 - * - * @dataProvider data_unsupported_markup + * Ensures that formats inside unclosed A elements are reconstructed. * - * @param string $html HTML containing unsupported markup. + * @ticket 61576 */ - public function test_fails_when_encountering_unsupported_markup( $html, $description ) { - $processor = WP_HTML_Processor::create_fragment( $html ); - - while ( $processor->next_token() && null === $processor->get_attribute( 'supported' ) ) { - continue; - } + public function test_reconstructs_formatting_from_unclosed_a_elements() { + $processor = WP_HTML_Processor::create_fragment( 'Click Here' ); - $this->assertNull( - $processor->get_last_error(), - 'Bailed on unsupported input before finding supported checkpoint: check test code.' + $processor->next_tag( 'STRONG' ); + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'STRONG' ), + $processor->get_breadcrumbs(), + 'Failed to construct starting breadcrumbs properly.' ); - $this->assertTrue( $processor->get_attribute( 'supported' ), 'Did not find required supported element.' ); - $processor->next_token(); - $this->assertNotNull( $processor->get_last_error(), "Didn't properly reject unsupported markup: {$description}" ); + $processor->next_tag( 'BIG' ); + $this->assertSame( + array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ), + $processor->get_breadcrumbs(), + 'Failed to reconstruct the active formatting elements after an unclosed A element.' + ); } /** - * Data provider. + * Ensures that unclosed A elements are reconstructed. * - * @return array[] + * @ticket 61576 */ - public static function data_unsupported_markup() { - return array( - 'A with formatting following unclosed A' => array( - 'Click Here', - 'Unclosed formatting requires complicated reconstruction.', - ), + public function test_reconstructs_unclosed_a_elements() { + $processor = WP_HTML_Processor::create_fragment( '

' ); - 'A after unclosed A inside DIV' => array( - '
', - 'A is a formatting element, which requires more complicated reconstruction.', - ), + $processor->next_tag( 'DIV' ); + $this->assertSame( + array( 'HTML', 'BODY', 'DIV' ), + $processor->get_breadcrumbs(), + 'Failed to construct breadcrumbs properly - the DIV should have closed the A element.' + ); + + // When the DIV re-opens, it reconstructs an unclosed A, then the A in the text is a second A. + $processor->next_tag( 'A' ); + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', 'A' ), + 'Failed to create proper breadcrumbs for recreated A element.' + ); + + // This is the one that's second in the raw text. + $processor->next_tag( 'A' ); + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', 'A' ), + 'Failed to create proper breadcrumbs for explicit A element - this A should have closed the reconstructed A.' ); }