diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 2f51482eee052..c7e38e6da00c5 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -43,6 +43,18 @@ class WP_HTML_Active_Formatting_Elements { */ private $stack = array(); + /** + * Returns the node at the given 1-offset index in the list of active formatting elements. + * + * @since 7.0.0 + * + * @param int $nth 1-based position of the node, counting from the earliest-added entry. + * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null. + */ + public function at( $nth ) { + return $this->stack[ $nth - 1 ] ?? null; + } + /** * Reports if a specific node is in the stack of active formatting elements. * @@ -110,8 +122,9 @@ public function insert_marker(): void { * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements * * @param WP_HTML_Token $token Push this node onto the stack. + * @return bool Whether a node was pushed onto the stack of active formatting elements. */ - public function push( WP_HTML_Token $token ) { + public function push( WP_HTML_Token $token ): bool { /* * > If there are already three elements in the list of active formatting elements after the last marker, * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and @@ -120,11 +133,32 @@ public function push( WP_HTML_Token $token ) { * > created by the parser; two elements have the same attributes if all their parsed attributes can be * > paired such that the two attributes in each pair have identical names, namespaces, and values * > (the order of the attributes does not matter). - * - * @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack. 
*/ + + if ( 'marker' !== $token->node_name ) { + $existing_count = 0; + foreach ( $this->walk_up() as $item ) { + if ( 'marker' === $item->node_name ) { + break; + } + + if ( + $item->node_name === $token->node_name && + $item->namespace === $token->namespace + // @todo Compare attributes. For now, bail if there are three matching tag names + namespaces. + ) { + ++$existing_count; + if ( $existing_count >= 3 ) { + // @todo Implement removing the earliest element and moving forward. + return false; + } + } + } + } + // > Add element to the list of active formatting elements. $this->stack[] = $token; + return true; } /** diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 55f955f2c1a9a..991923ba870e4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -256,6 +256,18 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $context_node = null; + /** + * If a formatting element has been reconstructed, this will hold + * the parsed attributes from the original format, once requested. + * + * These attributes are not modifiable. + * + * @since 7.0.0 + * + * @var array + */ + protected $actively_reconstructed_formatting_attributes = array(); + /* * Public Interface Functions */ @@ -2766,7 +2778,10 @@ private function step_in_body(): bool { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { + $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' 
); + } + $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; return true; /* @@ -2787,7 +2802,10 @@ private function step_in_body(): bool { case '+U': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { + $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' ); + } + $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; return true; /* @@ -2803,7 +2821,10 @@ private function step_in_body(): bool { } $this->insert_html_element( $this->state->current_token ); - $this->state->active_formatting_elements->push( $this->state->current_token ); + if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { + $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' ); + } + $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; return true; /* @@ -5284,7 +5305,46 @@ public function get_token_type(): ?string { * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`. */ public function get_attribute( $name ) { - return $this->is_virtual() ? null : parent::get_attribute( $name ); + if ( $this->is_virtual() ) { + $virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $this->current_element->token->bookmark_name ?? '' ] ?? 
null; + if ( null === $virtual_attributes ) { + return null; + } + + $current_attributes = $this->attributes; + $current_updates = $this->lexical_updates; + $this->lexical_updates = array(); + $this->attributes = $virtual_attributes; + $parser_state = $this->parser_state; + $this->parser_state = WP_HTML_Tag_Processor::STATE_MATCHED_TAG; + $attribute_value = parent::get_attribute( $name ); + $this->attributes = $current_attributes; + $this->parser_state = $parser_state; + $this->lexical_updates = $current_updates; + + return $attribute_value; + } + + return parent::get_attribute( $name ); + } + + /** + * Returns the adjusted attribute name for a given attribute, taking into + * account the current parsing context, whether HTML, SVG, or MathML. + * + * @since 7.0.0 Subclassed for the HTML Processor. + * + * @param string $attribute_name Which attribute name to adjust. + * + * @return string|null The qualified attribute name or null if not on matched tag. + */ + public function get_qualified_attribute_name( $attribute_name ): ?string { + if ( $this->is_virtual() ) { + $namespace = $this->current_element->token->namespace; + return self::lookup_qualified_attribute_name( $namespace, $attribute_name ); + } + + return parent::get_qualified_attribute_name( $attribute_name ); } /** @@ -5362,7 +5422,24 @@ public function remove_attribute( $name ): bool { * @return array|null List of attribute names, or `null` when no tag opener is matched. */ public function get_attribute_names_with_prefix( $prefix ): ?array { - return $this->is_virtual() ? null : parent::get_attribute_names_with_prefix( $prefix ); + if ( $this->is_virtual() ) { + $virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $this->current_element->token->bookmark_name ?? '' ] ?? 
null; + if ( null === $virtual_attributes ) { + return null; + } + + $current_attributes = $this->attributes; + $this->attributes = $virtual_attributes; + $parser_state = $this->parser_state; + $this->parser_state = WP_HTML_Tag_Processor::STATE_MATCHED_TAG; + $attribute_names = parent::get_attribute_names_with_prefix( $prefix ); + $this->attributes = $current_attributes; + $this->parser_state = $parser_state; + + return $attribute_names; + } + + return parent::get_attribute_names_with_prefix( $prefix ); } /** @@ -5865,6 +5942,7 @@ private function get_adjusted_current_node(): ?WP_HTML_Token { * > been explicitly closed. * * @since 6.4.0 + * @since 7.0.0 Added additional support. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -5873,34 +5951,89 @@ private function get_adjusted_current_node(): ?WP_HTML_Token { * @return bool Whether any formatting elements needed to be reconstructed. */ private function reconstruct_active_formatting_elements(): bool { + $count = $this->state->active_formatting_elements->count(); /* - * > If there are no entries in the list of active formatting elements, then there is nothing - * > to reconstruct; stop this algorithm. + * > 1. If there are no entries in the list of active formatting elements, + * > then there is nothing to reconstruct; stop this algorithm. */ - if ( 0 === $this->state->active_formatting_elements->count() ) { + if ( 0 === $count ) { return false; } - $last_entry = $this->state->active_formatting_elements->current_node(); + $currently_at = $count; + $last_entry = $this->state->active_formatting_elements->at( $currently_at ); + /* + * > 2. If the last (most recently added) entry in the list of active formatting + * > elements is a marker, or if it is an element that is in the stack of open + * > elements, then there is nothing to reconstruct; stop this algorithm. 
+ */ if ( - - /* - * > If the last (most recently added) entry in the list of active formatting elements is a marker; - * > stop this algorithm. - */ 'marker' === $last_entry->node_name || - - /* - * > If the last (most recently added) entry in the list of active formatting elements is an - * > element that is in the stack of open elements, then there is nothing to reconstruct; - * > stop this algorithm. - */ $this->state->stack_of_open_elements->contains_node( $last_entry ) ) { return false; } - $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); + /* + * > 3. Let entry be the last (most recently added) element + * > in the list of active formatting elements. + */ + $entry = $last_entry; + + /* + * > 4. Rewind: If there are no entries before entry in the list of active + * > formatting elements, then jump to the step labeled create. + */ + rewind: + if ( 1 === $currently_at ) { + goto create; + } + + /* + * > 5. Let entry be the entry one earlier than entry + * > in the list of active formatting elements. + */ + $entry = $this->state->active_formatting_elements->at( --$currently_at ); + + /* + * > 6. If entry is neither a marker nor an element that is also in + * > the stack of open elements, go to the step labeled rewind. + */ + if ( + 'marker' !== $entry->node_name && + ! $this->state->stack_of_open_elements->contains_node( $entry ) + ) { + goto rewind; + } + + /* + * > 7. Advance: Let entry be the element one later than entry + * > in the list of active formatting elements. + */ + advance: + $entry = $this->state->active_formatting_elements->at( ++$currently_at ); + + /* + * > 8. Create: Insert an HTML element for the token for which the + * > element entry was created, to obtain new element. + */ + create: + $this->insert_html_element( $entry ); + + /* + * > 9. Replace the entry for _entry_ in the list with an entry for new element. + * > This doesn't need to happen here since no DOM is being created. 
+ */ + + /* + * > 10. If the entry for new element in the list of active formatting elements + * > is not the last entry in the list, return to the step labeled advance. + */ + if ( $count !== $currently_at ) { + goto advance; + } + + return true; } /** diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 3cdbd91480ca0..b71aa841a8f6d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -708,7 +708,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var WP_HTML_Attribute_Token[] */ - private $attributes = array(); + protected $attributes = array(); /** * Tracks spans of duplicate attributes on a given tag, used for removing @@ -3051,23 +3051,37 @@ public function get_qualified_tag_name(): ?string { * * @since 6.7.0 * - * @param string $attribute_name Which attribute to adjust. + * @param string $attribute_name Which attribute name to adjust. * - * @return string|null + * @return string|null The qualified attribute name or null if not on matched tag. */ public function get_qualified_attribute_name( $attribute_name ): ?string { if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { return null; } + $namespace = $this->get_namespace(); + return self::lookup_qualified_attribute_name( $namespace, $attribute_name ); + } - $namespace = $this->get_namespace(); + /** + * Returns the adjusted attribute name for a given attribute, taking into + * account the provided namespace. + * + * @since 7.0.0 + * + * @param string $ns The namespace to use: 'html', 'svg', or 'math'. + * @param string $attribute_name Which attribute to adjust. + * + * @return string The qualified attribute name. 
+ */ + final protected static function lookup_qualified_attribute_name( string $ns, string $attribute_name ): string { $lower_name = strtolower( $attribute_name ); - if ( 'math' === $namespace && 'definitionurl' === $lower_name ) { + if ( 'math' === $ns && 'definitionurl' === $lower_name ) { return 'definitionURL'; } - if ( 'svg' === $this->get_namespace() ) { + if ( 'svg' === $ns ) { switch ( $lower_name ) { case 'attributename': return 'attributeName'; @@ -3245,7 +3259,7 @@ public function get_qualified_attribute_name( $attribute_name ): ?string { } } - if ( 'html' !== $namespace ) { + if ( 'html' !== $ns ) { switch ( $lower_name ) { case 'xlink:actuate': return 'xlink actuate'; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 13e0728ca912a..ce6230c97a713 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -185,18 +185,23 @@ public function test_clear_to_navigate_after_seeking() { } /** - * Ensures that support is added for reconstructing active formatting elements - * before the HTML Processor handles situations with unclosed formats requiring it. + * Ensures that support is added for reconstructing active formatting elements. * * @ticket 58517 * * @covers WP_HTML_Processor::reconstruct_active_formatting_elements */ - public function test_fails_to_reconstruct_formatting_elements() { - $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

Four' ); + public function test_reconstructs_formatting_elements() { + $processor = WP_HTML_Processor::create_fragment( '

One

Two

Three

Four' ); $this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find first EM.' ); - $this->assertFalse( $processor->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' ); + $this->assertSame( array( 'HTML', 'BODY', 'P', 'EM' ), $processor->get_breadcrumbs(), 'Found incorrect breadcrumbs for first EM.' ); + $this->assertTrue( $processor->next_tag( 'SPAN' ), 'Could not find test span.' ); + $this->assertSame( + array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' ), + $processor->get_breadcrumbs(), + 'Found incorrect breadcrumbs for test SPAN; should have created two EMs.' + ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index 911fa8b910b37..d0e3582b35d13 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -165,46 +165,58 @@ public static function data_single_tag_of_supported_elements() { } /** - * @ticket 58517 - * - * @dataProvider data_unsupported_markup + * Ensures that formats inside unclosed A elements are reconstructed. * - * @param string $html HTML containing unsupported markup. + * @ticket 61576 */ - public function test_fails_when_encountering_unsupported_markup( $html, $description ) { - $processor = WP_HTML_Processor::create_fragment( $html ); - - while ( $processor->next_token() && null === $processor->get_attribute( 'supported' ) ) { - continue; - } + public function test_reconstructs_formatting_from_unclosed_a_elements() { + $processor = WP_HTML_Processor::create_fragment( 'Click Here' ); - $this->assertNull( - $processor->get_last_error(), - 'Bailed on unsupported input before finding supported checkpoint: check test code.' + $this->assertTrue( $processor->next_tag( 'STRONG' ), 'Could not find STRONG tag.' ); + $this->assertSame( + array( 'HTML', 'BODY', 'A', 'STRONG' ), + $processor->get_breadcrumbs(), + 'Failed to construct starting breadcrumbs properly.' 
+ ); - $this->assertTrue( $processor->get_attribute( 'supported' ), 'Did not find required supported element.' ); - $processor->next_token(); - $this->assertNotNull( $processor->get_last_error(), "Didn't properly reject unsupported markup: {$description}" ); + $this->assertTrue( $processor->next_tag( 'BIG' ), 'Could not find BIG tag.' ); + $this->assertSame( + array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ), + $processor->get_breadcrumbs(), + 'Failed to reconstruct the active formatting elements after an unclosed A element.' + ); } /** - * Data provider. + * Ensures that unclosed A elements are reconstructed. * - * @return array[] + * @ticket 61576 */ - public static function data_unsupported_markup() { - return array( - 'A with formatting following unclosed A' => array( - 'Click Here', - 'Unclosed formatting requires complicated reconstruction.', - ), + public function test_reconstructs_unclosed_a_elements() { + $processor = WP_HTML_Processor::create_fragment( '

Found me!' ); - 'A after unclosed A inside DIV' => array( - '
', - 'A is a formatting element, which requires more complicated reconstruction.', - ), + // First, there's an A tag inside the DIV. + $this->assertTrue( $processor->next_tag( 'A' ) ); + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', 'A' ), + $processor->get_breadcrumbs() ); + + /* + * There's a second A tag containing the text outside the DIV. + * When the DIV closes, the unclosed A is reconstructed from inside the DIV + * to contain the following text. + */ + $this->assertTrue( $processor->next_tag( 'A' ) ); + $this->assertSame( + array( 'HTML', 'BODY', 'A' ), + $processor->get_breadcrumbs() + ); + + // Finally, the trailing text is inside the A. + $processor->next_token(); + $this->assertSame( '#text', $processor->get_token_type() ); + $this->assertSame( 'Found me!', $processor->get_modifiable_text() ); } /** diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index a03a9ab806a93..5c3f99649324d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -152,6 +152,9 @@ private static function should_skip_test( ?string $test_context_element, string /** * Generates the tree-like structure represented in the Html5lib tests. * + * @throws WP_HTML_Unsupported_Exception Raises unsupported exceptions for test reporting. + * @throws Error For unexpected "impossible" cases. + * * @param string|null $fragment_context Context element in which to parse HTML, such as BODY or SVG. * @param string $html Given test HTML. * @return string|null Tree structure of parsed HTML, if supported, else null. @@ -160,6 +163,7 @@ private static function build_tree_representation( ?string $fragment_context, st $processor = $fragment_context ? 
WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) : WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); } @@ -264,6 +268,7 @@ static function ( $a, $b ) { foreach ( $sorted_attributes as $attribute_name => $display_name ) { $val = $processor->get_attribute( $attribute_name ); + /* * Attributes with no value are `true` with the HTML API, * We map use the empty string value in the tree structure.