Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
2b2d6fe
HTML API: Simplify breadcrumb accounting.
dmsnell Jul 6, 2024
6962fa2
HTML API: Expand Unsupported class and make it available for debugging.
dmsnell Jul 6, 2024
ab1096f
HTML API: Implement "reconstruct the active formatting elements" algo…
dmsnell Jul 6, 2024
ad82d3a
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
dmsnell Aug 8, 2024
374cba3
Continue iterating
dmsnell Aug 9, 2024
9bf12f4
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
sirreal Aug 29, 2024
5e4bab9
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
dmsnell Sep 3, 2024
9856fce
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
dmsnell Sep 4, 2024
0c2f0c3
WIP: Explore reading attributes from recreated formatting elements.
dmsnell Sep 6, 2024
4eba4a0
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
sirreal Jan 3, 2025
8b00f86
Update 6.7.0 version to 6.8.0
sirreal Jan 3, 2025
026c3c2
Improve actively_reconstructed_formatting_attributes type annotation
sirreal Jan 3, 2025
8e5a423
Ensure that get_qualified_attribute_name works with virtual tokens
sirreal Jan 3, 2025
d2e5aab
Remove debugging code
sirreal Jan 3, 2025
b4269af
Merge branch 'trunk' into html-api/improve-active-element-reconstruction
sirreal Dec 1, 2025
8cab020
Bump since tags to 7.0.0
sirreal Dec 1, 2025
1c4a5c7
Remove redundant private comments and annotations
sirreal Dec 2, 2025
849dbec
Correct A reconstruction test
sirreal Dec 2, 2025
9ad7278
Remove trailing params comma
sirreal Dec 2, 2025
3e8e06b
Remove another trailing param comma
sirreal Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,18 @@ class WP_HTML_Active_Formatting_Elements {
*/
private $stack = array();

/**
 * Returns the node at the given 1-offset index in the list of active formatting elements.
 *
 * Index 1 refers to the first slot of the internal stack (the earliest entry
 * appended by `push()`); larger indices refer to more recently added entries.
 *
 * @since 7.0.0
 *
 * @param int $nth 1-offset position of the node to return.
 * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null.
 */
public function at( $nth ): ?WP_HTML_Token {
	// Fall back to null so that an out-of-range index honors the documented
	// contract instead of raising an "undefined array key" warning.
	return $this->stack[ $nth - 1 ] ?? null;
}

/**
* Reports if a specific node is in the stack of active formatting elements.
*
Expand Down Expand Up @@ -110,8 +122,9 @@ public function insert_marker(): void {
* @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
*
* @param WP_HTML_Token $token Push this node onto the stack.
* @return bool Whether a node was pushed onto the stack of active formatting elements.
*/
public function push( WP_HTML_Token $token ) {
public function push( WP_HTML_Token $token ): bool {
/*
* > If there are already three elements in the list of active formatting elements after the last marker,
* > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
Expand All @@ -120,11 +133,32 @@ public function push( WP_HTML_Token $token ) {
* > created by the parser; two elements have the same attributes if all their parsed attributes can be
* > paired such that the two attributes in each pair have identical names, namespaces, and values
* > (the order of the attributes does not matter).
*
* @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack.
*/

if ( 'marker' !== $token->node_name ) {
$existing_count = 0;
foreach ( $this->walk_up() as $item ) {
if ( 'marker' === $item->node_name ) {
break;
}

if (
$item->node_name === $token->node_name &&
$item->namespace === $token->namespace
// @todo Compare attributes. For now, bail if there are three matching tag names + namespaces.
) {
++$existing_count;
if ( $existing_count >= 3 ) {
// @todo Implement removing the earliest element and moving forward.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return false;
}
}
}
}

// > Add element to the list of active formatting elements.
$this->stack[] = $token;
return true;
}

/**
Expand Down
175 changes: 154 additions & 21 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,18 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $context_node = null;

/**
* If a formatting element has been reconstructed, this will hold
* the parsed attributes from the original format, once requested.
*
* These attributes are not modifiable.
*
* @since 7.0.0
*
* @var array
*/
protected $actively_reconstructed_formatting_attributes = array();

/*
* Public Interface Functions
*/
Expand Down Expand Up @@ -2766,7 +2778,10 @@ private function step_in_body(): bool {

$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
return true;

/*
Expand All @@ -2787,7 +2802,10 @@ private function step_in_body(): bool {
case '+U':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
return true;

/*
Expand All @@ -2803,7 +2821,10 @@ private function step_in_body(): bool {
}

$this->insert_html_element( $this->state->current_token );
$this->state->active_formatting_elements->push( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
return true;

/*
Expand Down Expand Up @@ -5284,7 +5305,46 @@ public function get_token_type(): ?string {
* @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
*/
public function get_attribute( $name ) {
return $this->is_virtual() ? null : parent::get_attribute( $name );
if ( $this->is_virtual() ) {
$virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $this->current_element->token->bookmark_name ?? '' ] ?? null;
if ( null === $virtual_attributes ) {
return null;
}

$current_attributes = $this->attributes;
$current_updates = $this->lexical_updates;
$this->lexical_updates = array();
$this->attributes = $virtual_attributes;
$parser_state = $this->parser_state;
$this->parser_state = WP_HTML_Tag_Processor::STATE_MATCHED_TAG;
$attribute_names = parent::get_attribute( $name );
$this->attributes = $current_attributes;
$this->parser_state = $parser_state;
$this->lexical_updates = $current_updates;

return $attribute_names;
}

return parent::get_attribute( $name );
}

/**
 * Returns the adjusted (qualified) form of an attribute name, taking into
 * account the current parsing context, whether HTML, SVG, or MathML.
 *
 * For virtual tokens the namespace is read from the current element's token;
 * for every other token the lookup is deferred to the parent implementation.
 *
 * @since 7.0.0 Subclassed for the HTML Processor.
 *
 * @param string $attribute_name Which attribute name to adjust.
 *
 * @return string|null The qualified attribute name or null if not on matched tag.
 */
public function get_qualified_attribute_name( $attribute_name ): ?string {
	// Non-virtual tokens are handled by the parent implementation.
	if ( ! $this->is_virtual() ) {
		return parent::get_qualified_attribute_name( $attribute_name );
	}

	// Virtual tokens: resolve the name against the namespace recorded on the current element's token.
	return self::lookup_qualified_attribute_name(
		$this->current_element->token->namespace,
		$attribute_name
	);
}

/**
Expand Down Expand Up @@ -5362,7 +5422,24 @@ public function remove_attribute( $name ): bool {
* @return array|null List of attribute names, or `null` when no tag opener is matched.
*/
public function get_attribute_names_with_prefix( $prefix ): ?array {
return $this->is_virtual() ? null : parent::get_attribute_names_with_prefix( $prefix );
if ( $this->is_virtual() ) {
$virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $this->current_element->token->bookmark_name ?? '' ] ?? null;
if ( null === $virtual_attributes ) {
return null;
}

$current_attributes = $this->attributes;
$this->attributes = $virtual_attributes;
$parser_state = $this->parser_state;
$this->parser_state = WP_HTML_Tag_Processor::STATE_MATCHED_TAG;
$attribute_names = parent::get_attribute_names_with_prefix( $prefix );
$this->attributes = $current_attributes;
$this->parser_state = $parser_state;

return $attribute_names;
}

return parent::get_attribute_names_with_prefix( $prefix );
}

/**
Expand Down Expand Up @@ -5865,6 +5942,7 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
* > been explicitly closed.
*
* @since 6.4.0
* @since 7.0.0 Added additional support.
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
Expand All @@ -5873,34 +5951,89 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
* @return bool Whether any formatting elements needed to be reconstructed.
*/
private function reconstruct_active_formatting_elements(): bool {
$count = $this->state->active_formatting_elements->count();
/*
* > If there are no entries in the list of active formatting elements, then there is nothing
* > to reconstruct; stop this algorithm.
* > 1. If there are no entries in the list of active formatting elements,
* > then there is nothing to reconstruct; stop this algorithm.
*/
if ( 0 === $this->state->active_formatting_elements->count() ) {
if ( 0 === $count ) {
return false;
}

$last_entry = $this->state->active_formatting_elements->current_node();
$currently_at = $count;
$last_entry = $this->state->active_formatting_elements->at( $currently_at );
/*
* > 2. If the last (most recently added) entry in the list of active formatting
* > elements is a marker, or if it is an element that is in the stack of open
* > elements, then there is nothing to reconstruct; stop this algorithm.
*/
if (

/*
* > If the last (most recently added) entry in the list of active formatting elements is a marker;
* > stop this algorithm.
*/
'marker' === $last_entry->node_name ||

/*
* > If the last (most recently added) entry in the list of active formatting elements is an
* > element that is in the stack of open elements, then there is nothing to reconstruct;
* > stop this algorithm.
*/
$this->state->stack_of_open_elements->contains_node( $last_entry )
) {
return false;
}

$this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
/*
* > 3. Let entry be the last (most recently added) element
* > in the list of active formatting elements.
*/
$entry = $last_entry;

/*
* > 4. Rewind: If there are no entries before entry in the list of active
* > formatting elements, then jump to the step labeled create.
*/
rewind:
if ( 1 === $currently_at ) {
goto create;
}

/*
* > 5. Let entry be the entry one earlier than entry
* > in the list of active formatting elements.
*/
$entry = $this->state->active_formatting_elements->at( --$currently_at );

/*
* > 6. If entry is neither a marker nor an element that is also in
* > the stack of open elements, go to the step labeled rewind.
*/
if (
'marker' !== $entry->node_name &&
! $this->state->stack_of_open_elements->contains_node( $entry )
) {
goto rewind;
}

/*
* > 7. Advance: Let entry be the element one later than entry
* > in the list of active formatting elements.
*/
advance:
$entry = $this->state->active_formatting_elements->at( ++$currently_at );

/*
* > 8. Create: Insert an HTML element for the token for which the
* > element entry was created, to obtain new element.
*/
create:
$this->insert_html_element( $entry );

/*
* > 9. Replace the entry for _entry_ in the list with an entry for new element.
* > This doesn't need to happen here since no DOM is being created.
*/

/*
* > 10. If the entry for new element in the list of active formatting elements
* > is not the last entry in the list, return to the step labeled advance.
*/
if ( $count !== $currently_at ) {
goto advance;
}

return true;
}

/**
Expand Down
28 changes: 21 additions & 7 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ class WP_HTML_Tag_Processor {
* @since 6.2.0
* @var WP_HTML_Attribute_Token[]
*/
private $attributes = array();
protected $attributes = array();

/**
* Tracks spans of duplicate attributes on a given tag, used for removing
Expand Down Expand Up @@ -3051,23 +3051,37 @@ public function get_qualified_tag_name(): ?string {
*
* @since 6.7.0
*
* @param string $attribute_name Which attribute to adjust.
* @param string $attribute_name Which attribute name to adjust.
*
* @return string|null
* @return string|null The qualified attribute name or null if not on matched tag.
*/
public function get_qualified_attribute_name( $attribute_name ): ?string {
	// Only a matched tag has attributes to qualify; otherwise report null.
	return self::STATE_MATCHED_TAG === $this->parser_state
		? self::lookup_qualified_attribute_name( $this->get_namespace(), $attribute_name )
		: null;
}

$namespace = $this->get_namespace();
/**
* Returns the adjusted attribute name for a given attribute, taking into
* account the provided namespace.
*
* @since 7.0.0
*
* @param string $ns The namespace to use: 'html', 'svg', or 'math'.
* @param string $attribute_name Which attribute to adjust.
*
* @return string The qualified attribute name.
*/
final protected static function lookup_qualified_attribute_name( string $ns, string $attribute_name ): string {
$lower_name = strtolower( $attribute_name );

if ( 'math' === $namespace && 'definitionurl' === $lower_name ) {
if ( 'math' === $ns && 'definitionurl' === $lower_name ) {
return 'definitionURL';
}

if ( 'svg' === $this->get_namespace() ) {
if ( 'svg' === $ns ) {
switch ( $lower_name ) {
case 'attributename':
return 'attributeName';
Expand Down Expand Up @@ -3245,7 +3259,7 @@ public function get_qualified_attribute_name( $attribute_name ): ?string {
}
}

if ( 'html' !== $namespace ) {
if ( 'html' !== $ns ) {
switch ( $lower_name ) {
case 'xlink:actuate':
return 'xlink actuate';
Expand Down
15 changes: 10 additions & 5 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -185,18 +185,23 @@ public function test_clear_to_navigate_after_seeking() {
}

/**
* Ensures that support is added for reconstructing active formatting elements
* before the HTML Processor handles situations with unclosed formats requiring it.
* Ensures that support is added for reconstructing active formatting elements.
*
* @ticket 58517
*
* @covers WP_HTML_Processor::reconstruct_active_formatting_elements
*/
public function test_fails_to_reconstruct_formatting_elements() {
$processor = WP_HTML_Processor::create_fragment( '<p><em>One<p><em>Two<p><em>Three<p><em>Four' );
public function test_reconstructs_formatting_elements() {
$processor = WP_HTML_Processor::create_fragment( '<p><em>One<p><em><span>Two<p><em>Three<p><em>Four' );

$this->assertTrue( $processor->next_tag( 'EM' ), 'Could not find first EM.' );
$this->assertFalse( $processor->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
$this->assertSame( array( 'HTML', 'BODY', 'P', 'EM' ), $processor->get_breadcrumbs(), 'Found incorrect breadcrumbs for first EM.' );
$this->assertTrue( $processor->next_tag( 'SPAN' ), 'Could not find test span.' );
$this->assertSame(
array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' ),
$processor->get_breadcrumbs(),
'Found incorrect breadcrumbs for test SPAN; should have created two EMs.'
);
}

/**
Expand Down
Loading
Loading