Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 83 additions & 19 deletions src/wp-includes/html-api/class-wp-html-active-formatting-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ class WP_HTML_Active_Formatting_Elements {
*/
private $stack = array();

/**
* Holds a stack of hashes representing uniquely representing the active formatting element.
*
* This is important to efficiently track and remove duplicate elements when pushing.
*
* @since 7.0.0
*
* @var string[]
*/
private $hash_stack = array();

/**
* Returns the node at the given 1-offset index in the list of active formatting elements.
*
Expand Down Expand Up @@ -111,7 +122,52 @@ public function current_node() {
* @since 6.7.0
*/
public function insert_marker(): void {
$this->push( new WP_HTML_Token( null, 'marker', false ) );
$this->stack[] = new WP_HTML_Token( null, 'marker', false );
$this->hash_stack[] = 'marker';
}

/**
* Generates a hash string for a given token, based on its
* tag name, namespace, and attributes.
*
* @since 7.0.0
*
* @param WP_HTML_Token $token Token to generate a hash for.
* @param string $token_html The original HTML of the token.
* @return string Generated hash string.
*/
private function get_token_hash( WP_HTML_Token $token, string $token_html ): string {
$processor = new WP_HTML_Tag_Processor( $token_html );
$processor->change_parsing_namespace( $token->namespace );
$processor->next_tag();

$node_name = $processor->get_qualified_tag_name();
$hash_string = "{$token->namespace}::<{$node_name}";

$attribute_names = $processor->get_attribute_names_with_prefix( '' );
if ( ! empty( $attribute_names ) ) {
$attr_parts = [];
sort( $attribute_names, SORT_STRING );
foreach ( $attribute_names as $attribute_name ) {
$display_name = $processor->get_qualified_attribute_name( $attribute_name );
$val = $processor->get_attribute( $attribute_name );

/*
* Attributes with no value are `true` with the HTML API,
* We map use the empty string value in the tree structure.
*/
if ( true === $val ) {
$val = '';
}
$val = strtr( $val, '"', '&quot;' );

$attr_parts[] = "{$display_name}=\"{$val}\"";
}
$hash_string .= ' ' . implode( ' ', $attr_parts );
}
$hash_string .= '>';

return dechex( crc32( $hash_string ) );
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't require hashing, the strings could be compared directly.

The strings with attributes risk getting quite long, the hash could be skipped for a simpler representation if there are no attributes.

}

/**
Expand All @@ -124,7 +180,7 @@ public function insert_marker(): void {
* @param WP_HTML_Token $token Push this node onto the stack.
* @return bool Whether a node was pushed onto the stack of active formatting elements.
*/
public function push( WP_HTML_Token $token ): bool {
public function push( WP_HTML_Token $token, string $token_html ): bool {
/*
* > If there are already three elements in the list of active formatting elements after the last marker,
* > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
Expand All @@ -135,29 +191,35 @@ public function push( WP_HTML_Token $token ): bool {
* > (the order of the attributes does not matter).
*/

if ( 'marker' !== $token->node_name ) {
$existing_count = 0;
foreach ( $this->walk_up() as $item ) {
if ( 'marker' === $item->node_name ) {
break;
}
if ( 'marker' === $token->node_name ) {
_doing_it_wrong(
__METHOD__,
'Markers must be added using the WP_HTML_Active_Formatting_Elements::insert_marker() method.',
'7.0.0'
);
return false;
}

if (
$item->node_name === $token->node_name &&
$item->namespace === $token->namespace
// @todo Compare attributes. For now, bail if there are three matching tag names + namespaces.
) {
++$existing_count;
if ( $existing_count >= 3 ) {
// @todo Implement removing the earliest element and moving forward.
return false;
}
$token_hash = $this->get_token_hash( $token, $token_html );
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be represented in another way, perhaps:

[ "namespace", "tag_name", "attrs_hash"|null ]

The most costly part (making a way to correctly compare attributes) could be done lazily.

That's difficult because this class doesn't have access to the underlying HTML or the processor.

$existing_count = 0;
for ( $i = count( $this->hash_stack ) - 1; $i >= 0; $i-- ) {
Comment on lines +203 to +205
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All this work could be skipped if the stack is smaller than 3 or if the stack up to the last marker is smaller than three. In those cases there's no chance that something needs to be removed.

$item_hash = $this->hash_stack[ $i ];

if ( 'marker' === $item_hash ) {
break;
}

if ( $item_hash === $token_hash ) {
if ( ++$existing_count >= 3 ) {
$this->remove_node( $this->stack[ $i ] );
break;
}
}
}

// > Add element to the list of active formatting elements.
$this->stack[] = $token;
$this->stack[] = $token;
$this->hash_stack[] = $token_hash;
return true;
}

Expand All @@ -177,6 +239,7 @@ public function remove_node( WP_HTML_Token $token ) {

$position_from_start = $this->count() - $position_from_end - 1;
array_splice( $this->stack, $position_from_start, 1 );
array_splice( $this->hash_stack, $position_from_start, 1 );
return true;
}

Expand Down Expand Up @@ -255,6 +318,7 @@ public function walk_up() {
public function clear_up_to_last_marker(): void {
foreach ( $this->walk_up() as $item ) {
array_pop( $this->stack );
array_pop( $this->hash_stack );
if ( 'marker' === $item->node_name ) {
break;
}
Expand Down
12 changes: 9 additions & 3 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2778,7 +2778,9 @@ private function step_in_body(): bool {

$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
Comment on lines +2781 to +2783
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's annoying, but it may be essential to compare attributes. The active formatting elements class doesn't have a good way to access attributes at this time.

$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
Expand All @@ -2802,7 +2804,9 @@ private function step_in_body(): bool {
case '+U':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
Expand All @@ -2821,7 +2825,9 @@ private function step_in_body(): bool {
}

$this->insert_html_element( $this->state->current_token );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
}
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,31 +49,34 @@ public function test_get_depth() {
}

$processor = new WP_Block_Processor( $html );
$n = new NumberFormatter( 'en-US', NumberFormatter::ORDINAL );

for ( $i = 0; $i < $max_depth; $i++ ) {
$nth = $i + 1;

$this->assertTrue(
$processor->next_delimiter(),
"Should have found {$n->format( $i + 1 )} opening delimiter: check test setup."
"Should have found opening delimiter #{$nth}: check test setup."
);

$this->assertSame(
$i + 1,
$processor->get_depth(),
"Should have identified the proper depth of the {$n->format( $i + 1 )} opening delimiter."
"Should have identified the proper depth of opening delimiter #{$nth}."
);
}

for ( $i = 0; $i < $max_depth; $i++ ) {
$nth = $i + 1;

$this->assertTrue(
$processor->next_delimiter(),
"Should have found {$n->format( $i + 1 )} closing delimiter: check test setup."
"Should have found closing delimiter #{$nth}: check test setup."
);

$this->assertSame(
$max_depth - $i - 1,
$processor->get_depth(),
"Should have identified the proper depth of the {$n->format( $i + 1 )} closing delimiter."
"Should have identified the proper depth of closing delimiter #{$nth}."
);
}
}
Expand Down