-
Notifications
You must be signed in to change notification settings - Fork 0
HTML API: Active formatting element reconstruction Noah's Ark #24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: html-api/improve-active-element-reconstruction
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,6 +43,17 @@ class WP_HTML_Active_Formatting_Elements { | |
| */ | ||
| private $stack = array(); | ||
|
|
||
| /** | ||
| * Holds a stack of hashes representing uniquely representing the active formatting element. | ||
| * | ||
| * This is important to efficiently track and remove duplicate elements when pushing. | ||
| * | ||
| * @since 7.0.0 | ||
| * | ||
| * @var string[] | ||
| */ | ||
| private $hash_stack = array(); | ||
|
|
||
| /** | ||
| * Returns the node at the given 1-offset index in the list of active formatting elements. | ||
| * | ||
|
|
@@ -111,7 +122,52 @@ public function current_node() { | |
| * @since 6.7.0 | ||
| */ | ||
| public function insert_marker(): void { | ||
| $this->push( new WP_HTML_Token( null, 'marker', false ) ); | ||
| $this->stack[] = new WP_HTML_Token( null, 'marker', false ); | ||
| $this->hash_stack[] = 'marker'; | ||
| } | ||
|
|
||
| /** | ||
| * Generates a hash string for a given token, based on its | ||
| * tag name, namespace, and attributes. | ||
| * | ||
| * @since 7.0.0 | ||
| * | ||
| * @param WP_HTML_Token $token Token to generate a hash for. | ||
| * @param string $token_html The original HTML of the token. | ||
| * @return string Generated hash string. | ||
| */ | ||
| private function get_token_hash( WP_HTML_Token $token, string $token_html ): string { | ||
| $processor = new WP_HTML_Tag_Processor( $token_html ); | ||
| $processor->change_parsing_namespace( $token->namespace ); | ||
| $processor->next_tag(); | ||
|
|
||
| $node_name = $processor->get_qualified_tag_name(); | ||
| $hash_string = "{$token->namespace}::<{$node_name}"; | ||
|
|
||
| $attribute_names = $processor->get_attribute_names_with_prefix( '' ); | ||
| if ( ! empty( $attribute_names ) ) { | ||
| $attr_parts = []; | ||
| sort( $attribute_names, SORT_STRING ); | ||
| foreach ( $attribute_names as $attribute_name ) { | ||
| $display_name = $processor->get_qualified_attribute_name( $attribute_name ); | ||
| $val = $processor->get_attribute( $attribute_name ); | ||
|
|
||
| /* | ||
| * Attributes with no value are `true` with the HTML API, | ||
| * We map use the empty string value in the tree structure. | ||
| */ | ||
| if ( true === $val ) { | ||
| $val = ''; | ||
| } | ||
| $val = strtr( $val, '"', '"' ); | ||
|
|
||
| $attr_parts[] = "{$display_name}=\"{$val}\""; | ||
| } | ||
| $hash_string .= ' ' . implode( ' ', $attr_parts ); | ||
| } | ||
| $hash_string .= '>'; | ||
|
|
||
| return dechex( crc32( $hash_string ) ); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -124,7 +180,7 @@ public function insert_marker(): void { | |
| * @param WP_HTML_Token $token Push this node onto the stack. | ||
| * @return bool Whether a node was pushed onto the stack of active formatting elements. | ||
| */ | ||
| public function push( WP_HTML_Token $token ): bool { | ||
| public function push( WP_HTML_Token $token, string $token_html ): bool { | ||
| /* | ||
| * > If there are already three elements in the list of active formatting elements after the last marker, | ||
| * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and | ||
|
|
@@ -135,29 +191,35 @@ public function push( WP_HTML_Token $token ): bool { | |
| * > (the order of the attributes does not matter). | ||
| */ | ||
|
|
||
| if ( 'marker' !== $token->node_name ) { | ||
| $existing_count = 0; | ||
| foreach ( $this->walk_up() as $item ) { | ||
| if ( 'marker' === $item->node_name ) { | ||
| break; | ||
| } | ||
| if ( 'marker' === $token->node_name ) { | ||
| _doing_it_wrong( | ||
| __METHOD__, | ||
| 'Markers must be added using the WP_HTML_Active_Formatting_Elements::insert_marker() method.', | ||
| '7.0.0' | ||
| ); | ||
| return false; | ||
| } | ||
|
|
||
| if ( | ||
| $item->node_name === $token->node_name && | ||
| $item->namespace === $token->namespace | ||
| // @todo Compare attributes. For now, bail if there are three matching tag names + namespaces. | ||
| ) { | ||
| ++$existing_count; | ||
| if ( $existing_count >= 3 ) { | ||
| // @todo Implement removing the earliest element and moving forward. | ||
| return false; | ||
| } | ||
| $token_hash = $this->get_token_hash( $token, $token_html ); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be represented in another way, perhaps:
The most costly part (making a way to correctly compare attributes) could be done lazily. That's difficult because this class doesn't have access to the underlying HTML or the processor. |
||
| $existing_count = 0; | ||
| for ( $i = count( $this->hash_stack ) - 1; $i >= 0; $i-- ) { | ||
|
Comment on lines
+203
to
+205
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All this work could be skipped if the stack is smaller than 3 or if the stack up to the last marker is smaller than three. In those cases there's no chance that something needs to be removed. |
||
| $item_hash = $this->hash_stack[ $i ]; | ||
|
|
||
| if ( 'marker' === $item_hash ) { | ||
| break; | ||
| } | ||
|
|
||
| if ( $item_hash === $token_hash ) { | ||
| if ( ++$existing_count >= 3 ) { | ||
| $this->remove_node( $this->stack[ $i ] ); | ||
| break; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // > Add element to the list of active formatting elements. | ||
| $this->stack[] = $token; | ||
| $this->stack[] = $token; | ||
| $this->hash_stack[] = $token_hash; | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -177,6 +239,7 @@ public function remove_node( WP_HTML_Token $token ) { | |
|
|
||
| $position_from_start = $this->count() - $position_from_end - 1; | ||
| array_splice( $this->stack, $position_from_start, 1 ); | ||
| array_splice( $this->hash_stack, $position_from_start, 1 ); | ||
| return true; | ||
| } | ||
|
|
||
|
|
@@ -255,6 +318,7 @@ public function walk_up() { | |
| public function clear_up_to_last_marker(): void { | ||
| foreach ( $this->walk_up() as $item ) { | ||
| array_pop( $this->stack ); | ||
| array_pop( $this->hash_stack ); | ||
| if ( 'marker' === $item->node_name ) { | ||
| break; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2778,7 +2778,9 @@ private function step_in_body(): bool { | |
|
|
||
| $this->reconstruct_active_formatting_elements(); | ||
| $this->insert_html_element( $this->state->current_token ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { | ||
| $bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ]; | ||
| $token_html = substr( $this->html, $bookmark->start, $bookmark->length ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) { | ||
|
Comment on lines
+2781
to
+2783
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's annoying, but it may be essential to compare attributes. The active formatting elements class doesn't have a good way to access attributes at this time. |
||
| $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' ); | ||
| } | ||
| $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; | ||
|
|
@@ -2802,7 +2804,9 @@ private function step_in_body(): bool { | |
| case '+U': | ||
| $this->reconstruct_active_formatting_elements(); | ||
| $this->insert_html_element( $this->state->current_token ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { | ||
| $bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ]; | ||
| $token_html = substr( $this->html, $bookmark->start, $bookmark->length ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) { | ||
| $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' ); | ||
| } | ||
| $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; | ||
|
|
@@ -2821,7 +2825,9 @@ private function step_in_body(): bool { | |
| } | ||
|
|
||
| $this->insert_html_element( $this->state->current_token ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) { | ||
| $bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ]; | ||
| $token_html = substr( $this->html, $bookmark->start, $bookmark->length ); | ||
| if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) { | ||
| $this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' ); | ||
| } | ||
| $this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't require hashing, the strings could be compared directly.
The strings with attributes risk getting quite long, the hash could be skipped for a simpler representation if there are no attributes.