diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 28e5c150a8635..ed68efc52979c 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -422,6 +422,64 @@ function ( WP_HTML_Token $token ): void {
 		};
 	}
 
+	/**
+	 * Creates a fragment processor with the current node as its context element.
+	 *
+	 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
+	 *
+	 * @param string $html Input HTML fragment to process.
+	 * @return static|null The created processor if successful, otherwise null.
+	 */
+	private function spawn_fragment_parser( string $html ): ?self {
+		if ( $this->get_token_type() !== '#tag' ) {
+			return null;
+		}
+
+		/*
+		 * Prevent creating fragments at "self-contained" nodes.
+		 *
+		 * @see https://github.com/WordPress/wordpress-develop/pull/7141
+		 * @see https://github.com/WordPress/wordpress-develop/pull/7198
+		 */
+		if (
+			'html' === $this->get_namespace() &&
+			in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
+		) {
+			return null;
+		}
+
+		$fragment_processor = self::create_fragment( $html );
+		if ( null === $fragment_processor ) {
+			return null;
+		}
+
+		$fragment_processor->compat_mode = $this->compat_mode;
+
+		// @todo The context element probably needs a namespace.
+		$context_element = array( $this->get_tag(), array() );
+
+		/*
+		 * The attribute lookup yields a list of names, or `null`, which is
+		 * how `serialize()` consumes it, so each value is fetched separately.
+		 */
+		foreach ( $this->get_attribute_names_with_prefix( '' ) ?? array() as $name ) {
+			$context_element[1][ $name ] = $this->get_attribute( $name );
+		}
+		$fragment_processor->state->context_node = $context_element;
+
+		if ( 'TEMPLATE' === $context_element[0] ) {
+			$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
+		}
+
+		$fragment_processor->reset_insertion_mode_appropriately();
+
+		// @todo Set the parser's form element pointer.
+
+		$fragment_processor->state->encoding_confidence = 'irrelevant';
+
+		return $fragment_processor;
+	}
+
 	/**
 	 * Stops the parser and terminates its execution when encountering unsupported markup.
 	 *
@@ -5020,6 +5078,180 @@ public function get_comment_type(): ?string {
 		return $this->is_virtual() ? null : parent::get_comment_type();
 	}
 
+	/**
+	 * Normalize an HTML string by serializing it.
+	 *
+	 * This removes any partial syntax at the end of the string.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @param string $html Input HTML to normalize.
+	 *
+	 * @return string|null Normalized output, or `null` if unable to normalize.
+	 */
+	public static function normalize( string $html ): ?string {
+		$processor = static::create_fragment( $html );
+
+		return null === $processor ? null : $processor->serialize();
+	}
+
+	/**
+	 * Generate normalized markup for the HTML in the provided processor.
+	 *
+	 * This removes any partial syntax at the end of the string.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @return string|null Normalized HTML markup represented by processor,
+	 *                     or `null` if unable to generate serialization.
+	 */
+	public function serialize(): ?string {
+		if ( WP_HTML_Tag_Processor::STATE_READY !== $this->parser_state ) {
+			return null;
+		}
+
+		$html = '';
+		while ( $this->next_token() ) {
+			$token_type = $this->get_token_type();
+
+			switch ( $token_type ) {
+				case '#text':
+					$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
+					break;
+
+				case '#funky-comment':
+				case '#comment':
+					// Comments pass through unescaped inside their delimiters.
+					// NOTE(review): confirm the modifiable text is the full comment text.
+					$html .= "<!--{$this->get_modifiable_text()}-->";
+					break;
+
+				case '#cdata-section':
+					$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
+					break;
+
+				case 'html':
+					$html .= '<!DOCTYPE html>';
+					break;
+			}
+
+			if ( '#tag' !== $token_type ) {
+				continue;
+			}
+
+			if ( $this->is_tag_closer() ) {
+				$html .= "</{$this->get_qualified_tag_name()}>";
+				continue;
+			}
+
+			$attribute_names = $this->get_attribute_names_with_prefix( '' );
+			if ( ! isset( $attribute_names ) ) {
+				$html .= "<{$this->get_qualified_tag_name()}>";
+				continue;
+			}
+
+			$html .= "<{$this->get_qualified_tag_name()}";
+			foreach ( $attribute_names as $attribute_name ) {
+				$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
+				$value = $this->get_attribute( $attribute_name );
+
+				if ( is_string( $value ) ) {
+					$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ) . '"';
+				}
+			}
+
+			if ( 'html' !== $this->get_namespace() && $this->has_self_closing_flag() ) {
+				$html .= '/';
+			}
+
+			$html .= '>';
+		}
+
+		if ( null !== $this->get_last_error() ) {
+			return null;
+		}
+
+		return $html;
+	}
+
+	/**
+	 * Replaces the inner markup of the currently-matched tag with provided HTML.
+	 *
+	 * This function will normalize the given input and enforce the boundaries
+	 * within the existing HTML where it's called.
+	 *
+	 * @since 6.8.0
+	 *
+	 * @param string $new_inner_html New HTML to inject as inner HTML for the currently-matched tag.
+	 * @return bool|null Whether the inner markup was modified for the currently-matched tag, or `null`
+	 *                   if called on a node which doesn't allow changing the inner HTML.
+	 */
+	public function set_inner_html( string $new_inner_html ): ?bool {
+		$tag_name = $this->get_tag();
+
+		if (
+			WP_HTML_Tag_Processor::STATE_MATCHED_TAG !== $this->parser_state ||
+			$this->is_tag_closer() ||
+			( 'html' === $this->get_namespace() &&
+				(
+					self::is_void( $tag_name ) ||
+					in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
+				)
+			)
+		) {
+			// @todo Support setting inner HTML for SCRIPT, STYLE, TEXTAREA, and TITLE.
+			return null;
+		}
+
+		$fragment = $this->spawn_fragment_parser( $new_inner_html );
+		if ( null === $fragment ) {
+			return null;
+		}
+
+		// Bail before touching the document if the new markup cannot be normalized.
+		$new_markup = $fragment->serialize();
+		if ( null === $new_markup ) {
+			return false;
+		}
+
+		if ( ! $this->set_bookmark( 'start' ) ) {
+			return false;
+		}
+
+		// Advance until the depth drops below that of the opening tag.
+		$depth = $this->get_current_depth();
+		while ( $this->get_current_depth() >= $depth && $this->next_token() ) {
+			continue;
+		}
+
+		if (
+			$this->paused_at_incomplete_token() ||
+			null !== $this->get_last_error() ||
+			! $this->set_bookmark( 'end' )
+		) {
+			$this->release_bookmark( 'start' );
+			return false;
+		}
+
+		$start = $this->bookmarks['_start'];
+		$end   = $this->bookmarks['_end'];
+
+		$this->lexical_updates[] = new WP_HTML_Text_Replacement(
+			$start->start + $start->length,
+			$end->start - ( $start->start + $start->length ),
+			$new_markup
+		);
+
+		$this->get_updated_html();
+		$this->seek( 'start' );
+
+		// The temporary bookmarks are no longer needed once the cursor is restored.
+		$this->release_bookmark( 'start' );
+		$this->release_bookmark( 'end' );
+
+		return true;
+	}
+
 	/**
 	 * Removes a bookmark that is no longer needed.
 	 *
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 233d47eb8da95..355a7bb001923 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -2840,7 +2840,7 @@ public function get_qualified_tag_name(): ?string {
 		}
 
 		if ( 'html' === $this->get_namespace() ) {
-			return $tag_name;
+			return strtolower( $tag_name );
 		}
 
 		$lower_tag_name = strtolower( $tag_name );