Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 201 additions & 0 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,55 @@ function ( WP_HTML_Token $token ): void {
};
}

/**
 * Creates a fragment processor with the current node as its context element.
 *
 * The context element is recorded as a two-item array: the tag name of the
 * currently-matched tag followed by a map of its attribute names to values.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
 *
 * @param string $html Input HTML fragment to process.
 * @return static|null The created processor if successful, otherwise null.
 *                     Null is returned when the processor is not stopped on a tag,
 *                     when the tag is a "self-contained" HTML element, or when the
 *                     fragment processor could not be created.
 */
private function spawn_fragment_parser( string $html ): ?self {
	if ( '#tag' !== $this->get_token_type() ) {
		return null;
	}

	/*
	 * Prevent creating fragments at "self-contained" nodes.
	 *
	 * @see https://github.com/WordPress/wordpress-develop/pull/7141
	 * @see https://github.com/WordPress/wordpress-develop/pull/7198
	 */
	if (
		'html' === $this->get_namespace() &&
		in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
	) {
		return null;
	}

	$fragment_processor = self::create_fragment( $html );
	if ( null === $fragment_processor ) {
		// Creating the fragment can fail; avoid dereferencing a missing processor.
		return null;
	}

	$fragment_processor->compat_mode = $this->compat_mode;

	// @todo The context element probably needs a namespace.
	$context_element = array( $this->get_tag(), array() );

	/*
	 * The list of attribute names holds names only (and may be null when
	 * there is nothing to list), so each value must be looked up by name.
	 */
	$attribute_names = $this->get_attribute_names_with_prefix( '' ) ?? array();
	foreach ( $attribute_names as $name ) {
		$context_element[1][ $name ] = $this->get_attribute( $name );
	}
	$fragment_processor->state->context_node = $context_element;

	if ( 'TEMPLATE' === $context_element[0] ) {
		$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
	}

	$fragment_processor->reset_insertion_mode_appropriately();

	// @todo Set the parser's form element pointer.

	$fragment_processor->state->encoding_confidence = 'irrelevant';

	return $fragment_processor;
}

/**
* Stops the parser and terminates its execution when encountering unsupported markup.
*
Expand Down Expand Up @@ -5020,6 +5069,158 @@ public function get_comment_type(): ?string {
return $this->is_virtual() ? null : parent::get_comment_type();
}

/**
 * Normalize an HTML string by serializing it.
 *
 * This removes any partial syntax at the end of the string.
 *
 * @since 6.7.0
 *
 * @param string $html Input HTML to normalize.
 *
 * @return string|null Normalized output, or `null` if unable to normalize.
 */
public static function normalize( string $html ): ?string {
	$processor = static::create_fragment( $html );

	// A fragment processor is not guaranteed; report failure instead of crashing.
	if ( null === $processor ) {
		return null;
	}

	return $processor->serialize();
}

/**
 * Generate normalized markup for the HTML in the provided processor.
 *
 * This removes any partial syntax at the end of the string.
 *
 * The processor is advanced through every remaining token while the
 * output is built, so it must not have started scanning yet.
 *
 * @since 6.7.0
 *
 * @return string|null Normalized HTML markup represented by processor,
 *                     or `null` if unable to generate serialization.
 */
public function serialize(): ?string {
	// Only a processor which has not yet started parsing can be serialized.
	if ( WP_HTML_Tag_Processor::STATE_READY !== $this->parser_state ) {
		return null;
	}

	// Text nodes and attribute values are escaped identically.
	$escape_flags = ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5;

	$html = '';
	while ( $this->next_token() ) {
		$token_type = $this->get_token_type();

		switch ( $token_type ) {
			case '#text':
				$html .= htmlspecialchars( $this->get_modifiable_text(), $escape_flags, 'UTF-8' );
				break;

			case '#funky-comment':
			case '#comment':
				$html .= "<!--{$this->get_modifiable_text()}-->";
				break;

			case '#cdata-section':
				$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
				break;

			// DOCTYPE tokens are reported with the `#doctype` token type.
			case '#doctype':
				$html .= '<!DOCTYPE html>';
				break;
		}

		if ( '#tag' !== $token_type ) {
			continue;
		}

		$qualified_tag_name = $this->get_qualified_tag_name();

		if ( $this->is_tag_closer() ) {
			$html .= "</{$qualified_tag_name}>";
			continue;
		}

		$attribute_names = $this->get_attribute_names_with_prefix( '' );
		if ( ! isset( $attribute_names ) ) {
			$html .= "<{$qualified_tag_name}>";
			continue;
		}

		$html .= "<{$qualified_tag_name}";
		foreach ( $attribute_names as $attribute_name ) {
			$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
			$value = $this->get_attribute( $attribute_name );

			// Non-string values are emitted as a bare attribute name with no value.
			if ( is_string( $value ) ) {
				$html .= '="' . htmlspecialchars( $value, $escape_flags, 'UTF-8' ) . '"';
			}
		}

		// The self-closing flag is only preserved outside of the HTML namespace.
		if ( 'html' !== $this->get_namespace() && $this->has_self_closing_flag() ) {
			$html .= '/';
		}

		$html .= '>';
	}

	// Any error recorded while scanning means the output cannot be trusted.
	if ( null !== $this->get_last_error() ) {
		return null;
	}

	return $html;
}

/**
 * Replaces the inner markup of the currently-matched tag with provided HTML.
 *
 * This function will normalize the given input and enforce the boundaries
 * within the existing HTML where it's called.
 *
 * When `false` is returned the processor may have advanced past the
 * tag it was matched on when the call was made.
 *
 * @since 6.8.0
 *
 * @param string $new_inner_html New HTML to inject as inner HTML for the currently-matched tag.
 * @return bool|null `true` if the inner markup was replaced, `false` if the replacement
 *                   could not be performed, or `null` if called on a node which doesn't
 *                   allow changing the inner HTML.
 */
public function set_inner_html( string $new_inner_html ): ?bool {
	$tag_name = $this->get_tag();

	if (
		WP_HTML_Tag_Processor::STATE_MATCHED_TAG !== $this->parser_state ||
		$this->is_tag_closer() ||
		( 'html' === $this->get_namespace() &&
			(
				self::is_void( $tag_name ) ||
				in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
			)
		)
	) {
		// @todo Support setting inner HTML for SCRIPT, STYLE, TEXTAREA, and TITLE.
		return null;
	}

	$fragment = $this->spawn_fragment_parser( $new_inner_html );
	if ( null === $fragment ) {
		// No fragment parser can be created with this node as its context.
		return null;
	}

	$new_markup = $fragment->serialize();
	if ( null === $new_markup ) {
		// The provided HTML could not be normalized; leave the document untouched.
		return false;
	}

	/*
	 * Use distinctive bookmark names so that bookmarks set by calling
	 * code (e.g. "start" or "end") are never overwritten or released here.
	 */
	$start_bookmark = 'set-inner-html-start';
	$end_bookmark   = 'set-inner-html-end';

	if ( ! $this->set_bookmark( $start_bookmark ) ) {
		return false;
	}

	// Advance until the depth drops below the depth of the opening tag.
	$depth = $this->get_current_depth();
	while ( $this->get_current_depth() >= $depth && $this->next_token() ) {
		continue;
	}

	if (
		$this->paused_at_incomplete_token() ||
		null !== $this->get_last_error() ||
		! $this->set_bookmark( $end_bookmark )
	) {
		$this->release_bookmark( $start_bookmark );
		return false;
	}

	// Bookmarks set through this class are stored with a leading underscore.
	$start = $this->bookmarks[ "_{$start_bookmark}" ];
	$end   = $this->bookmarks[ "_{$end_bookmark}" ];

	$inner_start  = $start->start + $start->length;
	$inner_length = $end->start - $inner_start;

	// Refuse to build a replacement whose end precedes its start.
	if ( $inner_length < 0 ) {
		$this->release_bookmark( $start_bookmark );
		$this->release_bookmark( $end_bookmark );
		return false;
	}

	$this->lexical_updates[] = new WP_HTML_Text_Replacement(
		$inner_start,
		$inner_length,
		$new_markup
	);

	// Apply the replacement, then return to the tag whose contents were replaced.
	$this->get_updated_html();
	$this->seek( $start_bookmark );

	$this->release_bookmark( $start_bookmark );
	$this->release_bookmark( $end_bookmark );

	return true;
}

/**
* Removes a bookmark that is no longer needed.
*
Expand Down
2 changes: 1 addition & 1 deletion src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2840,7 +2840,7 @@ public function get_qualified_tag_name(): ?string {
}

if ( 'html' === $this->get_namespace() ) {
return $tag_name;
return strtolower( $tag_name );
}

$lower_tag_name = strtolower( $tag_name );
Expand Down