diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index c50032829f63f..5bf69bab3ebbe 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -422,6 +422,61 @@ function ( WP_HTML_Token $token ): void { }; } + /** + * Creates a fragment processor with the current node as its context element. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm + * + * @param string $html Input HTML fragment to process. + * @return static|null The created processor if successful, otherwise null. + */ + public function spawn_fragment_parser( string $html ): ?self { + if ( $this->get_token_type() !== '#tag' ) { + return null; + } + + $namespace = $this->get_namespace(); + + /* + * Prevent creating fragments at "self-contained" nodes. + * + * @see https://github.com/WordPress/wordpress-develop/pull/7141 + * @see https://github.com/WordPress/wordpress-develop/pull/7198 + */ + if ( + 'html' === $namespace && + in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ) { + return null; + } + + $fragment_processor = self::create_fragment( $html ); + $fragment_processor->compat_mode = $this->compat_mode; + + $fragment_processor->context_node = clone $this->state->current_token; + $fragment_processor->context_node->bookmark_name = 'context-node'; + $fragment_processor->context_node->on_destroy = null; + + $context_element = array( $fragment_processor->context_node->node_name, array() ); + foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { + $context_element[1][ $name ] = $value; + } + + $fragment_processor->breadcrumbs = array(); + + if ( 'TEMPLATE' === $context_element[0] ) { + $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $fragment_processor->reset_insertion_mode_appropriately(); + + // @todo Set the parser's form element pointer. + + $fragment_processor->state->encoding_confidence = 'irrelevant'; + + return $fragment_processor; + } + /** * Stops the parser and terminates its execution when encountering unsupported markup. * @@ -4501,7 +4556,7 @@ private function step_in_foreign_content(): bool { $this->state->stack_of_open_elements->pop(); } - return $this->step( self::REPROCESS_CURRENT_NODE ); + goto in_foreign_content_process_in_current_insertion_mode; } /* @@ -4577,6 +4632,7 @@ private function step_in_foreign_content(): bool { goto in_foreign_content_end_tag_loop; } + in_foreign_content_process_in_current_insertion_mode: switch ( $this->state->insertion_mode ) { case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: return $this->step_initial(); diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 54d60f8c78a66..f6b518b96c940 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -21,6 +21,8 @@ * @group html-api-html5lib-tests */ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase { + const TREE_INDENT = ' '; + /** * Skip specific tests that may not be supported or have known issues. */ @@ -139,10 +141,6 @@ public function data_external_html5lib_tests() { * @return bool True if the test case should be skipped. False otherwise. */ private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { - if ( null !== $test_context_element && 'body' !== $test_context_element ) { - return true; - } - if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { return true; } @@ -158,11 +156,77 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. */ private static function build_tree_representation( ?string $fragment_context, string $html ) { - $processor = $fragment_context - ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) - : WP_HTML_Processor::create_full_parser( $html ); - if ( null === $processor ) { - throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + $processor = null; + if ( $fragment_context ) { + if ( 'body' === $fragment_context ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + } else { + + /* + * If the string of characters starts with "svg ", the context + * element is in the SVG namespace and the substring after + * "svg " is the local name. If the string of characters starts + * with "math ", the context element is in the MathML namespace + * and the substring after "math " is the local name. + * Otherwise, the context element is in the HTML namespace and + * the string is the local name. + */ + if ( str_starts_with( $fragment_context, 'svg ' ) ) { + $tag_name = substr( $fragment_context, 4 ); + if ( 'svg' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { + $tag_name = substr( $fragment_context, 5 ); + if ( 'math' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } else { + if ( in_array( + $fragment_context, + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ), + true + ) ) { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + $parent_processor->next_tag(); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + } + $parent_processor->next_tag( $fragment_context ); + } + if ( null !== $parent_processor->get_unsupported_exception() ) { + throw $parent_processor->get_unsupported_exception(); + } + if ( null !== $parent_processor->get_last_error() ) { + throw new Exception( $parent_processor->get_last_error() ); + } + $processor = $parent_processor->spawn_fragment_parser( $html ); + } + + if ( null === $processor ) { + throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + } + } else { + $processor = WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { + throw new Exception( 'Could not create a full parser.' ); + } } /* @@ -170,9 +234,8 @@ private static function build_tree_representation( ?string $fragment_context, st * and requires adjustment to initial parameters. * The full parser will not. */ - $output = $fragment_context ? "\n \n \n" : ''; - $indent_level = $fragment_context ? 2 : 0; - $indent = ' '; + $output = ''; + $indent_level = 0; $was_text = null; $text_node = ''; @@ -225,7 +288,7 @@ private static function build_tree_representation( ?string $fragment_context, st ++$indent_level; } - $output .= str_repeat( $indent, $tag_indent ) . "<{$tag_name}>\n"; + $output .= str_repeat( self::TREE_INDENT, $tag_indent ) . "<{$tag_name}>\n"; $attribute_names = $processor->get_attribute_names_with_prefix( '' ); if ( $attribute_names ) { @@ -278,18 +341,18 @@ static function ( $a, $b ) { if ( true === $val ) { $val = ''; } - $output .= str_repeat( $indent, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n"; + $output .= str_repeat( self::TREE_INDENT, $tag_indent + 1 ) . "{$display_name}=\"{$val}\"\n"; } } // Self-contained tags contain their inner contents as modifiable text. $modifiable_text = $processor->get_modifiable_text(); if ( '' !== $modifiable_text ) { - $output .= str_repeat( $indent, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n"; + $output .= str_repeat( self::TREE_INDENT, $tag_indent + 1 ) . "\"{$modifiable_text}\"\n"; } if ( 'html' === $namespace && 'TEMPLATE' === $token_name ) { - $output .= str_repeat( $indent, $indent_level ) . "content\n"; + $output .= str_repeat( self::TREE_INDENT, $indent_level ) . "content\n"; ++$indent_level; } @@ -303,14 +366,14 @@ static function ( $a, $b ) { } $was_text = true; if ( '' === $text_node ) { - $text_node .= str_repeat( $indent, $indent_level ) . '"'; + $text_node .= str_repeat( self::TREE_INDENT, $indent_level ) . '"'; } $text_node .= $text_content; break; case '#funky-comment': // Comments must be "<" then "!-- " then the data then " -->". - $output .= str_repeat( $indent, $indent_level ) . "\n"; + $output .= str_repeat( self::TREE_INDENT, $indent_level ) . "\n"; break; case '#comment': @@ -333,7 +396,7 @@ static function ( $a, $b ) { throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" ); } // Comments must be "<" then "!-- " then the data then " -->". - $output .= str_repeat( $indent, $indent_level ) . "\n"; + $output .= str_repeat( self::TREE_INDENT, $indent_level ) . "\n"; break; default: @@ -449,7 +512,7 @@ public static function parse_html5_dat_testfile( $filename ) { * context element as context. */ case 'document-fragment': - $test_context_element = explode( ' ', $line )[0]; + $test_context_element = trim( $line ); break; /*