From d8ac3610365caf54b86ccb75cf9b3390c0e49c2f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 12 Sep 2024 17:53:03 +0200 Subject: [PATCH 01/39] Add spawn_fragment_parser method --- .../html-api/class-wp-html-processor.php | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed6ac0299b3c3..ad20c36e6205b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -424,6 +424,55 @@ function ( WP_HTML_Token $token ): void { }; } + /** + * Creates a fragment processor with the current node as its context element. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm + * + * @param string $html Input HTML fragment to process. + * @return static|null The created processor if successful, otherwise null. + */ + private function spawn_fragment_parser( string $html ): ?self { + if ( $this->get_token_type() !== '#tag' ) { + return null; + } + + /* + * Prevent creating fragments at "self-contained" nodes. 
+ * + * @see https://github.com/WordPress/wordpress-develop/pull/7141 + * @see https://github.com/WordPress/wordpress-develop/pull/7198 + */ + if ( + 'html' === $this->get_namespace() && + in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ) { + return null; + } + + $fragment_processor = self::create_fragment( $html ); + $fragment_processor->compat_mode = $this->compat_mode; + + // @todo The context element probably needs a namespace{ + $context_element = array( $this->get_tag(), array() ); + foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { + $context_element[1][ $name ] = $value; + } + $fragment_processor->state->context_node = $context_element; + + if ( 'TEMPLATE' === $context_element[0] ) { + $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $fragment_processor->reset_insertion_mode_appropriately(); + + // @todo Set the parser's form element pointer. + + $fragment_processor->state->encoding_confidence = 'irrelevant'; + + return $fragment_processor; + } + /** * Stops the parser and terminates its execution when encountering unsupported markup. 
* From ad8f8db5589d3d88061dd714e8cf17a994fc9d55 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 12 Sep 2024 18:13:05 +0200 Subject: [PATCH 02/39] Fix the processor context_node --- src/wp-includes/html-api/class-wp-html-processor.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ad20c36e6205b..07a7b31450cd4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -453,11 +453,18 @@ private function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - // @todo The context element probably needs a namespace{ $context_element = array( $this->get_tag(), array() ); foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { $context_element[1][ $name ] = $value; } + + $fragment_processor->context_node = new WP_HTML_Token( + 'context-node', + $context_element[0], + $this->has_self_closing_flag() + ); + $fragment_processor->context_node->namespace = $this->get_namespace(); + $fragment_processor->state->context_node = $context_element; if ( 'TEMPLATE' === $context_element[0] ) { From e2efee4d844eef86cd8146427f3b55466f6e3abb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:36:57 +0200 Subject: [PATCH 03/39] Make it public --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 07a7b31450cd4..073e23285d40b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -432,7 +432,7 @@ function ( WP_HTML_Token $token ): void { * @param string $html Input HTML fragment to process. 
* @return static|null The created processor if successful, otherwise null. */ - private function spawn_fragment_parser( string $html ): ?self { + public function spawn_fragment_parser( string $html ): ?self { if ( $this->get_token_type() !== '#tag' ) { return null; } From 4f5249c82ad100c38ccca30ccc8ebe9999fd91d6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:38:49 +0200 Subject: [PATCH 04/39] Fix spawn_fragment_parser method --- .../html-api/class-wp-html-processor.php | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 073e23285d40b..24a02a26819a7 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -437,6 +437,8 @@ public function spawn_fragment_parser( string $html ): ?self { return null; } + $namespace = $this->get_namespace(); + /* * Prevent creating fragments at "self-contained" nodes. 
* @@ -444,7 +446,7 @@ public function spawn_fragment_parser( string $html ): ?self { * @see https://github.com/WordPress/wordpress-develop/pull/7198 */ if ( - 'html' === $this->get_namespace() && + 'html' === $namespace && in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { return null; @@ -453,19 +455,17 @@ public function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - $context_element = array( $this->get_tag(), array() ); + + $fragment_processor->context_node = clone $this->state->current_token; + $fragment_processor->context_node->bookmark_name = 'context-node'; + $fragment_processor->context_node->on_destroy = null; + + $context_element = array( $fragment_processor->context_node->node_name, array() ); foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { $context_element[1][ $name ] = $value; } - $fragment_processor->context_node = new WP_HTML_Token( - 'context-node', - $context_element[0], - $this->has_self_closing_flag() - ); - $fragment_processor->context_node->namespace = $this->get_namespace(); - - $fragment_processor->state->context_node = $context_element; + $fragment_processor->breadcrumbs = array(); if ( 'TEMPLATE' === $context_element[0] ) { $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; From eaed8634dee6e084d63d597969a72d077c5c382f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:39:28 +0200 Subject: [PATCH 05/39] Process non-body context tests --- tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 808fa39d17f26..7f607ad63ebfc 100644 --- 
a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -138,10 +138,6 @@ public function data_external_html5lib_tests() { * @return bool True if the test case should be skipped. False otherwise. */ private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { - if ( null !== $test_context_element && 'body' !== $test_context_element ) { - return true; - } - if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { return true; } From 25b18fa88d860b83ff7b126a12f37b205bfc13c3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:42:30 +0200 Subject: [PATCH 06/39] Handle all the different document context in html5lib tests --- .../html-api/wpHtmlProcessorHtml5lib.php | 76 +++++++++++++++++-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 7f607ad63ebfc..041132ed50c20 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -153,11 +153,77 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. */ private static function build_tree_representation( ?string $fragment_context, string $html ) { - $processor = $fragment_context - ? 
WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) - : WP_HTML_Processor::create_full_parser( $html ); - if ( null === $processor ) { - throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + $processor = null; + if ( $fragment_context ) { + if ( 'body' === $fragment_context ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + } else { + + /* + * If the string of characters starts with "svg ", the context + * element is in the SVG namespace and the substring after + * "svg " is the local name. If the string of characters starts + * with "math ", the context element is in the MathML namespace + * and the substring after "math " is the local name. + * Otherwise, the context element is in the HTML namespace and + * the string is the local name. + */ + if ( str_starts_with( $fragment_context, 'svg ' ) ) { + $tag_name = substr( $fragment_context, 4 ); + if ( 'svg' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { + $tag_name = substr( $fragment_context, 5 ); + if ( 'math' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } else { + if ( in_array( + $fragment_context, + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ), + true + ) ) { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + $parent_processor->next_tag(); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + } + 
$parent_processor->next_tag( $fragment_context ); + } + if ( null !== $parent_processor->get_unsupported_exception() ) { + throw $parent_processor->get_unsupported_exception(); + } + if ( null !== $parent_processor->get_last_error() ) { + throw new Exception( $parent_processor->get_last_error() ); + } + $processor = $parent_processor->spawn_fragment_parser( $html ); + } + + if ( null === $processor ) { + throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + } + } else { + $processor = WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { + throw new Exception( 'Could not create a full parser.' ); + } } /* From 9ac142f67a2a91a43a2daa669d467b5acf8efcfc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 13:45:28 +0200 Subject: [PATCH 07/39] lints --- src/wp-includes/html-api/class-wp-html-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 24a02a26819a7..bce9949d286b0 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -455,7 +455,6 @@ public function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - $fragment_processor->context_node = clone $this->state->current_token; $fragment_processor->context_node->bookmark_name = 'context-node'; $fragment_processor->context_node->on_destroy = null; From 3f35886e4abe09c3a51e63fc8c88a680418f05b6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 15:49:25 +0100 Subject: [PATCH 08/39] Make spawned fragment parse have HTML > [context-node-tag] in breadcrumbs --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index b34ea958833f1..8daa92eabacb4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -464,7 +464,7 @@ public function spawn_fragment_parser( string $html ): ?self { $context_element[1][ $name ] = $value; } - $fragment_processor->breadcrumbs = array(); + $fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name ); if ( 'TEMPLATE' === $context_element[0] ) { $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; From ba9e218a32a0e401e1b5e2473f5f090be86d4e73 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 15:51:22 +0100 Subject: [PATCH 09/39] Fallback to context node when checking namespace --- src/wp-includes/html-api/class-wp-html-processor.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8daa92eabacb4..ba02ae5296dc2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4968,16 +4968,20 @@ private function bookmark_token() { */ /** - * Indicates the namespace of the current token, or "html" if there is none. + * Indicates the namespace of the current token, the context node, or "html". * * @return string One of "html", "math", or "svg". */ public function get_namespace(): string { - if ( ! 
isset( $this->current_element ) ) { - return parent::get_namespace(); + if ( isset( $this->current_element ) ) { + return $this->current_element->token->namespace; + } + + if ( isset( $this->context_node ) ) { + return $this->context_node->namespace; } - return $this->current_element->token->namespace; + return parent::get_namespace(); } /** From fe48fa517b2b90cd10f435d5400e43613aa9ce26 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 20:15:20 +0100 Subject: [PATCH 10/39] Add tests --- .../tests/html-api/wpHtmlProcessor.php | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 7e568286ccdf9..db6af678ea9a9 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1058,4 +1058,59 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to $this->assertEquals( $expected_token_counts, $processor->token_seen_count, 'Snapshot: ' . var_export( $processor->token_seen_count, true ) ); $this->assertEquals( $expected_xpaths, $actual_xpaths, 'Snapshot: ' . var_export( $actual_xpaths, true ) ); } + + /** + * @ticket TBD + */ + public function test_spawn_fragment_parser_in_foreign_content() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $this->assertTrue( $processor->next_tag( 'SVG' ) ); + + $fragment = $processor->spawn_fragment_parser( "\0preceded-by-nul-byte
" ); + + $this->assertSame( 'svg', $fragment->get_namespace() ); + $this->assertTrue( $fragment->next_token() ); + + /* + * In HTML parsing, a nul byte would be ignored. + * In SVG it should be replaced with a replacement character. + */ + $this->assertSame( '#text', $fragment->get_token_type() ); + $this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() ); + + $this->assertTrue( $fragment->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $fragment->get_namespace() ); + + $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) ); + $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() ); + $this->assertTrue( $fragment->next_tag( 'foreignObject' ) ); + $this->assertSame( 'svg', $fragment->get_namespace() ); + } + + /** + * @ticket TBD + */ + public function test_spawn_fragment_parser_in_foreign_content_integration_point() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); + + $fragment = $processor->spawn_fragment_parser( "\0not-preceded-by-nul-byte" ); + + $this->assertSame( 'svg', $fragment->get_namespace() ); + $this->assertTrue( $fragment->next_token() ); + + // In HTML parsing, the nul byte is ignored and the text is reached. + $this->assertSame( '#text', $fragment->get_token_type() ); + $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() ); + + /* + * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace. + * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close. 
+ */ + $this->assertTrue( $fragment->next_tag( 'RECT' ) ); + $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() ); + $this->assertSame( 'html', $fragment->get_namespace() ); + $this->assertTrue( $fragment->has_self_closing_flag() ); + $this->assertTrue( $fragment->expects_closer() ); + } } From fa4c5cb59aeeef9fb0736c745f8e0ff18f0ad0bf Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 20:15:36 +0100 Subject: [PATCH 11/39] Set the form element pointer on the fragment parser --- .../html-api/class-wp-html-processor.php | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ba02ae5296dc2..df0dd13ffe76d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -472,7 +472,18 @@ public function spawn_fragment_parser( string $html ): ?self { $fragment_processor->reset_insertion_mode_appropriately(); - // @todo Set the parser's form element pointer. + /* + * > Set the parser's form element pointer to the nearest node to the context element that + * > is a form element (going straight up the ancestor chain, and including the element + * > itself, if it is a form element), if any. (If there is no such form element, the + * > form element pointer keeps its initial value, null.) 
+ */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) { + if ( 'FORM' === $element->node_name && 'html' === $element->namespace ) { + $fragment_processor->state->form_element = $element; + break; + } + } $fragment_processor->state->encoding_confidence = 'irrelevant'; From 943bbdde4a158ef360e040a4e2956dfdf349082a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 20:43:30 +0100 Subject: [PATCH 12/39] Revert "Fallback to context node when checking namespace" This reverts commit ba9e218a32a0e401e1b5e2473f5f090be86d4e73. --- src/wp-includes/html-api/class-wp-html-processor.php | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 0574783a54d77..cd2802e959e2e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4984,20 +4984,16 @@ private function bookmark_token() { */ /** - * Indicates the namespace of the current token, the context node, or "html". + * Indicates the namespace of the current token, or "html" if there is none. * * @return string One of "html", "math", or "svg". */ public function get_namespace(): string { - if ( isset( $this->current_element ) ) { - return $this->current_element->token->namespace; - } - - if ( isset( $this->context_node ) ) { - return $this->context_node->namespace; + if ( ! 
isset( $this->current_element ) ) { + return parent::get_namespace(); } - return parent::get_namespace(); + return $this->current_element->token->namespace; } /** From e3a0a8685800729e5cc1e49b3f5f1e7d5306c89f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 21:01:21 +0100 Subject: [PATCH 13/39] Fix initial namespace on integration nodes --- src/wp-includes/html-api/class-wp-html-processor.php | 9 +++++++-- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cd2802e959e2e..ed926c45cfa76 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -437,7 +437,7 @@ public function spawn_fragment_parser( string $html ): ?self { return null; } - $namespace = $this->get_namespace(); + $namespace = $this->current_element->token->namespace; /* * Prevent creating fragments at "self-contained" nodes. @@ -452,7 +452,12 @@ public function spawn_fragment_parser( string $html ): ?self { return null; } - $fragment_processor = self::create_fragment( $html ); + $fragment_processor = self::create_fragment( $html ); + + $fragment_processor->change_parsing_namespace( + $this->current_element->token->integration_node_type ? 
'html' : $namespace + ); + $fragment_processor->compat_mode = $this->compat_mode; $fragment_processor->context_node = clone $this->state->current_token; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 9a89d3f528958..851a10bdf3b39 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1053,9 +1053,14 @@ public function test_spawn_fragment_parser_in_foreign_content_integration_point( $processor = WP_HTML_Processor::create_full_parser( '' ); $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); - $fragment = $processor->spawn_fragment_parser( "\0not-preceded-by-nul-byte" ); + $fragment = $processor->spawn_fragment_parser( "\0not-preceded-by-nul-byte" ); + + // Nothing has been processed, the html namespace should be used for parsing as an integration point. + $this->assertSame( 'html', $fragment->get_namespace() ); + + // HTML parsing transforms IMAGE into IMG. + $this->assertTrue( $fragment->next_tag( 'IMG' ) ); - $this->assertSame( 'svg', $fragment->get_namespace() ); $this->assertTrue( $fragment->next_token() ); // In HTML parsing, the nul byte is ignored and the text is reached. From 27a978146df84499c9a9cd31aa57620c751df01f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 7 Nov 2024 17:28:40 +0100 Subject: [PATCH 14/39] Rename method, use static constructor, add comments --- .../html-api/class-wp-html-processor.php | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed926c45cfa76..9efcdd90aa15c 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -425,14 +425,23 @@ function ( WP_HTML_Token $token ): void { } /** - * Creates a fragment processor with the current node as its context element. 
+ * Creates a fragment processor at the current node. + * + * HTML Fragment parsing always happens with a context node. HTML Fragment Processors can be + * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment()`. + * + * The context node may impact how a fragment of HTML is parsed. For example, when parsing + * `AB`: + * + * With a BODY context node results in the following tree: + * * * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm * * @param string $html Input HTML fragment to process. * @return static|null The created processor if successful, otherwise null. */ - public function spawn_fragment_parser( string $html ): ?self { + public function create_fragment_at_current_node( string $html ) { if ( $this->get_token_type() !== '#tag' ) { return null; } @@ -452,7 +461,7 @@ public function spawn_fragment_parser( string $html ): ?self { return null; } - $fragment_processor = self::create_fragment( $html ); + $fragment_processor = static::create_fragment( $html ); $fragment_processor->change_parsing_namespace( $this->current_element->token->integration_node_type ? 
'html' : $namespace From 07895389209e08913c6261f63f7bbe8422812754 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 8 Nov 2024 19:56:42 +0100 Subject: [PATCH 15/39] Update method name in tests --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 851a10bdf3b39..864093b66c0a8 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1021,11 +1021,11 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to /** * @ticket TBD */ - public function test_spawn_fragment_parser_in_foreign_content() { + public function test_create_fragment_at_current_node_in_foreign_content() { $processor = WP_HTML_Processor::create_full_parser( '' ); $this->assertTrue( $processor->next_tag( 'SVG' ) ); - $fragment = $processor->spawn_fragment_parser( "\0preceded-by-nul-byte
" ); + $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte
" ); $this->assertSame( 'svg', $fragment->get_namespace() ); $this->assertTrue( $fragment->next_token() ); @@ -1049,11 +1049,11 @@ public function test_spawn_fragment_parser_in_foreign_content() { /** * @ticket TBD */ - public function test_spawn_fragment_parser_in_foreign_content_integration_point() { + public function test_create_fragment_at_current_node_in_foreign_content_integration_point() { $processor = WP_HTML_Processor::create_full_parser( '' ); $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); - $fragment = $processor->spawn_fragment_parser( "\0not-preceded-by-nul-byte" ); + $fragment = $processor->create_fragment_at_current_node( "\0not-preceded-by-nul-byte" ); // Nothing has been processed, the html namespace should be used for parsing as an integration point. $this->assertSame( 'html', $fragment->get_namespace() ); From 5e8b82ed6025f325a5188d63788e30e30927d177 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 8 Nov 2024 19:58:54 +0100 Subject: [PATCH 16/39] Add ticket to tests --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 864093b66c0a8..68dbdf2817d33 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1019,7 +1019,7 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to } /** - * @ticket TBD + * @ticket 62357 */ public function test_create_fragment_at_current_node_in_foreign_content() { $processor = WP_HTML_Processor::create_full_parser( '' ); @@ -1047,7 +1047,7 @@ public function test_create_fragment_at_current_node_in_foreign_content() { } /** - * @ticket TBD + * @ticket 62357 */ public function test_create_fragment_at_current_node_in_foreign_content_integration_point() { $processor = WP_HTML_Processor::create_full_parser( '' ); From 
37f9ff4d943a769337668ae4bc1394bf11fe2846 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 11:59:55 +0100 Subject: [PATCH 17/39] Update method name in html5lib tests --- tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 041132ed50c20..aaef30dd09b5b 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -213,7 +213,7 @@ private static function build_tree_representation( ?string $fragment_context, st if ( null !== $parent_processor->get_last_error() ) { throw new Exception( $parent_processor->get_last_error() ); } - $processor = $parent_processor->spawn_fragment_parser( $html ); + $processor = $parent_processor->create_fragment_at_current_node( $html ); } if ( null === $processor ) { From 80ae6f267ec6797db16855e82d445b93749c297f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 12:36:09 +0100 Subject: [PATCH 18/39] Handle null return from create_fragment null should not be returned in this case, but it is part of the signature and should be covered here. --- src/wp-includes/html-api/class-wp-html-processor.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 090e4491bf0f7..87c413250daa7 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -462,6 +462,9 @@ public function create_fragment_at_current_node( string $html ) { } $fragment_processor = static::create_fragment( $html ); + if ( null === $fragment_processor ) { + return null; + } $fragment_processor->change_parsing_namespace( $this->current_element->token->integration_node_type ? 
'html' : $namespace From 98664028b150e29a957efe7439b07c61fae88143 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 12:43:04 +0100 Subject: [PATCH 19/39] Use a cloned copy of the FORM element from the parent processor --- src/wp-includes/html-api/class-wp-html-processor.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 87c413250daa7..4738c0cc90932 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -497,7 +497,9 @@ public function create_fragment_at_current_node( string $html ) { */ foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) { if ( 'FORM' === $element->node_name && 'html' === $element->namespace ) { - $fragment_processor->state->form_element = $element; + $fragment_processor->state->form_element = clone $element; + $fragment_processor->state->form_element->bookmark_name = null; + $fragment_processor->state->form_element->on_destroy = null; break; } } From 00ed28c2191f7bd05e021d7d0fd0475b4f375a7f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 14:19:26 +0100 Subject: [PATCH 20/39] Use create_fragment_at_current_node internally in create_fragment --- .../html-api/class-wp-html-processor.php | 74 ++++++++++--------- 1 file changed, 38 insertions(+), 36 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4738c0cc90932..a42af819ddf04 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -297,32 +297,22 @@ public static function create_fragment( $html, $context = '', $encoding = return null; } - $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); - $processor->state->context_node = array( 'BODY', array() ); - 
$processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - $processor->state->encoding = $encoding; - $processor->state->encoding_confidence = 'certain'; - - // @todo Create "fake" bookmarks for non-existent but implied nodes. - $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); - $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); - - $root_node = new WP_HTML_Token( - 'root-node', - 'HTML', - false - ); - - $processor->state->stack_of_open_elements->push( $root_node ); + $context_processor = static::create_full_parser( "{$context}" ); + if ( null === $context_processor ) { + return null; + } - $context_node = new WP_HTML_Token( - 'context-node', - $processor->state->context_node[0], - false - ); + while ( $context_processor->next_tag() ) { + $context_processor->set_bookmark( 'final_node' ); + } + if ( $context_processor->has_bookmark( 'final_node' ) ) { + $context_processor->seek( 'final_node' ); + $processor = $context_processor->create_fragment_at_current_node( $html ); + } - $processor->context_node = $context_node; - $processor->breadcrumbs = array( 'HTML', $context_node->node_name ); + if ( ! isset( $processor ) ) { + return null; + } return $processor; } @@ -461,29 +451,32 @@ public function create_fragment_at_current_node( string $html ) { return null; } - $fragment_processor = static::create_fragment( $html ); - if ( null === $fragment_processor ) { - return null; - } - - $fragment_processor->change_parsing_namespace( - $this->current_element->token->integration_node_type ? 'html' : $namespace - ); + $fragment_processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); $fragment_processor->compat_mode = $this->compat_mode; - $fragment_processor->context_node = clone $this->state->current_token; + // @todo Create "fake" bookmarks for non-existent but implied nodes. 
+ $fragment_processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); + $root_node = new WP_HTML_Token( + 'root-node', + 'HTML', + false + ); + $fragment_processor->state->stack_of_open_elements->push( $root_node ); + + $fragment_processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); + $fragment_processor->context_node = clone $this->current_element->token; $fragment_processor->context_node->bookmark_name = 'context-node'; $fragment_processor->context_node->on_destroy = null; - $context_element = array( $fragment_processor->context_node->node_name, array() ); + $fragment_processor->state->context_node = array( $fragment_processor->context_node->node_name, array() ); foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { - $context_element[1][ $name ] = $value; + $fragment_processor->state->context_node[1][ $name ] = $value; } $fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name ); - if ( 'TEMPLATE' === $context_element[0] ) { + if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) { $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; } @@ -506,6 +499,15 @@ public function create_fragment_at_current_node( string $html ) { $fragment_processor->state->encoding_confidence = 'irrelevant'; + /* + * Updating the parsing namespace near the end of the process. + * This is important so that any push/pop from the stack of open + * elements does not change the parsing namespace. + */ + $fragment_processor->change_parsing_namespace( + $this->current_element->token->integration_node_type ? 
'html' : $namespace + ); + return $fragment_processor; } From c247869bd9d6cd661ca004ee745cb63338e0c8e9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 15:50:28 +0100 Subject: [PATCH 21/39] Use create_fragment_at_node internally in create_fragment --- .../html-api/class-wp-html-processor.php | 2 +- .../html-api/wpHtmlProcessorHtml5lib.php | 97 ++++++++----------- 2 files changed, 43 insertions(+), 56 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a42af819ddf04..07df6901b3e22 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -293,7 +293,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return static|null The created processor if successful, otherwise null. */ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { - if ( '' !== $context || 'UTF-8' !== $encoding ) { + if ( 'UTF-8' !== $encoding ) { return null; } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index aaef30dd09b5b..4cb5860c796b7 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -155,67 +155,54 @@ private static function should_skip_test( ?string $test_context_element, string private static function build_tree_representation( ?string $fragment_context, string $html ) { $processor = null; if ( $fragment_context ) { - if ( 'body' === $fragment_context ) { - $processor = WP_HTML_Processor::create_fragment( $html ); - } else { - - /* - * If the string of characters starts with "svg ", the context - * element is in the SVG namespace and the substring after - * "svg " is the local name. 
If the string of characters starts - * with "math ", the context element is in the MathML namespace - * and the substring after "math " is the local name. - * Otherwise, the context element is in the HTML namespace and - * the string is the local name. - */ - if ( str_starts_with( $fragment_context, 'svg ' ) ) { - $tag_name = substr( $fragment_context, 4 ); - if ( 'svg' === $tag_name ) { - $parent_processor = WP_HTML_Processor::create_full_parser( '' ); - } else { - $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); - } - $parent_processor->next_tag( $tag_name ); - } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { - $tag_name = substr( $fragment_context, 5 ); - if ( 'math' === $tag_name ) { - $parent_processor = WP_HTML_Processor::create_full_parser( '' ); - } else { - $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); - } - $parent_processor->next_tag( $tag_name ); + /* + * If the string of characters starts with "svg ", the context + * element is in the SVG namespace and the substring after + * "svg " is the local name. If the string of characters starts + * with "math ", the context element is in the MathML namespace + * and the substring after "math " is the local name. + * Otherwise, the context element is in the HTML namespace and + * the string is the local name. + */ + if ( str_starts_with( $fragment_context, 'svg ' ) ) { + $tag_name = substr( $fragment_context, 4 ); + if ( 'svg' === $tag_name ) { + $fragment_context_html = ''; } else { - if ( in_array( - $fragment_context, - array( - 'caption', - 'col', - 'colgroup', - 'tbody', - 'td', - 'tfoot', - 'th', - 'thead', - 'tr', - ), - true - ) ) { - $parent_processor = WP_HTML_Processor::create_full_parser( "
<{$fragment_context}>" ); - $parent_processor->next_tag(); - } else { - $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); - } - $parent_processor->next_tag( $fragment_context ); + $fragment_context_html = "<{$tag_name}>"; } - if ( null !== $parent_processor->get_unsupported_exception() ) { - throw $parent_processor->get_unsupported_exception(); + } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { + $tag_name = substr( $fragment_context, 5 ); + if ( 'math' === $tag_name ) { + $fragment_context_html = ''; + } else { + $fragment_context_html = "<{$tag_name}>"; } - if ( null !== $parent_processor->get_last_error() ) { - throw new Exception( $parent_processor->get_last_error() ); + } else { + // Tags that only appear in tables need a special case. + if ( in_array( + $fragment_context, + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ), + true + ) ) { + $fragment_context_html = "
<{$fragment_context}>"; + } else { + $fragment_context_html = "<{$fragment_context}>"; } - $processor = $parent_processor->create_fragment_at_current_node( $html ); } + $processor = WP_HTML_Processor::create_fragment( $html, $fragment_context_html ); + if ( null === $processor ) { throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); } From bcebeba51ee3be953b9c6e05275996d3432730b0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 15:50:54 +0100 Subject: [PATCH 22/39] Remove stale comment --- tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index aaef30dd09b5b..7abe63a859954 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -226,11 +226,6 @@ private static function build_tree_representation( ?string $fragment_context, st } } - /* - * The fragment parser will start in 2 levels deep at: html > body > [position] - * and requires adjustment to initial parameters. - * The full parser will not. 
- */ $output = ''; $indent_level = 0; $was_text = null; From 9e11f195d3173adb2a119bdd6f81fef813426486 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 12 Nov 2024 18:18:05 +0100 Subject: [PATCH 23/39] Improve method documentation with examples --- .../html-api/class-wp-html-processor.php | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4738c0cc90932..0e492ff03b8e7 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -428,13 +428,35 @@ function ( WP_HTML_Token $token ): void { * Creates a fragment processor at the current node. * * HTML Fragment parsing always happens with a context node. HTML Fragment Processors can be - * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment()`. + * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment( $html )`. * - * The context node may impact how a fragment of HTML is parsed. For example, when parsing - * `AB`: + * The context node may impact how a fragment of HTML is parsed. For example, consider the HTML + * fragment ``. * * With a BODY context node results in the following tree: * + * └─#text Inside TD? + * + * Notice that the `
Inside TD?` tags are completely ignored. + * + * Compare that with an SVG context node that produces the following tree: + * + * ├─svg:td + * └─#text Inside TD? + * + * Here, a `td` node in the `svg` namespace is created, and its self-closing flag is respected. + * This is a peculiarity of parsing HTML in foreign content like SVG. + * + * Finally, consider the tree produced with a TABLE context node: + * + * └─TBODY + * └─TR + * └─TD + * └─#text Inside TD? + * + * These examples demonstrate how important the context node may be when processing an HTML + * fragment. Special care must be taken when processing fragments that are expected to appear + * in specific contexts. SVG and TABLE are good examples, but there are others. * * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm * From 662a9b5867481c12e7b4e5a8029451e569030a0c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 20 Nov 2024 18:44:58 +0100 Subject: [PATCH 24/39] Use starts_with assertion for nul byte test --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 7d2d59fbe1885..ed1d8358065db 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1060,7 +1060,7 @@ public function test_create_fragment_at_current_node_in_foreign_content() { * In SVG it should be replaced with a replacement character. 
*/ $this->assertSame( '#text', $fragment->get_token_type() ); - $this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() ); + $this->assertStringStartsWith( "\u{FFFD}", $fragment->get_modifiable_text() ); $this->assertTrue( $fragment->next_tag( 'RECT' ) ); $this->assertSame( 'svg', $fragment->get_namespace() ); From 50a00a8286be9dbf0fb5f7d5d4fbf73d1105508e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 08:49:36 +0100 Subject: [PATCH 25/39] Add since tag, update comment --- src/wp-includes/html-api/class-wp-html-processor.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 7177421707114..869e284808051 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -450,6 +450,8 @@ function ( WP_HTML_Token $token ): void { * * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm * + * @since 6.8.0 + * * @param string $html Input HTML fragment to process. * @return static|null The created processor if successful, otherwise null. */ @@ -522,9 +524,8 @@ public function create_fragment_at_current_node( string $html ) { $fragment_processor->state->encoding_confidence = 'irrelevant'; /* - * Updating the parsing namespace near the end of the process. - * This is important so that any push/pop from the stack of open - * elements does not change the parsing namespace. + * The parsing namespace is set at the end of the process. + * This is important so that it is not modified by other operations. */ $fragment_processor->change_parsing_namespace( $this->current_element->token->integration_node_type ? 
'html' : $namespace From c5487b753507824d1676f0c99a083a2b586b5343 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 14:43:46 +0100 Subject: [PATCH 26/39] Add test --- .../phpunit/tests/html-api/wpHtmlProcessor.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index a19af13c78925..3b7250d18f3aa 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1103,6 +1103,23 @@ public function test_create_fragment_at_current_node_in_foreign_content_integrat $this->assertTrue( $fragment->expects_closer() ); } + /** + * @ticket 62357 + */ + public function test_prevent_fragment_creation_on_closers() { + $processor = WP_HTML_Processor::create_full_parser( '

' ); + $processor->next_tag( 'P' ); + $processor->next_tag( + array( + 'tag_name' => 'P', + 'tag_closers' => 'visit', + ) + ); + $this->assertSame( 'P', $processor->get_tag() ); + $this->assertTrue( $processor->is_tag_closer() ); + $this->assertNull( $processor->create_fragment_at_current_node( '' ) ); + } + /** * Ensure that lowercased tag_name query matches tags case-insensitively. * From 32f50b468c89a5cf098390760ac93d27c4abba77 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 14:44:41 +0100 Subject: [PATCH 27/39] Prevent fragment creation on a tag closer --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 39196499fa5af..4bdb75fcac3eb 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -464,7 +464,7 @@ function ( WP_HTML_Token $token ): void { * @return static|null The created processor if successful, otherwise null. 
*/ public function create_fragment_at_current_node( string $html ) { - if ( $this->get_token_type() !== '#tag' ) { + if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { return null; } From 5216f21b60658615a0538288a65105f1d2152839 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 14:49:00 +0100 Subject: [PATCH 28/39] Include non-empty fragment HTML in test --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 3b7250d18f3aa..f80260cbc1aa6 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1117,7 +1117,7 @@ public function test_prevent_fragment_creation_on_closers() { ); $this->assertSame( 'P', $processor->get_tag() ); $this->assertTrue( $processor->is_tag_closer() ); - $this->assertNull( $processor->create_fragment_at_current_node( '' ) ); + $this->assertNull( $processor->create_fragment_at_current_node( 'fragment HTML' ) ); } /** From f9b5bea720c41a97c52b4b34958af10e42231eea Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 16:35:24 +0100 Subject: [PATCH 29/39] Remove redundant early initialization of processor var --- tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 710eac9962b10..5e0c3b77f8732 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -153,7 +153,6 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. 
*/ private static function build_tree_representation( ?string $fragment_context, string $html ) { - $processor = null; if ( $fragment_context ) { /* * If the string of characters starts with "svg ", the context From 48e473890f2869efe88717b33385c9e412d63f06 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 16:37:04 +0100 Subject: [PATCH 30/39] Pass encoding from context into full processor --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9304943f03369..c6a861ca4e0cf 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -293,11 +293,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return static|null The created processor if successful, otherwise null. */ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { - if ( 'UTF-8' !== $encoding ) { - return null; - } - - $context_processor = static::create_full_parser( "{$context}" ); + $context_processor = static::create_full_parser( "{$context}", $encoding ); if ( null === $context_processor ) { return null; } From 479c0b3dc47cbf7cb756faf435212ea02525079e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 16:47:44 +0100 Subject: [PATCH 31/39] Update comments, remove "only context" mentions --- .../html-api/class-wp-html-processor.php | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index c6a861ca4e0cf..5e36fd111ffd2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -279,16 +279,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * form is provided because a 
context element may have attributes that * impact the parse, such as with a SCRIPT tag and its `type` attribute. * - * ## Current HTML Support - * - * - The only supported context is ``, which is the default value. - * - The only supported document encoding is `UTF-8`, which is the default value. + * UTF-8 is the only allowed encoding. If working with a document that + * isn't UTF-8, first convert the document to UTF-8, then pass in the + * converted HTML. * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. * * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. + * @param string $context Context element for the fragment. Defaults to ``. * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. */ @@ -320,9 +319,9 @@ public static function create_fragment( $html, $context = '', $encoding = * entire HTML document from start to finish. Consider a fragment parser with * a context node of ``. * - * Since UTF-8 is the only currently-accepted charset, if working with a - * document that isn't UTF-8, it's important to convert the document before - * creating the processor: pass in the converted HTML. + * UTF-8 is the only allowed encoding. If working with a document that + * isn't UTF-8, first convert the document to UTF-8, then pass in the + * converted HTML. * * @param string $html Input HTML document to process. * @param string|null $known_definite_encoding Optional. 
If provided, specifies the charset used From ed3bb5405258e39b4f36234983c9e2b35a001cea Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 17:38:50 +0100 Subject: [PATCH 32/39] Improve documentation for create_fragment Inspired by https://github.com/WordPress/wordpress-develop/pull/7141 Co-authored-by: Dennis Snell --- .../html-api/class-wp-html-processor.php | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 5e36fd111ffd2..ab75c2b4cad9f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -279,12 +279,37 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * form is provided because a context element may have attributes that * impact the parse, such as with a SCRIPT tag and its `type` attribute. * + * Example: + * + * // Usually, snippets of HTML ought to be processed in the default `` context: + * $processor = WP_HTML_Processor::create_fragment( '

Hi

' ); + * + * // Some fragments should be processed in the correct context like this SVG: + * $processor = WP_HTML_Processor::create_fragment( '', '' ); + * + * // This fragment with TD tags should be processed in a TR context: + * $processor = WP_HTML_Processor::create_fragment( + * '123', + * '' + * ); + * + * In order to create a fragment processor at the correct location, the + * provided fragment will be processed as part of a full HTML document. + * The processor will search for the last opener tag in the document and + * create a fragment processor at that location. The document will be + * forced into "no-quirks" mode by including the HTML5 doctype. + * + * For advanced usage and precise control over the context element, use + * `WP_HTML_Processor::create_full_parser()` and + * `WP_HTML_Processor::create_fragment_at_current_node()`. + * * UTF-8 is the only allowed encoding. If working with a document that * isn't UTF-8, first convert the document to UTF-8, then pass in the * converted HTML. * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. + * @since 6.8.0 Can create fragments with any context element. * * @param string $html Input HTML fragment to process. * @param string $context Context element for the fragment. Defaults to `<body>`.
From d2e4814e274308dba4bebccebc9dcd79726377a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 21 Nov 2024 19:42:16 +0100 Subject: [PATCH 33/39] Add tests for unsupported contexts Inspired by https://github.com/WordPress/wordpress-develop/pull/7141 Co-authored-by: Dennis Snell --- .../wpHtmlProcessorFragmentParsing.php | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php new file mode 100644 index 0000000000000..655df03f312a8 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -0,0 +1,102 @@ +setExpectedIncorrectUsage( "WP_HTML_Processor::{$doing_it_wrong_method_name}" ); + $this->assertNull( + WP_HTML_Processor::create_fragment( 'just a test', $context ), + "Should not have been able to create a fragment parser with context node {$context}" + ); + } + + /** + * Data provider. + * + * @ticket 62357 + * + * @return array[] + */ + public static function data_invalid_fragment_contexts() { + return array( + /* + * Invalid contexts. + */ + /* + * The text node is confused with a virtual body open tag. + * This should fail to set a bookmark in `create_fragment` + * but currently does not, it slips through and fails in + * `create_fragment_at_current_node`. + */ + 'Invalid text' => array( 'just some text', 'create_fragment_at_current_node' ), + 'Invalid comment' => array( '', 'create_fragment' ), + 'Invalid closing' => array( '', 'create_fragment' ), + 'Invalid DOCTYPE' => array( '', 'create_fragment' ), + /* + * PLAINTEXT should appear in the unsupported elements, but at the + * moment it's completely unsupported by the processor so + * the context element cannot be found. + */ + 'Unsupported PLAINTEXT' => array( '', 'create_fragment' ), + + /* + * Invalid contexts. 
+ */ + 'AREA' => array( '<area>', 'create_fragment_at_current_node' ), + 'BASE' => array( '<base>', 'create_fragment_at_current_node' ), + 'BASEFONT' => array( '<basefont>', 'create_fragment_at_current_node' ), + 'BGSOUND' => array( '<bgsound>', 'create_fragment_at_current_node' ), + 'BR' => array( '<br>', 'create_fragment_at_current_node' ), + 'COL' => array( '<table><colgroup><col>', 'create_fragment_at_current_node' ), + 'EMBED' => array( '<embed>', 'create_fragment_at_current_node' ), + 'FRAME' => array( '<frameset><frame>', 'create_fragment_at_current_node' ), + 'HR' => array( '<hr>', 'create_fragment_at_current_node' ), + 'IMG' => array( '<img>', 'create_fragment_at_current_node' ), + 'INPUT' => array( '<input>', 'create_fragment_at_current_node' ), + 'KEYGEN' => array( '<keygen>', 'create_fragment_at_current_node' ), + 'LINK' => array( '<link>', 'create_fragment_at_current_node' ), + 'META' => array( '<meta>', 'create_fragment_at_current_node' ), + 'PARAM' => array( '<param>', 'create_fragment_at_current_node' ), + 'SOURCE' => array( '<source>', 'create_fragment_at_current_node' ), + 'TRACK' => array( '<track>', 'create_fragment_at_current_node' ), + 'WBR' => array( '<wbr>', 'create_fragment_at_current_node' ), + + /* + * Unsupported elements. Include a tag closer to ensure the element can be found + * and does not pause the parser at an incomplete token. 
+ */ + 'IFRAME' => array( '<iframe></iframe>', 'create_fragment_at_current_node' ), + 'NOEMBED' => array( '<noembed></noembed>', 'create_fragment_at_current_node' ), + 'NOFRAMES' => array( '<noframes></noframes>', 'create_fragment_at_current_node' ), + 'SCRIPT' => array( '<script></script>', 'create_fragment_at_current_node' ), + 'SCRIPT with type' => array( '<script type="javascript"></script>', 'create_fragment_at_current_node' ), + 'STYLE' => array( '<style></style>', 'create_fragment_at_current_node' ), + 'TEXTAREA' => array( '<textarea></textarea>', 'create_fragment_at_current_node' ), + 'TITLE' => array( '<title></title>', 'create_fragment_at_current_node' ), + 'XMP' => array( '<xmp></xmp>', 'create_fragment_at_current_node' ), + ); + } +} From 3412256d90b09484f03a7c8270dd5fc55b561bf4 Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:43:13 +0100 Subject: [PATCH 34/39] Add _doing_it_wrong messages --- .../html-api/class-wp-html-processor.php | 39 ++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ab75c2b4cad9f..b06f81775b1fc 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -325,16 +325,15 @@ public static function create_fragment( $html, $context = '<body>', $encoding = while ( $context_processor->next_tag() ) { $context_processor->set_bookmark( 'final_node' ); } + if ( $context_processor->has_bookmark( 'final_node' ) ) { $context_processor->seek( 'final_node' ); - $processor = $context_processor->create_fragment_at_current_node( $html ); - } - - if ( ! isset( $processor ) ) { + } else { + _doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' 
), '6.8.0' ); return null; } - return $processor; + return $context_processor->create_fragment_at_current_node( $html ); } /** @@ -477,19 +476,47 @@ function ( WP_HTML_Token $token ): void { */ public function create_fragment_at_current_node( string $html ) { if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { + _doing_it_wrong( + __METHOD__, + __( 'The context element must be a start tag.' ), + '6.8.0' + ); return null; } + $tag_name = $this->current_element->token->node_name; $namespace = $this->current_element->token->namespace; + if ( 'html' === $namespace && self::is_void( $tag_name ) ) { + _doing_it_wrong( + __METHOD__, + sprintf( + // translators: %s: A tag name like INPUT or BR. + __( 'The context element cannot be a void element, found "%s".' ), + $tag_name + ), + '6.8.0' + ); + return null; + } + /* * Prevent creating fragments at nodes that require a special tokenizer state. * This is unsupported by the HTML Processor. */ if ( 'html' === $namespace && - in_array( $this->current_element->token->node_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) + in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) ) { + _doing_it_wrong( + __METHOD__, + sprintf( + // translators: %s: A tag name like IFRAME or TEXTAREA. + __( 'The context element "%s" is not supported.' 
), + $tag_name + ), + '6.8.0' + ); return null; } From f3912558cd1ab17a211721ff92e3898b6d820156 Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:54:31 +0100 Subject: [PATCH 35/39] Move fragment tests into the fragment test suite --- .../tests/html-api/wpHtmlProcessor.php | 77 ------------------ .../wpHtmlProcessorFragmentParsing.php | 78 +++++++++++++++++++ 2 files changed, 78 insertions(+), 77 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index dab38796f792d..1ca60e691f03e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -1043,83 +1043,6 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to $this->assertEquals( $expected_token_counts, $processor->token_seen_count, 'Snapshot: ' . var_export( $processor->token_seen_count, true ) ); } - /** - * @ticket 62357 - */ - public function test_create_fragment_at_current_node_in_foreign_content() { - $processor = WP_HTML_Processor::create_full_parser( '<svg>' ); - $this->assertTrue( $processor->next_tag( 'SVG' ) ); - - $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>" ); - - $this->assertSame( 'svg', $fragment->get_namespace() ); - $this->assertTrue( $fragment->next_token() ); - - /* - * In HTML parsing, a nul byte would be ignored. - * In SVG it should be replaced with a replacement character. 
- */ - $this->assertSame( '#text', $fragment->get_token_type() ); - $this->assertStringStartsWith( "\u{FFFD}", $fragment->get_modifiable_text() ); - - $this->assertTrue( $fragment->next_tag( 'RECT' ) ); - $this->assertSame( 'svg', $fragment->get_namespace() ); - - $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) ); - $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() ); - $this->assertTrue( $fragment->next_tag( 'foreignObject' ) ); - $this->assertSame( 'svg', $fragment->get_namespace() ); - } - - /** - * @ticket 62357 - */ - public function test_create_fragment_at_current_node_in_foreign_content_integration_point() { - $processor = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' ); - $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); - - $fragment = $processor->create_fragment_at_current_node( "<image>\0not-preceded-by-nul-byte<rect />" ); - - // Nothing has been processed, the html namespace should be used for parsing as an integration point. - $this->assertSame( 'html', $fragment->get_namespace() ); - - // HTML parsing transforms IMAGE into IMG. - $this->assertTrue( $fragment->next_tag( 'IMG' ) ); - - $this->assertTrue( $fragment->next_token() ); - - // In HTML parsing, the nul byte is ignored and the text is reached. - $this->assertSame( '#text', $fragment->get_token_type() ); - $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() ); - - /* - * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace. - * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close. 
- */ - $this->assertTrue( $fragment->next_tag( 'RECT' ) ); - $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() ); - $this->assertSame( 'html', $fragment->get_namespace() ); - $this->assertTrue( $fragment->has_self_closing_flag() ); - $this->assertTrue( $fragment->expects_closer() ); - } - - /** - * @ticket 62357 - */ - public function test_prevent_fragment_creation_on_closers() { - $processor = WP_HTML_Processor::create_full_parser( '<p></p>' ); - $processor->next_tag( 'P' ); - $processor->next_tag( - array( - 'tag_name' => 'P', - 'tag_closers' => 'visit', - ) - ); - $this->assertSame( 'P', $processor->get_tag() ); - $this->assertTrue( $processor->is_tag_closer() ); - $this->assertNull( $processor->create_fragment_at_current_node( '<i>fragment HTML</i>' ) ); - } - /** * Ensure that lowercased tag_name query matches tags case-insensitively. * diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php index 655df03f312a8..add65e5755689 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -12,6 +12,84 @@ * @coversDefaultClass WP_HTML_Processor */ class Tests_HtmlApi_WpHtmlProcessorFragmentParsing extends WP_UnitTestCase { + /** + * @ticket 62357 + */ + public function test_create_fragment_at_current_node_in_foreign_content() { + $processor = WP_HTML_Processor::create_full_parser( '<svg>' ); + $this->assertTrue( $processor->next_tag( 'SVG' ) ); + + $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>" ); + + $this->assertSame( 'svg', $fragment->get_namespace() ); + $this->assertTrue( $fragment->next_token() ); + + /* + * In HTML parsing, a nul byte would be ignored. + * In SVG it should be replaced with a replacement character. 
+ */ + $this->assertSame( '#text', $fragment->get_token_type() ); + $this->assertStringStartsWith( "\u{FFFD}", $fragment->get_modifiable_text() ); + + $this->assertTrue( $fragment->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $fragment->get_namespace() ); + + $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) ); + $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() ); + $this->assertTrue( $fragment->next_tag( 'foreignObject' ) ); + $this->assertSame( 'svg', $fragment->get_namespace() ); + } + + /** + * @ticket 62357 + */ + public function test_create_fragment_at_current_node_in_foreign_content_integration_point() { + $processor = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' ); + $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); + + $fragment = $processor->create_fragment_at_current_node( "<image>\0not-preceded-by-nul-byte<rect />" ); + + // Nothing has been processed, the html namespace should be used for parsing as an integration point. + $this->assertSame( 'html', $fragment->get_namespace() ); + + // HTML parsing transforms IMAGE into IMG. + $this->assertTrue( $fragment->next_tag( 'IMG' ) ); + + $this->assertTrue( $fragment->next_token() ); + + // In HTML parsing, the nul byte is ignored and the text is reached. + $this->assertSame( '#text', $fragment->get_token_type() ); + $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() ); + + /* + * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace. + * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close. 
+ */ + $this->assertTrue( $fragment->next_tag( 'RECT' ) ); + $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() ); + $this->assertSame( 'html', $fragment->get_namespace() ); + $this->assertTrue( $fragment->has_self_closing_flag() ); + $this->assertTrue( $fragment->expects_closer() ); + } + + /** + * @expectedIncorrectUsage WP_HTML_Processor::create_fragment_at_current_node + * @ticket 62357 + */ + public function test_prevent_fragment_creation_on_closers() { + $processor = WP_HTML_Processor::create_full_parser( '<p></p>' ); + $processor->next_tag( 'P' ); + $processor->next_tag( + array( + 'tag_name' => 'P', + 'tag_closers' => 'visit', + ) + ); + $this->assertSame( 'P', $processor->get_tag() ); + $this->assertTrue( $processor->is_tag_closer() ); + $this->assertNull( $processor->create_fragment_at_current_node( '<i>fragment HTML</i>' ) ); + } + /** * Verifies that the fragment parser doesn't allow invalid context nodes. * From 75da0cce306c44bedb27e226241bfb540d7729c5 Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:25:02 +0100 Subject: [PATCH 36/39] Restore better comment from trunk --- src/wp-includes/html-api/class-wp-html-processor.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index b06f81775b1fc..a6a36d4117df5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -573,8 +573,9 @@ public function create_fragment_at_current_node( string $html ) { $fragment_processor->state->encoding_confidence = 'irrelevant'; /* - * The parsing namespace is set at the end of the process. - * This is important so that it is not modified by other operations. + * Update the parsing namespace near the end of the process. 
+ * This is important so that any push/pop from the stack of open + * elements does not change the parsing namespace. */ $fragment_processor->change_parsing_namespace( $this->current_element->token->integration_node_type ? 'html' : $namespace From 719dfe04055547d20f0c74ada12ef731ba4a4f6f Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Wed, 27 Nov 2024 12:30:09 +0100 Subject: [PATCH 37/39] Merge branch 'trunk' into html-api/use-create-fragment-at-node-for-main-create-fragment-method --- .../includes/class-wp-automatic-updater.php | 38 +++- src/wp-admin/includes/nav-menu.php | 2 +- src/wp-admin/includes/plugin.php | 215 ------------------ src/wp-includes/functions.php | 215 ++++++++++++++++++ .../class-wp-html-processor-state.php | 5 +- .../html-api/class-wp-html-processor.php | 9 - .../html-api/class-wp-html-tag-processor.php | 2 +- src/wp-includes/pluggable.php | 3 +- .../class-wp-rest-templates-controller.php | 2 +- .../class-wp-sitemaps-taxonomies.php | 2 +- src/wp-settings.php | 16 +- .../tests/html-api/wpHtmlTagProcessor.php | 62 +++++ 12 files changed, 334 insertions(+), 237 deletions(-) diff --git a/src/wp-admin/includes/class-wp-automatic-updater.php b/src/wp-admin/includes/class-wp-automatic-updater.php index b497b8324d41b..4dccd94ccddbd 100644 --- a/src/wp-admin/includes/class-wp-automatic-updater.php +++ b/src/wp-admin/includes/class-wp-automatic-updater.php @@ -936,6 +936,14 @@ protected function send_email( $type, $core_update, $result = null ) { return; } + $admin_user = get_user_by( 'email', get_site_option( 'admin_email' ) ); + + if ( $admin_user ) { + $switched_locale = switch_to_user_locale( $admin_user->ID ); + } else { + $switched_locale = switch_to_locale( get_locale() ); + } + switch ( $type ) { case 'success': // We updated. /* translators: Site updated notification email subject. 1: Site title, 2: WordPress version. 
*/ @@ -1139,8 +1147,11 @@ protected function send_email( $type, $core_update, $result = null ) { $email = apply_filters( 'auto_core_update_email', $email, $type, $core_update, $result ); wp_mail( $email['to'], wp_specialchars_decode( $email['subject'] ), $email['body'], $email['headers'] ); - } + if ( $switched_locale ) { + restore_previous_locale(); + } + } /** * Checks whether an email should be sent after attempting plugin or theme updates. @@ -1255,6 +1266,14 @@ protected function send_plugin_theme_email( $type, $successful_updates, $failed_ } } + $admin_user = get_user_by( 'email', get_site_option( 'admin_email' ) ); + + if ( $admin_user ) { + $switched_locale = switch_to_user_locale( $admin_user->ID ); + } else { + $switched_locale = switch_to_locale( get_locale() ); + } + $body = array(); $successful_plugins = ( ! empty( $successful_updates['plugin'] ) ); $successful_themes = ( ! empty( $successful_updates['theme'] ) ); @@ -1526,6 +1545,10 @@ protected function send_plugin_theme_email( $type, $successful_updates, $failed_ if ( $result ) { update_option( 'auto_plugin_theme_update_emails', $past_failure_emails ); } + + if ( $switched_locale ) { + restore_previous_locale(); + } } /** @@ -1534,9 +1557,12 @@ protected function send_plugin_theme_email( $type, $successful_updates, $failed_ * @since 3.7.0 */ protected function send_debug_email() { - $update_count = 0; - foreach ( $this->update_results as $type => $updates ) { - $update_count += count( $updates ); + $admin_user = get_user_by( 'email', get_site_option( 'admin_email' ) ); + + if ( $admin_user ) { + $switched_locale = switch_to_user_locale( $admin_user->ID ); + } else { + $switched_locale = switch_to_locale( get_locale() ); } $body = array(); @@ -1715,6 +1741,10 @@ protected function send_debug_email() { $email = apply_filters( 'automatic_updates_debug_email', $email, $failures, $this->update_results ); wp_mail( $email['to'], wp_specialchars_decode( $email['subject'] ), $email['body'], $email['headers'] 
); + + if ( $switched_locale ) { + restore_previous_locale(); + } } /** diff --git a/src/wp-admin/includes/nav-menu.php b/src/wp-admin/includes/nav-menu.php index 5ba3276b3aea5..c3b1244f47fd9 100644 --- a/src/wp-admin/includes/nav-menu.php +++ b/src/wp-admin/includes/nav-menu.php @@ -875,7 +875,7 @@ function wp_nav_menu_item_taxonomy_meta_box( $data_object, $box ) { } $num_pages = (int) ceil( - wp_count_terms( + (int) wp_count_terms( array_merge( $args, array( diff --git a/src/wp-admin/includes/plugin.php b/src/wp-admin/includes/plugin.php index de1468352b3d9..0969f956577ed 100644 --- a/src/wp-admin/includes/plugin.php +++ b/src/wp-admin/includes/plugin.php @@ -6,221 +6,6 @@ * @subpackage Administration */ -/** - * Parses the plugin contents to retrieve plugin's metadata. - * - * All plugin headers must be on their own line. Plugin description must not have - * any newlines, otherwise only parts of the description will be displayed. - * The below is formatted for printing. - * - * /* - * Plugin Name: Name of the plugin. - * Plugin URI: The home page of the plugin. - * Description: Plugin description. - * Author: Plugin author's name. - * Author URI: Link to the author's website. - * Version: Plugin version. - * Text Domain: Optional. Unique identifier, should be same as the one used in - * load_plugin_textdomain(). - * Domain Path: Optional. Only useful if the translations are located in a - * folder above the plugin's base path. For example, if .mo files are - * located in the locale folder then Domain Path will be "/locale/" and - * must have the first slash. Defaults to the base folder the plugin is - * located in. - * Network: Optional. Specify "Network: true" to require that a plugin is activated - * across all sites in an installation. This will prevent a plugin from being - * activated on a single site when Multisite is enabled. - * Requires at least: Optional. Specify the minimum required WordPress version. - * Requires PHP: Optional. 
Specify the minimum required PHP version. - * * / # Remove the space to close comment. - * - * The first 8 KB of the file will be pulled in and if the plugin data is not - * within that first 8 KB, then the plugin author should correct their plugin - * and move the plugin data headers to the top. - * - * The plugin file is assumed to have permissions to allow for scripts to read - * the file. This is not checked however and the file is only opened for - * reading. - * - * @since 1.5.0 - * @since 5.3.0 Added support for `Requires at least` and `Requires PHP` headers. - * @since 5.8.0 Added support for `Update URI` header. - * @since 6.5.0 Added support for `Requires Plugins` header. - * - * @param string $plugin_file Absolute path to the main plugin file. - * @param bool $markup Optional. If the returned data should have HTML markup applied. - * Default true. - * @param bool $translate Optional. If the returned data should be translated. Default true. - * @return array { - * Plugin data. Values will be empty if not supplied by the plugin. - * - * @type string $Name Name of the plugin. Should be unique. - * @type string $PluginURI Plugin URI. - * @type string $Version Plugin version. - * @type string $Description Plugin description. - * @type string $Author Plugin author's name. - * @type string $AuthorURI Plugin author's website address (if set). - * @type string $TextDomain Plugin textdomain. - * @type string $DomainPath Plugin's relative directory path to .mo files. - * @type bool $Network Whether the plugin can only be activated network-wide. - * @type string $RequiresWP Minimum required version of WordPress. - * @type string $RequiresPHP Minimum required version of PHP. - * @type string $UpdateURI ID of the plugin for update purposes, should be a URI. - * @type string $RequiresPlugins Comma separated list of dot org plugin slugs. - * @type string $Title Title of the plugin and link to the plugin's site (if set). - * @type string $AuthorName Plugin author's name. 
- * } - */ -function get_plugin_data( $plugin_file, $markup = true, $translate = true ) { - - $default_headers = array( - 'Name' => 'Plugin Name', - 'PluginURI' => 'Plugin URI', - 'Version' => 'Version', - 'Description' => 'Description', - 'Author' => 'Author', - 'AuthorURI' => 'Author URI', - 'TextDomain' => 'Text Domain', - 'DomainPath' => 'Domain Path', - 'Network' => 'Network', - 'RequiresWP' => 'Requires at least', - 'RequiresPHP' => 'Requires PHP', - 'UpdateURI' => 'Update URI', - 'RequiresPlugins' => 'Requires Plugins', - // Site Wide Only is deprecated in favor of Network. - '_sitewide' => 'Site Wide Only', - ); - - $plugin_data = get_file_data( $plugin_file, $default_headers, 'plugin' ); - - // Site Wide Only is the old header for Network. - if ( ! $plugin_data['Network'] && $plugin_data['_sitewide'] ) { - /* translators: 1: Site Wide Only: true, 2: Network: true */ - _deprecated_argument( __FUNCTION__, '3.0.0', sprintf( __( 'The %1$s plugin header is deprecated. Use %2$s instead.' ), '<code>Site Wide Only: true</code>', '<code>Network: true</code>' ) ); - $plugin_data['Network'] = $plugin_data['_sitewide']; - } - $plugin_data['Network'] = ( 'true' === strtolower( $plugin_data['Network'] ) ); - unset( $plugin_data['_sitewide'] ); - - // If no text domain is defined fall back to the plugin slug. - if ( ! $plugin_data['TextDomain'] ) { - $plugin_slug = dirname( plugin_basename( $plugin_file ) ); - if ( '.' !== $plugin_slug && ! str_contains( $plugin_slug, '/' ) ) { - $plugin_data['TextDomain'] = $plugin_slug; - } - } - - if ( $markup || $translate ) { - $plugin_data = _get_plugin_data_markup_translate( $plugin_file, $plugin_data, $markup, $translate ); - } else { - $plugin_data['Title'] = $plugin_data['Name']; - $plugin_data['AuthorName'] = $plugin_data['Author']; - } - - return $plugin_data; -} - -/** - * Sanitizes plugin data, optionally adds markup, optionally translates. 
- * - * @since 2.7.0 - * - * @see get_plugin_data() - * - * @access private - * - * @param string $plugin_file Path to the main plugin file. - * @param array $plugin_data An array of plugin data. See get_plugin_data(). - * @param bool $markup Optional. If the returned data should have HTML markup applied. - * Default true. - * @param bool $translate Optional. If the returned data should be translated. Default true. - * @return array Plugin data. Values will be empty if not supplied by the plugin. - * See get_plugin_data() for the list of possible values. - */ -function _get_plugin_data_markup_translate( $plugin_file, $plugin_data, $markup = true, $translate = true ) { - - // Sanitize the plugin filename to a WP_PLUGIN_DIR relative path. - $plugin_file = plugin_basename( $plugin_file ); - - // Translate fields. - if ( $translate ) { - $textdomain = $plugin_data['TextDomain']; - if ( $textdomain ) { - if ( ! is_textdomain_loaded( $textdomain ) ) { - if ( $plugin_data['DomainPath'] ) { - load_plugin_textdomain( $textdomain, false, dirname( $plugin_file ) . $plugin_data['DomainPath'] ); - } else { - load_plugin_textdomain( $textdomain, false, dirname( $plugin_file ) ); - } - } - } elseif ( 'hello.php' === basename( $plugin_file ) ) { - $textdomain = 'default'; - } - if ( $textdomain ) { - foreach ( array( 'Name', 'PluginURI', 'Description', 'Author', 'AuthorURI', 'Version' ) as $field ) { - if ( ! empty( $plugin_data[ $field ] ) ) { - // phpcs:ignore WordPress.WP.I18n.LowLevelTranslationFunction,WordPress.WP.I18n.NonSingularStringLiteralText,WordPress.WP.I18n.NonSingularStringLiteralDomain - $plugin_data[ $field ] = translate( $plugin_data[ $field ], $textdomain ); - } - } - } - } - - // Sanitize fields. 
- $allowed_tags_in_links = array( - 'abbr' => array( 'title' => true ), - 'acronym' => array( 'title' => true ), - 'code' => true, - 'em' => true, - 'strong' => true, - ); - - $allowed_tags = $allowed_tags_in_links; - $allowed_tags['a'] = array( - 'href' => true, - 'title' => true, - ); - - /* - * Name is marked up inside <a> tags. Don't allow these. - * Author is too, but some plugins have used <a> here (omitting Author URI). - */ - $plugin_data['Name'] = wp_kses( $plugin_data['Name'], $allowed_tags_in_links ); - $plugin_data['Author'] = wp_kses( $plugin_data['Author'], $allowed_tags ); - - $plugin_data['Description'] = wp_kses( $plugin_data['Description'], $allowed_tags ); - $plugin_data['Version'] = wp_kses( $plugin_data['Version'], $allowed_tags ); - - $plugin_data['PluginURI'] = esc_url( $plugin_data['PluginURI'] ); - $plugin_data['AuthorURI'] = esc_url( $plugin_data['AuthorURI'] ); - - $plugin_data['Title'] = $plugin_data['Name']; - $plugin_data['AuthorName'] = $plugin_data['Author']; - - // Apply markup. - if ( $markup ) { - if ( $plugin_data['PluginURI'] && $plugin_data['Name'] ) { - $plugin_data['Title'] = '<a href="' . $plugin_data['PluginURI'] . '">' . $plugin_data['Name'] . '</a>'; - } - - if ( $plugin_data['AuthorURI'] && $plugin_data['Author'] ) { - $plugin_data['Author'] = '<a href="' . $plugin_data['AuthorURI'] . '">' . $plugin_data['Author'] . '</a>'; - } - - $plugin_data['Description'] = wptexturize( $plugin_data['Description'] ); - - if ( $plugin_data['Author'] ) { - $plugin_data['Description'] .= sprintf( - /* translators: %s: Plugin author. */ - ' <cite>' . __( 'By %s.' ) . '</cite>', - $plugin_data['Author'] - ); - } - } - - return $plugin_data; -} - /** * Gets a list of a plugin's files. 
* diff --git a/src/wp-includes/functions.php b/src/wp-includes/functions.php index d9303083bfaa9..a4cfa13cd20dc 100644 --- a/src/wp-includes/functions.php +++ b/src/wp-includes/functions.php @@ -6929,6 +6929,221 @@ function get_file_data( $file, $default_headers, $context = '' ) { return $all_headers; } +/** + * Parses the plugin contents to retrieve plugin's metadata. + * + * All plugin headers must be on their own line. Plugin description must not have + * any newlines, otherwise only parts of the description will be displayed. + * The below is formatted for printing. + * + * /* + * Plugin Name: Name of the plugin. + * Plugin URI: The home page of the plugin. + * Description: Plugin description. + * Author: Plugin author's name. + * Author URI: Link to the author's website. + * Version: Plugin version. + * Text Domain: Optional. Unique identifier, should be same as the one used in + * load_plugin_textdomain(). + * Domain Path: Optional. Only useful if the translations are located in a + * folder above the plugin's base path. For example, if .mo files are + * located in the locale folder then Domain Path will be "/locale/" and + * must have the first slash. Defaults to the base folder the plugin is + * located in. + * Network: Optional. Specify "Network: true" to require that a plugin is activated + * across all sites in an installation. This will prevent a plugin from being + * activated on a single site when Multisite is enabled. + * Requires at least: Optional. Specify the minimum required WordPress version. + * Requires PHP: Optional. Specify the minimum required PHP version. + * * / # Remove the space to close comment. + * + * The first 8 KB of the file will be pulled in and if the plugin data is not + * within that first 8 KB, then the plugin author should correct their plugin + * and move the plugin data headers to the top. + * + * The plugin file is assumed to have permissions to allow for scripts to read + * the file. 
This is not checked however and the file is only opened for + * reading. + * + * @since 1.5.0 + * @since 5.3.0 Added support for `Requires at least` and `Requires PHP` headers. + * @since 5.8.0 Added support for `Update URI` header. + * @since 6.5.0 Added support for `Requires Plugins` header. + * + * @param string $plugin_file Absolute path to the main plugin file. + * @param bool $markup Optional. If the returned data should have HTML markup applied. + * Default true. + * @param bool $translate Optional. If the returned data should be translated. Default true. + * @return array { + * Plugin data. Values will be empty if not supplied by the plugin. + * + * @type string $Name Name of the plugin. Should be unique. + * @type string $PluginURI Plugin URI. + * @type string $Version Plugin version. + * @type string $Description Plugin description. + * @type string $Author Plugin author's name. + * @type string $AuthorURI Plugin author's website address (if set). + * @type string $TextDomain Plugin textdomain. + * @type string $DomainPath Plugin's relative directory path to .mo files. + * @type bool $Network Whether the plugin can only be activated network-wide. + * @type string $RequiresWP Minimum required version of WordPress. + * @type string $RequiresPHP Minimum required version of PHP. + * @type string $UpdateURI ID of the plugin for update purposes, should be a URI. + * @type string $RequiresPlugins Comma separated list of dot org plugin slugs. + * @type string $Title Title of the plugin and link to the plugin's site (if set). + * @type string $AuthorName Plugin author's name. 
+ * } + */ +function get_plugin_data( $plugin_file, $markup = true, $translate = true ) { + + $default_headers = array( + 'Name' => 'Plugin Name', + 'PluginURI' => 'Plugin URI', + 'Version' => 'Version', + 'Description' => 'Description', + 'Author' => 'Author', + 'AuthorURI' => 'Author URI', + 'TextDomain' => 'Text Domain', + 'DomainPath' => 'Domain Path', + 'Network' => 'Network', + 'RequiresWP' => 'Requires at least', + 'RequiresPHP' => 'Requires PHP', + 'UpdateURI' => 'Update URI', + 'RequiresPlugins' => 'Requires Plugins', + // Site Wide Only is deprecated in favor of Network. + '_sitewide' => 'Site Wide Only', + ); + + $plugin_data = get_file_data( $plugin_file, $default_headers, 'plugin' ); + + // Site Wide Only is the old header for Network. + if ( ! $plugin_data['Network'] && $plugin_data['_sitewide'] ) { + /* translators: 1: Site Wide Only: true, 2: Network: true */ + _deprecated_argument( __FUNCTION__, '3.0.0', sprintf( __( 'The %1$s plugin header is deprecated. Use %2$s instead.' ), '<code>Site Wide Only: true</code>', '<code>Network: true</code>' ) ); + $plugin_data['Network'] = $plugin_data['_sitewide']; + } + $plugin_data['Network'] = ( 'true' === strtolower( $plugin_data['Network'] ) ); + unset( $plugin_data['_sitewide'] ); + + // If no text domain is defined fall back to the plugin slug. + if ( ! $plugin_data['TextDomain'] ) { + $plugin_slug = dirname( plugin_basename( $plugin_file ) ); + if ( '.' !== $plugin_slug && ! str_contains( $plugin_slug, '/' ) ) { + $plugin_data['TextDomain'] = $plugin_slug; + } + } + + if ( $markup || $translate ) { + $plugin_data = _get_plugin_data_markup_translate( $plugin_file, $plugin_data, $markup, $translate ); + } else { + $plugin_data['Title'] = $plugin_data['Name']; + $plugin_data['AuthorName'] = $plugin_data['Author']; + } + + return $plugin_data; +} + +/** + * Sanitizes plugin data, optionally adds markup, optionally translates. 
+ * + * @since 2.7.0 + * + * @see get_plugin_data() + * + * @access private + * + * @param string $plugin_file Path to the main plugin file. + * @param array $plugin_data An array of plugin data. See get_plugin_data(). + * @param bool $markup Optional. If the returned data should have HTML markup applied. + * Default true. + * @param bool $translate Optional. If the returned data should be translated. Default true. + * @return array Plugin data. Values will be empty if not supplied by the plugin. + * See get_plugin_data() for the list of possible values. + */ +function _get_plugin_data_markup_translate( $plugin_file, $plugin_data, $markup = true, $translate = true ) { + + // Sanitize the plugin filename to a WP_PLUGIN_DIR relative path. + $plugin_file = plugin_basename( $plugin_file ); + + // Translate fields. + if ( $translate ) { + $textdomain = $plugin_data['TextDomain']; + if ( $textdomain ) { + if ( ! is_textdomain_loaded( $textdomain ) ) { + if ( $plugin_data['DomainPath'] ) { + load_plugin_textdomain( $textdomain, false, dirname( $plugin_file ) . $plugin_data['DomainPath'] ); + } else { + load_plugin_textdomain( $textdomain, false, dirname( $plugin_file ) ); + } + } + } elseif ( 'hello.php' === basename( $plugin_file ) ) { + $textdomain = 'default'; + } + if ( $textdomain ) { + foreach ( array( 'Name', 'PluginURI', 'Description', 'Author', 'AuthorURI', 'Version' ) as $field ) { + if ( ! empty( $plugin_data[ $field ] ) ) { + // phpcs:ignore WordPress.WP.I18n.LowLevelTranslationFunction,WordPress.WP.I18n.NonSingularStringLiteralText,WordPress.WP.I18n.NonSingularStringLiteralDomain + $plugin_data[ $field ] = translate( $plugin_data[ $field ], $textdomain ); + } + } + } + } + + // Sanitize fields. 
+ $allowed_tags_in_links = array( + 'abbr' => array( 'title' => true ), + 'acronym' => array( 'title' => true ), + 'code' => true, + 'em' => true, + 'strong' => true, + ); + + $allowed_tags = $allowed_tags_in_links; + $allowed_tags['a'] = array( + 'href' => true, + 'title' => true, + ); + + /* + * Name is marked up inside <a> tags. Don't allow these. + * Author is too, but some plugins have used <a> here (omitting Author URI). + */ + $plugin_data['Name'] = wp_kses( $plugin_data['Name'], $allowed_tags_in_links ); + $plugin_data['Author'] = wp_kses( $plugin_data['Author'], $allowed_tags ); + + $plugin_data['Description'] = wp_kses( $plugin_data['Description'], $allowed_tags ); + $plugin_data['Version'] = wp_kses( $plugin_data['Version'], $allowed_tags ); + + $plugin_data['PluginURI'] = esc_url( $plugin_data['PluginURI'] ); + $plugin_data['AuthorURI'] = esc_url( $plugin_data['AuthorURI'] ); + + $plugin_data['Title'] = $plugin_data['Name']; + $plugin_data['AuthorName'] = $plugin_data['Author']; + + // Apply markup. + if ( $markup ) { + if ( $plugin_data['PluginURI'] && $plugin_data['Name'] ) { + $plugin_data['Title'] = '<a href="' . $plugin_data['PluginURI'] . '">' . $plugin_data['Name'] . '</a>'; + } + + if ( $plugin_data['AuthorURI'] && $plugin_data['Author'] ) { + $plugin_data['Author'] = '<a href="' . $plugin_data['AuthorURI'] . '">' . $plugin_data['Author'] . '</a>'; + } + + $plugin_data['Description'] = wptexturize( $plugin_data['Description'] ); + + if ( $plugin_data['Author'] ) { + $plugin_data['Description'] .= sprintf( + /* translators: %s: Plugin author. */ + ' <cite>' . __( 'By %s.' ) . '</cite>', + $plugin_data['Author'] + ); + } + } + + return $plugin_data; +} + /** * Returns true. 
* diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index b7cdd347ca85b..b257aa809da75 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -360,10 +360,9 @@ class WP_HTML_Processor_State { * Context node initializing fragment parser, if created as a fragment parser. * * @since 6.4.0 + * @deprecated 6.8.0 WP_HTML_Processor tracks the context_node internally. * - * @see https://html.spec.whatwg.org/#concept-frag-parse-context - * - * @var [string, array]|null + * @var null */ public $context_node = null; diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index a6a36d4117df5..8665ca36034fb 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -538,15 +538,6 @@ public function create_fragment_at_current_node( string $html ) { $fragment_processor->context_node->bookmark_name = 'context-node'; $fragment_processor->context_node->on_destroy = null; - $fragment_processor->state->context_node = array( $fragment_processor->context_node->node_name, array() ); - - $attribute_names = $this->get_attribute_names_with_prefix( '' ); - if ( null !== $attribute_names ) { - foreach ( $attribute_names as $name ) { - $fragment_processor->state->context_node[1][ $name ] = $this->get_attribute( $name ); - } - } - $fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name ); if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) { diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index e2632c80f6da5..39390621e86a6 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1668,7 
+1668,7 @@ private function parse_next_tag(): bool { * * @see https://html.spec.whatwg.org/#tag-open-state */ - if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) { + if ( 1 !== strspn( $html, '!/?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', $at + 1, 1 ) ) { ++$at; continue; } diff --git a/src/wp-includes/pluggable.php b/src/wp-includes/pluggable.php index cc16e8c8bde37..3dd629fa1990c 100644 --- a/src/wp-includes/pluggable.php +++ b/src/wp-includes/pluggable.php @@ -710,9 +710,10 @@ function wp_validate_auth_cookie( $cookie = '', $scheme = '' ) { $username = $cookie_elements['username']; $hmac = $cookie_elements['hmac']; $token = $cookie_elements['token']; - $expired = $cookie_elements['expiration']; $expiration = $cookie_elements['expiration']; + $expired = (int) $expiration; + // Allow a grace period for POST and Ajax requests. if ( wp_doing_ajax() || 'POST' === $_SERVER['REQUEST_METHOD'] ) { $expired += HOUR_IN_SECONDS; diff --git a/src/wp-includes/rest-api/endpoints/class-wp-rest-templates-controller.php b/src/wp-includes/rest-api/endpoints/class-wp-rest-templates-controller.php index 43780fb4e677b..267f40e77fd0f 100644 --- a/src/wp-includes/rest-api/endpoints/class-wp-rest-templates-controller.php +++ b/src/wp-includes/rest-api/endpoints/class-wp-rest-templates-controller.php @@ -872,7 +872,7 @@ private static function get_wp_templates_author_text_field( $template_object ) { $theme_name = wp_get_theme( $template_object->theme )->get( 'Name' ); return empty( $theme_name ) ? $template_object->theme : $theme_name; case 'plugin': - if ( ! function_exists( 'get_plugins' ) || ! function_exists( 'get_plugin_data' ) ) { + if ( ! function_exists( 'get_plugins' ) ) { require_once ABSPATH . 
'wp-admin/includes/plugin.php'; } if ( isset( $template_object->plugin ) ) { diff --git a/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php index 5571ff4ed95f3..0c53e95a42d6c 100644 --- a/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php +++ b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php @@ -171,7 +171,7 @@ public function get_max_num_pages( $object_subtype = '' ) { $term_count = wp_count_terms( $this->get_taxonomies_query_args( $taxonomy ) ); - return (int) ceil( $term_count / wp_sitemaps_get_max_urls( $this->object_type ) ); + return (int) ceil( (int) $term_count / wp_sitemaps_get_max_urls( $this->object_type ) ); } /** diff --git a/src/wp-settings.php b/src/wp-settings.php index 62ba4d5dee6ca..98ab4d68d0efc 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -534,8 +534,19 @@ * @param string $plugin Full path to the plugin's main file. */ do_action( 'plugin_loaded', $plugin ); + + $plugin_data = get_plugin_data( $plugin, false, false ); + + $textdomain = $plugin_data['TextDomain']; + if ( $textdomain ) { + if ( $plugin_data['DomainPath'] ) { + $GLOBALS['wp_textdomain_registry']->set_custom_path( $textdomain, dirname( $plugin ) . $plugin_data['DomainPath'] ); + } else { + $GLOBALS['wp_textdomain_registry']->set_custom_path( $textdomain, dirname( $plugin ) ); + } + } } -unset( $plugin, $_wp_plugin_file ); +unset( $plugin, $_wp_plugin_file, $plugin_data, $textdomain ); // Load pluggable functions. require ABSPATH . WPINC . '/pluggable.php'; @@ -671,6 +682,9 @@ if ( file_exists( $theme . '/functions.php' ) ) { include $theme . 
'/functions.php'; } + + $theme = wp_get_theme( basename( $theme ) ); + $theme->load_textdomain(); } unset( $theme ); diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 393fd2cda06db..cd8faee4ed6a4 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -2984,4 +2984,66 @@ public function test_doctype_doc_name() { $this->assertNull( $doctype->public_identifier ); $this->assertNull( $doctype->system_identifier ); } + + /** + * @ticket 62522 + * + * @dataProvider data_alphabet_by_characters_lowercase + */ + public function test_recognizes_lowercase_tag_name( string $char ) { + /* + * The spacing in the HTML string is important to the problematic + * codepath in ticket #62522. + */ + $html = " <{$char}> </{$char}>"; + $processor = new WP_HTML_Tag_Processor( $html ); + $this->assertTrue( $processor->next_tag(), "Failed to find open tag in '{$html}'." ); + $this->assertTrue( + $processor->next_tag( array( 'tag_closers' => 'visit' ) ), + "Failed to find close tag in '{$html}'." + ); + } + + /** + * @ticket 62522 + * + * @dataProvider data_alphabet_by_characters_uppercase + */ + public function test_recognizes_uppercase_tag_name( string $char ) { + /* + * The spacing in the HTML string is important to the problematic + * codepath in ticket #62522. + */ + $html = " <{$char}> </{$char}>"; + $processor = new WP_HTML_Tag_Processor( $html ); + $this->assertTrue( $processor->next_tag(), "Failed to find open tag in '{$html}'." ); + $this->assertTrue( + $processor->next_tag( array( 'tag_closers' => 'visit' ) ), + "Failed to find close tag in '{$html}'." + ); + } + + /** + * Data provider. + * + * @return Generator<array> + */ + public static function data_alphabet_by_characters_lowercase() { + $char = 'a'; + while ( $char <= 'z' ) { + yield $char => array( $char ); + $char = chr( ord( $char ) + 1 ); + } + } + + /** + * Data provider. 
+ * + * @return Generator<array> + */ + public static function data_alphabet_by_characters_uppercase() { + foreach ( self::data_alphabet_by_characters_lowercase() as $data ) { + yield strtoupper( $data[0] ) => array( strtoupper( $data[0] ) ); + } + } } From 146153401f74ac696aaa8610afda97497439e4f6 Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Wed, 27 Nov 2024 12:31:37 +0100 Subject: [PATCH 38/39] Check seeking to final_node success --- src/wp-includes/html-api/class-wp-html-processor.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8665ca36034fb..1be795c5c7de2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -326,9 +326,10 @@ public static function create_fragment( $html, $context = '<body>', $encoding = $context_processor->set_bookmark( 'final_node' ); } - if ( $context_processor->has_bookmark( 'final_node' ) ) { - $context_processor->seek( 'final_node' ); - } else { + if ( + ! $context_processor->has_bookmark( 'final_node' ) || + ! $context_processor->seek( 'final_node' ) + ) { _doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' 
), '6.8.0' ); return null; } From d6d5305eaa01c3c11e05b71cb4b4d528ee2a546f Mon Sep 17 00:00:00 2001 From: Jon Surrell <sirreal@users.noreply.github.com> Date: Wed, 27 Nov 2024 13:17:30 +0100 Subject: [PATCH 39/39] Update ticket numbers Co-authored-by: Bernie Reiter <96308+ockham@users.noreply.github.com> --- .../phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php index faf0261001afb..4913fa07eb412 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -97,7 +97,7 @@ public function test_prevent_fragment_creation_on_closers() { * contain no inner HTML. Operations on self-contained elements should occur * through methods such as {@see WP_HTML_Tag_Processor::set_modifiable_text}. * - * @ticket 62357 + * @ticket 62584 * * @dataProvider data_invalid_fragment_contexts * @@ -114,8 +114,6 @@ public function test_rejects_invalid_fragment_contexts( string $context, string /** * Data provider. * - * @ticket 62357 - * * @return array[] */ public static function data_invalid_fragment_contexts() {