From 704ab8ee4032e07a80f51ceb6d70b453f8da539b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 1 Aug 2024 16:53:25 -0700 Subject: [PATCH 01/13] HTML API: Allow any fragment context. Previously, the fragment parser in WP_HTML_Processor has only allowed creating a fragment with the `` context. In this patch, any context node is allowed. --- .../html-api/class-wp-html-processor.php | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 39ba43e467d5c..f63d8adcf0ace 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -293,12 +293,27 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return static|null The created processor if successful, otherwise null. */ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { - if ( '' !== $context || 'UTF-8' !== $encoding ) { + if ( 'UTF-8' !== $encoding ) { + return null; + } + + $context_processor = new WP_HTML_Tag_Processor( $context ); + if ( ! $context_processor->next_token() || '#tag' !== $context_processor->get_token_type() ) { + return null; + } + + $context_tag = $context_processor->get_tag(); + $context_attributes = array(); + foreach ( $context_processor->get_attribute_names_with_prefix( '' ) as $name ) { + $context_attributes[ $name ] = $context_processor->get_attribute( $name ); + } + + if ( $context_processor->next_token() ) { return null; } $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); - $processor->state->context_node = array( 'BODY', array() ); + $processor->state->context_node = array( $context_tag, $context_attributes ); $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; From 691d39e7124fded9dbeac07265835ce5f7ea9bbb Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 5 Aug 2024 15:41:28 -0700 Subject: [PATCH 02/13] Update docs --- src/wp-includes/html-api/class-wp-html-processor.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f63d8adcf0ace..27d16fbc59587 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -281,11 +281,13 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * ## Current HTML Support * - * - The only supported context is ``, which is the default value. * - The only supported document encoding is `UTF-8`, which is the default value. * + * @todo Verify that creating a fragment in self-contained elements works. + * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. + * @since 6.7.0 Can create fragment in any context. * * @param string $html Input HTML fragment to process. * @param string $context Context element for the fragment, must be default of ``. From 7005b6cbbb7f3db4fd4de89ddae3e60e511f3530 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 5 Aug 2024 22:33:30 -0700 Subject: [PATCH 03/13] Start adding tests for fragment parser --- .../wpHtmlProcessorFragmentParsing.php | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php new file mode 100644 index 0000000000000..424c6ab24648d --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -0,0 +1,103 @@ +' ); + + $this->assertSame( + $expected_html, + static::normalize_html( $processor ), + 'Failed to properly parse SCRIPT fragment.' + ); + } + + /** + * Data provider. + * + * @ticket 61576 + * + * @return array[] + */ + public static function data_script_fragments() { + return array( + 'Basic SCRIPT' => array( 'const x = 5 < y;', '' ), + ); + } + + /** + * Produces normalized HTML output given a processor as input, which has not + * yet started to proceed through its document. + * + * This can be used with a full or a fragment parser. + * + * @param WP_HTML_Processor $processor HTML Processor in READY state at the beginning of its input. + * @return string|null Normalized HTML from input processor. + */ + private static function normalize_html( WP_HTML_Processor $processor ): ?string { + $html = ''; + + while ( $processor->next_token() ) { + $token_name = $processor->get_token_name(); + $token_type = $processor->get_token_type(); + $is_closer = $processor->is_tag_closer(); + + switch ( $token_type ) { + case '#text': + $html .= $processor->get_modifiable_text(); + break; + + case '#tag': + if ( $is_closer ) { + $html .= ""; + } else { + $names = $processor->get_attribute_names_with_prefix( '' ); + if ( ! isset( $names ) ) { + $html .= "<{$token_name}>"; + } else { + $html .= "<{$token_name}"; + foreach ( $names as $name ) { + $value = $processor->get_attribute( $name ); + if ( true === $value ) { + $html .= " {$name}"; + } else { + $value = strtr( $value, '"', '"' ); + $html .= " {$name}=\"{$value}\""; + } + } + } + + $text = $processor->get_modifiable_text(); + if ( '' !== $text ) { + $html .= "{$text}"; + } + } + break; + } + } + + if ( null !== $processor->get_last_error() ) { + return null; + } + + return $html; + } +} From 101d345fc0e8ede742f271e52188e966591213a0 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 5 Aug 2024 22:41:22 -0700 Subject: [PATCH 04/13] Add basic unit tests for SCRIPT fragment parsing. --- .../wpHtmlProcessorFragmentParsing.php | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php index 424c6ab24648d..b16d484908446 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -15,19 +15,27 @@ class Tests_HtmlApi_WpHtmlProcessorFragmentParsing extends WP_UnitTestCase { /** * Verifies that SCRIPT fragment parses behave as they should. * - * + * @dataProvider data_script_fragments * * @param string $inner_html HTML to parse in SCRIPT fragment. * @param string|null $expected_html Expected output of the parse, or `null` if unsupported. */ public function test_script_tag( string $inner_html, ?string $expected_html ) { - $processor = WP_HTML_Processor::create_fragment( $inner_html, '' ); + $normalized = static::normalize_html( $processor ); - $this->assertSame( - $expected_html, - static::normalize_html( $processor ), - 'Failed to properly parse SCRIPT fragment.' - ); + if ( isset( $expected_html ) ) { + $this->assertSame( + $expected_html, + $normalized, + 'Failed to properly parse SCRIPT fragment.' + ); + } else { + $this->assertNull( + $normalized, + "Should have bailed when parsing but didn't." + ); + } } /** @@ -39,7 +47,10 @@ public function test_script_tag( string $inner_html, ?string $expected_html ) { */ public static function data_script_fragments() { return array( - 'Basic SCRIPT' => array( 'const x = 5 < y;', '' ), + 'Basic SCRIPT' => array( 'const x = 5 < y;', 'const x = 5 < y;' ), + 'Text after SCRIPT' => array( 'const x = 5 < y;test', null ), + 'Tag after SCRIPT' => array( 'end', null ), + 'Double escape' => array( "\nconsole.log('');", "\nconsole.log(');" ), ); } From 558381814b883e1c29ee05e513430b57e92f9b51 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 5 Aug 2024 22:43:55 -0700 Subject: [PATCH 05/13] WPCS --- tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php index b16d484908446..d68b3a6056e14 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -21,7 +21,7 @@ class Tests_HtmlApi_WpHtmlProcessorFragmentParsing extends WP_UnitTestCase { * @param string|null $expected_html Expected output of the parse, or `null` if unsupported. */ public function test_script_tag( string $inner_html, ?string $expected_html ) { - $processor = WP_HTML_Processor::create_fragment( $inner_html, '' ); + $processor = WP_HTML_Processor::create_fragment( $inner_html, '' ); $normalized = static::normalize_html( $processor ); if ( isset( $expected_html ) ) { From d47eadd9a6b6d3e378160277f063605493e60a32 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 20 Aug 2024 11:03:49 -0700 Subject: [PATCH 06/13] Move context parsing into a new private static method. This cleans up the actual parsing and makes it easier to accomodate self-contained context elements. --- .../html-api/class-wp-html-processor.php | 87 ++++++++++++++++--- 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index f070b2459d899..2cced6c85722d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -283,6 +283,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * - The only supported document encoding is `UTF-8`, which is the default value. * + * Example: + * + * // Usually, snippets of HTML ought to be processed in the default `` context. + * $processor = WP_HTML_Processor::create_fragment( '

Hi

' ); + * + * // Prevent inner closing tags from closing the containing element and leaking out. + * $processor = WP_HTML_Processor::create_fragment( 'No escape.', '
' ); + * * @todo Verify that creating a fragment in self-contained elements works. * * @since 6.4.0 @@ -290,7 +298,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @since 6.7.0 Can create fragment in any context. * * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. + * @param string $context Context element for the fragment, an HTML start tag like ``. * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. */ @@ -299,23 +307,18 @@ public static function create_fragment( $html, $context = '', $encoding = return null; } - $context_processor = new WP_HTML_Tag_Processor( $context ); - if ( ! $context_processor->next_token() || '#tag' !== $context_processor->get_token_type() ) { - return null; - } - - $context_tag = $context_processor->get_tag(); - $context_attributes = array(); - foreach ( $context_processor->get_attribute_names_with_prefix( '' ) as $name ) { - $context_attributes[ $name ] = $context_processor->get_attribute( $name ); - } - - if ( $context_processor->next_token() ) { + $context_node = self::parse_context_element( $context ); + if ( null === $context_node ) { + _doing_it_wrong( + __FUNCTION__, + __( 'The context argument must be an HTML start tag.' ), + '6.7.0' + ); return null; } $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); - $processor->state->context_node = array( $context_tag, $context_attributes ); + $processor->state->context_node = $context_node; $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; @@ -344,6 +347,62 @@ public static function create_fragment( $html, $context = '', $encoding = return $processor; } + /** + * Parses an HTML span containing a context element for the fragment parser. + * + * Effectively this extracts the first token from an HTML input and if it's a + * starting tag, will return the tag name and any attributes on the tag. + * + * Example: + * + * array( 'BODY', array() ) === self::parse_context_element( '' ); + * array( 'SCRIPT', array( 'type' => 'javascript' ) ) === self::parse_context_element( ''; + } + + // Parse out the context element as well as the attributes. + $context_processor = new WP_HTML_Tag_Processor( $context ); + if ( + ! $context_processor->next_token() || + '#tag' !== $context_processor->get_token_type() || + $context_processor->is_tag_closer() + ) { + return null; + } + + $attributes = array(); + $attribute_names = $context_processor->get_attribute_names_with_prefix( '' ); + if ( isset( $attribute_names ) ) { + foreach ( $attribute_names as $name ) { + $attributes[ $name ] = $context_processor->get_attribute( $name ); + } + } + + return array( $context_processor->get_tag(), $attributes ); + } + /** * Creates an HTML processor in the full parsing mode. * From d1464615877444f50defd29284229e67cd38f18b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 20 Aug 2024 15:58:17 -0700 Subject: [PATCH 07/13] Reject creating a fragment parser for self-contained elements. --- src/wp-includes/html-api/class-wp-html-processor.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 2cced6c85722d..8cc71c9b9c516 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -317,6 +317,15 @@ public static function create_fragment( $html, $context = '', $encoding = return null; } + if ( in_array( $context_node[0], array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { + _doing_it_wrong( + __FUNCTION__, + __( 'The context argument may not specify a self-contained element.' ), + '6.7.0' + ); + return null; + } + $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); $processor->state->context_node = $context_node; $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; From a175677751902fbcd33619c66126afd3a2f789b5 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 20 Aug 2024 16:02:11 -0700 Subject: [PATCH 08/13] Forbit void elements. --- src/wp-includes/html-api/class-wp-html-processor.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 8cc71c9b9c516..ad073884e40e2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -317,6 +317,15 @@ public static function create_fragment( $html, $context = '', $encoding = return null; } + if ( self::is_void( $context_node[0] ) ) { + _doing_it_wrong( + __FUNCTION__, + __( 'The context argument may not specify a void element.' ), + '6.7.0' + ); + return null; + } + if ( in_array( $context_node[0], array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { _doing_it_wrong( __FUNCTION__, From 77f1a38b2bc6d321d87441ed2e51b556881ddfb9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 20 Aug 2024 16:03:03 -0700 Subject: [PATCH 09/13] Add TODO about foreign content. --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ad073884e40e2..dc9a1c52e220a 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -291,7 +291,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * // Prevent inner closing tags from closing the containing element and leaking out. * $processor = WP_HTML_Processor::create_fragment( 'No
escape.', '
' ); * - * @todo Verify that creating a fragment in self-contained elements works. + * @todo Set the SVG or MathML namespace when creating with context node SVG or MATH. * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. From 640f458d95875e9ad6f615257f99471cf2606ecf Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 20 Aug 2024 16:44:29 -0700 Subject: [PATCH 10/13] Update unit tests for invalid contexts. --- .../wpHtmlProcessorFragmentParsing.php | 76 +++++++++++++------ 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php index d68b3a6056e14..f080683050e28 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php @@ -13,29 +13,23 @@ */ class Tests_HtmlApi_WpHtmlProcessorFragmentParsing extends WP_UnitTestCase { /** - * Verifies that SCRIPT fragment parses behave as they should. + * Verifies that the fragment parser doesn't allow invalid context nodes. * - * @dataProvider data_script_fragments + * This includes void elements and self-contained elements because they can + * contain no inner HTML. Operations on self-contained elements should occur + * through methods such as {@see WP_HTML_Tag_Processor::set_modifiable_text}. * - * @param string $inner_html HTML to parse in SCRIPT fragment. - * @param string|null $expected_html Expected output of the parse, or `null` if unsupported. + * @ticket 61576 + * + * @dataProvider data_invalid_fragment_contexts + * + * @param string $context Invalid context node for fragment parser. */ - public function test_script_tag( string $inner_html, ?string $expected_html ) { - $processor = WP_HTML_Processor::create_fragment( $inner_html, '' ); - $normalized = static::normalize_html( $processor ); - - if ( isset( $expected_html ) ) { - $this->assertSame( - $expected_html, - $normalized, - 'Failed to properly parse SCRIPT fragment.' - ); - } else { - $this->assertNull( - $normalized, - "Should have bailed when parsing but didn't." - ); - } + public function test_rejects_invalid_fragment_contexts( string $context ) { + $this->assertNull( + WP_HTML_Processor::create_fragment( 'just a test', $context ), + "Should not have been able to create a fragment parser with context node {$context}" + ); } /** @@ -45,12 +39,44 @@ public function test_script_tag( string $inner_html, ?string $expected_html ) { * * @return array[] */ - public static function data_script_fragments() { + public static function data_invalid_fragment_contexts() { return array( - 'Basic SCRIPT' => array( 'const x = 5 < y;', 'const x = 5 < y;' ), - 'Text after SCRIPT' => array( 'const x = 5 < y;test', null ), - 'Tag after SCRIPT' => array( 'end', null ), - 'Double escape' => array( "\nconsole.log('');", "\nconsole.log(');" ), + // Invalid contexts. + 'Invalid text' => array( 'just some text' ), + 'Invalid comment' => array( '' ), + 'Invalid closing' => array( '
' ), + 'Invalid DOCTYPE' => array( '' ), + + // Void elements. + 'AREA' => array( '' ), + 'BASE' => array( '' ), + 'BASEFONT' => array( '' ), + 'BGSOUND' => array( '' ), + 'BR' => array( '
' ), + 'COL' => array( '' ), + 'EMBED' => array( '' ), + 'FRAME' => array( '' ), + 'HR' => array( '
' ), + 'IMG' => array( '' ), + 'INPUT' => array( '' ), + 'KEYGEN' => array( '' ), + 'LINK' => array( '' ), + 'META' => array( '' ), + 'PARAM' => array( '' ), + 'SOURCE' => array( '' ), + 'TRACK' => array( '' ), + 'WBR' => array( '' ), + + // Self-contained elements. + 'IFRAME' => array( '