From 80c5983392fc8df91749ca0b61c13b6d7e9d4669 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Sun, 16 Jun 2024 11:17:15 +0200 Subject: [PATCH 01/34] Add test setup for running Core tests. --- my.bootstrap.php | 16 ++++++++++++++++ my.phpunit.xml | 23 +++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 my.bootstrap.php create mode 100644 my.phpunit.xml diff --git a/my.bootstrap.php b/my.bootstrap.php new file mode 100644 index 0000000000000..83a781da4aaff --- /dev/null +++ b/my.bootstrap.php @@ -0,0 +1,16 @@ + + + + + + + + tests/phpunit/tests/formatting + + + tests/phpunit/tests/kses.php + + + tests/phpunit/tests/html-api + + + From 0e1a917ab904508b3986296620973bfc69067371 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 18 Jul 2024 17:15:50 +0200 Subject: [PATCH 02/34] Add quirks mode parameter to create_fragment Allow quirks mode to be set before document processing begins. --- .../html-api/class-wp-html-processor.php | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 6f5da5477f922..4c44dedf81f02 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -287,12 +287,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. * - * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. - * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. + * @param string $html Input HTML fragment to process. + * @param string $context Context element for the fragment, must be default of ``. + * @param string $encoding Optional. Text encoding of the document; must be default of 'UTF-8'. 
+ * @param string $compat_mode Optional. Set document compatibility mode (quirks). Should be + * 'quirks-mode' or 'no-quirks-mode'. Default: 'no-quirks-mode'. * @return static|null The created processor if successful, otherwise null. */ - public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { + public static function create_fragment( $html, $context = '', $encoding = 'UTF-8', $compat_mode = WP_HTML_Processor_State::NO_QUIRKS_MODE ) { if ( '' !== $context || 'UTF-8' !== $encoding ) { return null; } @@ -303,6 +305,10 @@ public static function create_fragment( $html, $context = '', $encoding = $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; + if ( WP_HTML_Processor_State::QUIRKS_MODE === $compat_mode ) { + $processor->state->compat_mode = WP_HTML_Processor_State::QUIRKS_MODE; + } + // @todo Create "fake" bookmarks for non-existent but implied nodes. $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); From 526777438daa32d1ccf6af85554c1bd683be7a6c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 12:34:18 +0200 Subject: [PATCH 03/34] Add standards mode tests for class name case sensitivity --- .../tests/html-api/wpHtmlProcessor.php | 127 ++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 0b7d72bdbee9a..3aedbb32d6f4e 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -531,4 +531,131 @@ public function test_foreign_content_script_self_closing() { $processor = WP_HTML_Processor::create_fragment( '' ); $this->assertTrue( $processor->next_tag( 'script' ) ); } + + /** + * Ensures that the tag processor is case sensitive when removing CSS classes in no-quirks mode. 
+ * + * @ticket 61531 + * + * @covers ::remove_class + */ + public function test_remove_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $processor->next_tag(); + $processor->remove_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->remove_class( 'UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when adding CSS classes in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::add_class + */ + public function test_add_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $processor->next_tag(); + $processor->add_class( 'UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->add_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when checking has CSS classes in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::has_class + */ + public function test_has_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $processor->next_tag(); + $this->assertFalse( $processor->has_class( 'upper' ) ); + $this->assertTrue( $processor->has_class( 'UPPER' ) ); + } + + /** + * Ensures that the tag processor lists unique CSS class names in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::class_list + */ + public function test_class_list_no_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $processor->next_tag(); + $class_list = iterator_to_array( $processor->class_list() ); + $this->assertSame( + array( 'A', 'a', 'B', 'b', 'É', 'é' ), + $class_list + ); + } + + /** + * Ensures that the tag processor is case sensitive when removing CSS classes in quirks mode. 
+ * + * @ticket 61531 + * + * @covers ::remove_class + */ + public function test_remove_class_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); + $processor->next_tag(); + $processor->remove_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->remove_class( 'UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when adding CSS classes in quirks mode. + * + * @ticket 61531 + * + * @covers ::add_class + */ + public function test_add_class_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); + $processor->next_tag(); + $processor->add_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when checking has CSS classes in quirks mode. + * + * @ticket 61531 + * + * @covers ::has_class + */ + public function test_has_class_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); + $processor->next_tag(); + $this->assertTrue( $processor->has_class( 'upper' ) ); + $this->assertTrue( $processor->has_class( 'UPPER' ) ); + } + + /** + * Ensures that the tag processor lists unique CSS class names in quirks mode. 
+ * + * @ticket 61531 + * + * @covers ::class_list + */ + public function test_class_list_quirks_mode() { + $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); + $processor->next_tag(); + $class_list = iterator_to_array( $processor->class_list() ); + $this->assertSame( + array( 'A', 'a', 'B', 'b', 'É', 'é' ), + $class_list + ); + } } From ed8f34f519e6d1be8d5694d800d51b9c8d70feb4 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 16:31:38 +0200 Subject: [PATCH 04/34] Implement has_class --- .../html-api/class-wp-html-processor.php | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4c44dedf81f02..689d595352b0c 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4645,13 +4645,38 @@ public function remove_class( $class_name ): bool { /** * Returns if a matched tag contains the given ASCII case-insensitive class name. * + * + * > When matching against a document which is in quirks mode, class names must be matched + * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching + * > class names they are identical to. + * + * @see https://www.w3.org/TR/selectors-4/#class-html + * * @since 6.6.0 Subclassed for the HTML Processor. + * @since 6.7.0 Matches are case sensitive in no-quirks mode (the default). * * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. * @return bool|null Whether the matched tag contains the given class name, or null if not matched. */ public function has_class( $wanted_class ): ?bool { - return $this->is_virtual() ? 
null : parent::has_class( $wanted_class ); + if ( $this->is_virtual() ) { + return false; + } + + if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { + return null; + } + + $compare_func = WP_HTML_Processor_State::QUIRKS_MODE === $this->state->compat_mode ? + 'strcasecmp' : + 'strcmp'; + + foreach ( $this->class_list() as $class_name ) { + if ( 0 === $compare_func( $class_name, $wanted_class ) ) { + return true; + } + } + return false; } /** From f2fa469e2fda3e19b5d3bff17f6b9fc79cb1dba9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 17:10:28 +0200 Subject: [PATCH 05/34] Add test with null bytes in class attribute --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 3aedbb32d6f4e..84b5b7b163bed 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -658,4 +658,17 @@ public function test_class_list_quirks_mode() { $class_list ); } + + /** + * Ensures that the tag processor matches class names with null bytes correctly. + * + * @ticket 61531 + * + * @covers ::has_class + */ + public function test_has_class_null_byte_class_name() { + $processor = WP_HTML_Processor::create_fragment( "" ); + $processor->next_tag(); + $this->assertTrue( $processor->has_class( 'null-byte-�-there' ) ); + } } From c1034103cd2231f7efcc77884b5c9e275009eafd Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 19 Jul 2024 16:36:06 +0200 Subject: [PATCH 06/34] Remove lower-casing behavior of class_list This is necessary for has_class to work properly. This could be put into a protected method or the case sensitivity could be a parameter if desired. 
--- src/wp-includes/html-api/class-wp-html-tag-processor.php | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 95216b08a1988..d673381ae7763 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1126,6 +1126,7 @@ public function paused_at_incomplete_token(): bool { * // Outputs: "free lang-en " * * @since 6.4.0 + * @since 6.7.0 Class names are no longer force lower-cased. */ public function class_list() { if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { @@ -1155,12 +1156,7 @@ public function class_list() { return; } - /* - * CSS class names are case-insensitive in the ASCII range. - * - * @see https://www.w3.org/TR/CSS2/syndata.html#x1 - */ - $name = strtolower( substr( $class, $at, $length ) ); + $name = substr( $class, $at, $length ); $at += $length; /* From 1435cf9eb027af28573e23f75b6e4ee5e0982a24 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 12 Aug 2024 18:02:27 +0200 Subject: [PATCH 07/34] Update to use document_mode Subsequent changes introduced document_mode instead of compat_mode --- .../html-api/class-wp-html-processor.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 689d595352b0c..fc2bf2416f23f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -287,14 +287,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. * - * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. 
- * @param string $encoding Optional. Text encoding of the document; must be default of 'UTF-8'. - * @param string $compat_mode Optional. Set document compatibility mode (quirks). Should be - * 'quirks-mode' or 'no-quirks-mode'. Default: 'no-quirks-mode'. + * @param string $html Input HTML fragment to process. + * @param string $context Context element for the fragment, must be default of ``. + * @param string $encoding Optional. Text encoding of the document; must be default of 'UTF-8'. + * @param string $document_mode Optional. Set document compatibility mode (quirks). Should be + * 'quirks-mode' or 'no-quirks-mode'. Default: 'no-quirks-mode'. * @return static|null The created processor if successful, otherwise null. */ - public static function create_fragment( $html, $context = '', $encoding = 'UTF-8', $compat_mode = WP_HTML_Processor_State::NO_QUIRKS_MODE ) { + public static function create_fragment( $html, $context = '', $encoding = 'UTF-8', $document_mode = WP_HTML_Processor_State::NO_QUIRKS_MODE ) { if ( '' !== $context || 'UTF-8' !== $encoding ) { return null; } @@ -305,8 +305,8 @@ public static function create_fragment( $html, $context = '', $encoding = $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; - if ( WP_HTML_Processor_State::QUIRKS_MODE === $compat_mode ) { - $processor->state->compat_mode = WP_HTML_Processor_State::QUIRKS_MODE; + if ( WP_HTML_Processor_State::QUIRKS_MODE === $document_mode ) { + $processor->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; } // @todo Create "fake" bookmarks for non-existent but implied nodes. @@ -4667,7 +4667,7 @@ public function has_class( $wanted_class ): ?bool { return null; } - $compare_func = WP_HTML_Processor_State::QUIRKS_MODE === $this->state->compat_mode ? + $compare_func = WP_HTML_Processor_State::QUIRKS_MODE === $this->state->document_mode ? 
'strcasecmp' : 'strcmp'; From 4f1836818ca863f0611409fce5658acbe3fffe12 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 11:53:42 +0200 Subject: [PATCH 08/34] Replace null bytes in class_list class names --- .../html-api/class-wp-html-processor.php | 1 + .../html-api/class-wp-html-tag-processor.php | 3 ++- .../tests/html-api/wpHtmlTagProcessor.php | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index fc2bf2416f23f..798cd8f81d499 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4694,6 +4694,7 @@ public function has_class( $wanted_class ): ?bool { * // Outputs: "free lang-en " * * @since 6.6.0 Subclassed for the HTML Processor. + * @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD). */ public function class_list() { return $this->is_virtual() ? null : parent::class_list(); diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index d673381ae7763..c19c07e03348f 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1127,6 +1127,7 @@ public function paused_at_incomplete_token(): bool { * * @since 6.4.0 * @since 6.7.0 Class names are no longer force lower-cased. + * @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD). 
*/ public function class_list() { if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { @@ -1156,7 +1157,7 @@ public function class_list() { return; } - $name = substr( $class, $at, $length ); + $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); $at += $length; /* diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index b9c6817988032..c946ec191db54 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -2211,6 +2211,22 @@ public function test_class_list_visits_unique_class_names_only_once() { $this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' ); } + /** + * Ensures that null bytes are replaced with the replacement character (U+FFFD) in class_list. + * + * @ticket 61531 + * + * @covers ::class_list + */ + public function test_class_list_null_bytes_replaced() { + $processor = new WP_HTML_Tag_Processor( "
" ); + $processor->next_tag(); + + $found_classes = iterator_to_array( $processor->class_list() ); + + $this->assertSame( array( 'a', "\u{FFFD}", "b\u{FFFD}", "\u{FFFD}c\u{FFFD}" ), $found_classes ); + } + /** * @ticket 59209 * From 2540406c62ba77d35f17d610bac3c6eb130d1a4a Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 15:03:45 +0200 Subject: [PATCH 09/34] Fix tests --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 84b5b7b163bed..f0e295f9fd729 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -598,7 +598,7 @@ public function test_class_list_no_quirks_mode() { } /** - * Ensures that the tag processor is case sensitive when removing CSS classes in quirks mode. + * Ensures that the tag processor is case insensitive when removing CSS classes in quirks mode. * * @ticket 61531 * @@ -608,14 +608,11 @@ public function test_remove_class_quirks_mode() { $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); $processor->next_tag(); $processor->remove_class( 'upper' ); - $this->assertSame( '', $processor->get_updated_html() ); - - $processor->remove_class( 'UPPER' ); $this->assertSame( '', $processor->get_updated_html() ); } /** - * Ensures that the tag processor is case sensitive when adding CSS classes in quirks mode. + * Ensures that the tag processor is case insensitive when adding CSS classes in quirks mode. 
* * @ticket 61531 * From 9be0a32010b04a34c65af09fcb253d2c9b07d8fc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 15:39:27 +0200 Subject: [PATCH 10/34] Handle all the comparison stuff with a protected comparable_class_name function --- .../html-api/class-wp-html-processor.php | 37 ++++++++++--------- .../html-api/class-wp-html-tag-processor.php | 26 ++++++++++--- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 798cd8f81d499..da1e37cca0715 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4642,6 +4642,24 @@ public function remove_class( $class_name ): bool { return $this->is_virtual() ? false : parent::remove_class( $class_name ); } + /** + * Transform a class name string to a comparable form. + * + * This allows subclasses to ensure class name comparsison is handled correctly, + * for example, the HTML Processor may use case-insensitive comparison when the + * document is in quirks mode. + * + * @since 6.7.0 + * + * @param string $class_name The class name to transform. + * @return string The transformed class name. + */ + protected function comparable_class_name( string $class_name ): string { + return $this->state->document_mode === WP_HTML_Processor_State::QUIRKS_MODE + ? strtolower( $class_name ) + : $class_name; + } + /** * Returns if a matched tag contains the given ASCII case-insensitive class name. * @@ -4659,24 +4677,7 @@ public function remove_class( $class_name ): bool { * @return bool|null Whether the matched tag contains the given class name, or null if not matched. 
*/ public function has_class( $wanted_class ): ?bool { - if ( $this->is_virtual() ) { - return false; - } - - if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { - return null; - } - - $compare_func = WP_HTML_Processor_State::QUIRKS_MODE === $this->state->document_mode ? - 'strcasecmp' : - 'strcmp'; - - foreach ( $this->class_list() as $class_name ) { - if ( 0 === $compare_func( $class_name, $wanted_class ) ) { - return true; - } - } - return false; + return $this->is_virtual() ? false : parent::has_class( $wanted_class ); } /** diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index c19c07e03348f..2280abd144f95 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1188,10 +1188,10 @@ public function has_class( $wanted_class ): ?bool { return null; } - $wanted_class = strtolower( $wanted_class ); + $wanted_class = $this->comparable_class_name( $wanted_class ); foreach ( $this->class_list() as $class_name ) { - if ( $class_name === $wanted_class ) { + if ( $this->comparable_class_name( $class_name ) === $wanted_class ) { return true; } } @@ -2293,7 +2293,7 @@ private function class_name_updates_to_attributes_updates(): void { break; } - $name = substr( $existing_class, $at, $name_length ); + $name = $this->comparable_class_name( substr( $existing_class, $at, $name_length ) ); $at += $name_length; // If this class is marked for removal, start processing the next one. 
@@ -3828,7 +3828,7 @@ public function add_class( $class_name ): bool { return false; } - $this->classname_updates[ $class_name ] = self::ADD_CLASS; + $this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::ADD_CLASS; return true; } @@ -3850,12 +3850,28 @@ public function remove_class( $class_name ): bool { } if ( null !== $this->tag_name_starts_at ) { - $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; + $this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::REMOVE_CLASS; } return true; } + /** + * Transform a class name string to a comparable form. + * + * This allows subclasses to ensure class name comparsison is handled correctly, + * for example, the HTML Processor may use case-insensitive comparison when the + * document is in quirks mode. + * + * @since 6.7.0 + * + * @param string $class_name The class name to transform. + * @return string The transformed class name. + */ + protected function comparable_class_name( string $class_name ): string { + return $class_name; + } + /** * Returns the string representation of the HTML Tag Processor. * From cd43ef933c5f48893d77fdf7f933f413370c6e7c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 15:42:26 +0200 Subject: [PATCH 11/34] Lint --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index da1e37cca0715..4fb84cb318aea 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4655,7 +4655,7 @@ public function remove_class( $class_name ): bool { * @return string The transformed class name. 
*/ protected function comparable_class_name( string $class_name ): string { - return $this->state->document_mode === WP_HTML_Processor_State::QUIRKS_MODE + return WP_HTML_Processor_State::QUIRKS_MODE === $this->state->document_mode ? strtolower( $class_name ) : $class_name; } From 00e8affaf62062730242cff0787738838e01677c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 15:55:49 +0200 Subject: [PATCH 12/34] Improve phpdoc explanations --- .../html-api/class-wp-html-processor.php | 20 +++++++++---------- .../html-api/class-wp-html-tag-processor.php | 5 ++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4fb84cb318aea..178b14a5bb5da 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4645,9 +4645,15 @@ public function remove_class( $class_name ): bool { /** * Transform a class name string to a comparable form. * - * This allows subclasses to ensure class name comparsison is handled correctly, - * for example, the HTML Processor may use case-insensitive comparison when the - * document is in quirks mode. + * When the document is in quirks mode, class names are transformed to + * ASCII lowercase for comparison. In no quirks mode, class names are + * compared as they are (case-sensitive). + * + * > When matching against a document which is in quirks mode, class names must be matched + * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching + * > class names they are identical to. + * + * @see https://www.w3.org/TR/selectors-4/#class-html * * @since 6.7.0 * @@ -4663,15 +4669,7 @@ protected function comparable_class_name( string $class_name ): string { /** * Returns if a matched tag contains the given ASCII case-insensitive class name. 
* - * - * > When matching against a document which is in quirks mode, class names must be matched - * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching - * > class names they are identical to. - * - * @see https://www.w3.org/TR/selectors-4/#class-html - * * @since 6.6.0 Subclassed for the HTML Processor. - * @since 6.7.0 Matches are case sensitive in no-quirks mode (the default). * * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. * @return bool|null Whether the matched tag contains the given class name, or null if not matched. diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 2280abd144f95..323569e7a17ca 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3859,9 +3859,8 @@ public function remove_class( $class_name ): bool { /** * Transform a class name string to a comparable form. * - * This allows subclasses to ensure class name comparsison is handled correctly, - * for example, the HTML Processor may use case-insensitive comparison when the - * document is in quirks mode. + * This method may be subclassed to customize class names for comparison. For example, this + * allows for subclasses to support case-insensitive class name comparison. 
* * @since 6.7.0 * From eb07339f2ea40e7a4627a87bfe760ebd2fc1cf0f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 16:24:54 +0200 Subject: [PATCH 13/34] Remove comment about styling (HTML structure is affected) --- src/wp-includes/html-api/class-wp-html-processor-state.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 16875c4ac1b2b..ac16aedb2ee92 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -393,9 +393,6 @@ class WP_HTML_Processor_State { * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag * if one is in scope and open, otherwise the TABLE becomes a child of the P. * - * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but - * none of the other changes modifies how the HTML is parsed or selected. - * * @see self::QUIRKS_MODE * @see self::NO_QUIRKS_MODE * From a9e924d6e3733eee517953f4d49393cc6ba22211 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 17:02:21 +0200 Subject: [PATCH 14/34] Revert "Replace null bytes in class_list class names" This reverts commit 4f1836818ca863f0611409fce5658acbe3fffe12. --- .../html-api/class-wp-html-processor.php | 1 - .../html-api/class-wp-html-tag-processor.php | 3 +-- .../tests/html-api/wpHtmlTagProcessor.php | 16 ---------------- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 178b14a5bb5da..9ee84c5b888b9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4693,7 +4693,6 @@ public function has_class( $wanted_class ): ?bool { * // Outputs: "free lang-en " * * @since 6.6.0 Subclassed for the HTML Processor. 
- * @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD). */ public function class_list() { return $this->is_virtual() ? null : parent::class_list(); diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 323569e7a17ca..9049d209add2c 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1127,7 +1127,6 @@ public function paused_at_incomplete_token(): bool { * * @since 6.4.0 * @since 6.7.0 Class names are no longer force lower-cased. - * @since 6.7.0 Null bytes are replaced with the replacement character (U+FFFD). */ public function class_list() { if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { @@ -1157,7 +1156,7 @@ public function class_list() { return; } - $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); + $name = substr( $class, $at, $length ); $at += $length; /* diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index c946ec191db54..b9c6817988032 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -2211,22 +2211,6 @@ public function test_class_list_visits_unique_class_names_only_once() { $this->assertSame( array( 'one' ), $found_classes, 'Visited multiple copies of the same class name when it should have skipped the duplicates.' ); } - /** - * Ensures that null bytes are replaced with the replacement character (U+FFFD) in class_list. - * - * @ticket 61531 - * - * @covers ::class_list - */ - public function test_class_list_null_bytes_replaced() { - $processor = new WP_HTML_Tag_Processor( "
" ); - $processor->next_tag(); - - $found_classes = iterator_to_array( $processor->class_list() ); - - $this->assertSame( array( 'a', "\u{FFFD}", "b\u{FFFD}", "\u{FFFD}c\u{FFFD}" ), $found_classes ); - } - /** * @ticket 59209 * From 1dc175269118ac070fce82cdeeb67791f0f6733e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 13 Aug 2024 17:06:52 +0200 Subject: [PATCH 15/34] Remove null-byte has_class test --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index f0e295f9fd729..d636df1485afc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -655,17 +655,4 @@ public function test_class_list_quirks_mode() { $class_list ); } - - /** - * Ensures that the tag processor matches class names with null bytes correctly. - * - * @ticket 61531 - * - * @covers ::has_class - */ - public function test_has_class_null_byte_class_name() { - $processor = WP_HTML_Processor::create_fragment( "" ); - $processor->next_tag(); - $this->assertTrue( $processor->has_class( 'null-byte-�-there' ) ); - } } From be6091bb20ea5dd165cdd85928eab9984ec4a238 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 23 Aug 2024 17:01:21 +0200 Subject: [PATCH 16/34] Revert ::create_fragment changes Full documents can be created in quirks mode now. There's no need to introduce quirks mode to the fragment parser or change its signature in order to test the quirks mode changes. 
--- .../html-api/class-wp-html-processor.php | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 487578f9b3dc8..bdebd650b54c8 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -287,14 +287,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. * - * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment, must be default of ``. - * @param string $encoding Optional. Text encoding of the document; must be default of 'UTF-8'. - * @param string $document_mode Optional. Set document compatibility mode (quirks). Should be - * 'quirks-mode' or 'no-quirks-mode'. Default: 'no-quirks-mode'. + * @param string $html Input HTML fragment to process. + * @param string $context Context element for the fragment, must be default of ``. + * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. */ - public static function create_fragment( $html, $context = '', $encoding = 'UTF-8', $document_mode = WP_HTML_Processor_State::NO_QUIRKS_MODE ) { + public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { if ( '' !== $context || 'UTF-8' !== $encoding ) { return null; } @@ -305,10 +303,6 @@ public static function create_fragment( $html, $context = '', $encoding = $processor->state->encoding = $encoding; $processor->state->encoding_confidence = 'certain'; - if ( WP_HTML_Processor_State::QUIRKS_MODE === $document_mode ) { - $processor->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; - } - // @todo Create "fake" bookmarks for non-existent but implied nodes. 
$processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); From 07726b166798305c696088f897594d46b3f60187 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 23 Aug 2024 17:17:43 +0200 Subject: [PATCH 17/34] Adjust tests to use full parser in quirks/no-quirks --- .../tests/html-api/wpHtmlProcessor.php | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 0bd4c8a0526a7..fec30d3d3ef54 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -528,13 +528,13 @@ public function test_foreign_content_script_self_closing() { * @covers ::remove_class */ public function test_remove_class_no_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '' ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $processor->remove_class( 'upper' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); $processor->remove_class( 'UPPER' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); } /** @@ -545,13 +545,13 @@ public function test_remove_class_no_quirks_mode() { * @covers ::add_class */ public function test_add_class_no_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '' ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $processor->add_class( 'UPPER' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); $processor->add_class( 'upper' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() 
); } /** @@ -562,8 +562,8 @@ public function test_add_class_no_quirks_mode() { * @covers ::has_class */ public function test_has_class_no_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '' ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $this->assertFalse( $processor->has_class( 'upper' ) ); $this->assertTrue( $processor->has_class( 'UPPER' ) ); } @@ -576,8 +576,8 @@ public function test_has_class_no_quirks_mode() { * @covers ::class_list */ public function test_class_list_no_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '' ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( array( 'A', 'a', 'B', 'b', 'É', 'é' ), @@ -593,10 +593,10 @@ public function test_class_list_no_quirks_mode() { * @covers ::remove_class */ public function test_remove_class_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $processor->remove_class( 'upper' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); } /** @@ -607,10 +607,10 @@ public function test_remove_class_quirks_mode() { * @covers ::add_class */ public function test_add_class_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $processor->add_class( 'upper' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); } /** @@ -621,8 +621,8 @@ public function 
test_add_class_quirks_mode() { * @covers ::has_class */ public function test_has_class_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $this->assertTrue( $processor->has_class( 'upper' ) ); $this->assertTrue( $processor->has_class( 'UPPER' ) ); } @@ -635,8 +635,8 @@ public function test_has_class_quirks_mode() { * @covers ::class_list */ public function test_class_list_quirks_mode() { - $processor = WP_HTML_Processor::create_fragment( '', '', 'UTF-8', WP_HTML_Processor_State::QUIRKS_MODE ); - $processor->next_tag(); + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( array( 'A', 'a', 'B', 'b', 'É', 'é' ), From ec05abb39cc0e34b9c7dd0dca9b1fa52ebf163a2 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 23 Aug 2024 18:21:24 +0200 Subject: [PATCH 18/34] Move quirks mode to tag processor Quirks mode changes behavior CSS class functions, namely whether they are ASCII case-insensitive class name matches or byte-for-byte comparisons. It makes sense to move quirks mode into the tag processor so that it can deal with this correctly. 
--- .../class-wp-html-processor-state.php | 46 ---------------- .../html-api/class-wp-html-processor.php | 30 ++--------- .../html-api/class-wp-html-tag-processor.php | 52 +++++++++++++++++-- 3 files changed, 52 insertions(+), 76 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index ac16aedb2ee92..b7cdd347ca85b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -299,31 +299,6 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset'; - /** - * No-quirks mode document compatability mode. - * - * > In no-quirks mode, the behavior is (hopefully) the desired behavior - * > described by the modern HTML and CSS specifications. - * - * @since 6.7.0 - * - * @var string - */ - const NO_QUIRKS_MODE = 'no-quirks-mode'; - - /** - * Quirks mode document compatability mode. - * - * > In quirks mode, layout emulates behavior in Navigator 4 and Internet - * > Explorer 5. This is essential in order to support websites that were - * > built before the widespread adoption of web standards. - * - * @since 6.7.0 - * - * @var string - */ - const QUIRKS_MODE = 'quirks-mode'; - /** * The stack of template insertion modes. * @@ -381,27 +356,6 @@ class WP_HTML_Processor_State { */ public $insertion_mode = self::INSERTION_MODE_INITIAL; - /** - * Indicates if the document is in quirks mode or no-quirks mode. - * - * Impact on HTML parsing: - * - * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte - * manner, otherwise for backwards compatability, class selectors are to - * match in an ASCII case-insensitive manner. - * - * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag - * if one is in scope and open, otherwise the TABLE becomes a child of the P. 
- * - * @see self::QUIRKS_MODE - * @see self::NO_QUIRKS_MODE - * - * @since 6.7.0 - * - * @var string - */ - public $document_mode = self::NO_QUIRKS_MODE; - /** * Context node initializing fragment parser, if created as a fragment parser. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index bdebd650b54c8..96c67501a8fe5 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1078,7 +1078,7 @@ private function step_initial(): bool { case 'html': $doctype = $this->get_doctype_info(); if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) { - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; } /* @@ -1093,7 +1093,7 @@ private function step_initial(): bool { * > Anything else */ initial_anything_else: - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; return $this->step( self::REPROCESS_CURRENT_NODE ); } @@ -2465,7 +2465,7 @@ private function step_in_body(): bool { * > has a p element in button scope, then close a p element. */ if ( - WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && + WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); @@ -4634,30 +4634,6 @@ public function remove_class( $class_name ): bool { return $this->is_virtual() ? false : parent::remove_class( $class_name ); } - /** - * Transform a class name string to a comparable form. - * - * When the document is in quirks mode, class names are transformed to - * ASCII lowercase for comparison. In no quirks mode, class names are - * compared as they are (case-sensitive). 
- * - * > When matching against a document which is in quirks mode, class names must be matched - * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching - * > class names they are identical to. - * - * @see https://www.w3.org/TR/selectors-4/#class-html - * - * @since 6.7.0 - * - * @param string $class_name The class name to transform. - * @return string The transformed class name. - */ - protected function comparable_class_name( string $class_name ): string { - return WP_HTML_Processor_State::QUIRKS_MODE === $this->state->document_mode - ? strtolower( $class_name ) - : $class_name; - } - /** * Returns if a matched tag contains the given ASCII case-insensitive class name. * diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 4dddf5fa47337..8b3e13226609e 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -511,6 +511,18 @@ class WP_HTML_Tag_Processor { */ protected $parser_state = self::STATE_READY; + /** + * Indicates if the document is in quirks mode or no-quirks mode. + * + * @see self::QUIRKS_MODE + * @see self::NO_QUIRKS_MODE + * + * @since 6.7.0 + * + * @var string + */ + protected $compat_mode = self::NO_QUIRKS_MODE; + /** * Indicates whether the parser is inside foreign content, * e.g. inside an SVG or MathML element. @@ -3858,8 +3870,15 @@ public function remove_class( $class_name ): bool { /** * Transform a class name string to a comparable form. * - * This method may be subclassed to customize class names for comparison. For example, this - * allows for subclasses to support case-insensitive class name comparison. + * When the document is in quirks mode, class names are transformed to + * ASCII lowercase for comparison. In no quirks mode, class names are + * compared as they are (case-sensitive). 
+ * + * > When matching against a document which is in quirks mode, class names must be matched + * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching + * > class names they are identical to. + * + * @see https://www.w3.org/TR/selectors-4/#class-html * * @since 6.7.0 * @@ -3867,7 +3886,9 @@ public function remove_class( $class_name ): bool { * @return string The transformed class name. */ protected function comparable_class_name( string $class_name ): string { - return $class_name; + return self::QUIRKS_MODE === $this->compat_mode + ? strtolower( $class_name ) + : $class_name; } /** @@ -4259,4 +4280,29 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { * @since 6.5.0 */ const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; + + /** + * No-quirks mode document compatability mode. + * + * > In no-quirks mode, the behavior is (hopefully) the desired behavior + * > described by the modern HTML and CSS specifications. + * + * @since 6.7.0 + * + * @var string + */ + const NO_QUIRKS_MODE = 'no-quirks-mode'; + + /** + * Quirks mode document compatability mode. + * + * > In quirks mode, layout emulates behavior in Navigator 4 and Internet + * > Explorer 5. This is essential in order to support websites that were + * > built before the widespread adoption of web standards. 
+ * + * @since 6.7.0 + * + * @var string + */ + const QUIRKS_MODE = 'quirks-mode'; } From 176604c9b62418b4937faeb1d83f85b8b2775af5 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 23 Aug 2024 18:26:52 +0200 Subject: [PATCH 19/34] Make comparable_class_name internal --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 8b3e13226609e..526bc6344bbb8 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3885,7 +3885,7 @@ public function remove_class( $class_name ): bool { * @param string $class_name The class name to transform. * @return string The transformed class name. */ - protected function comparable_class_name( string $class_name ): string { + private function comparable_class_name( string $class_name ): string { return self::QUIRKS_MODE === $this->compat_mode ? strtolower( $class_name ) : $class_name; From bb6d772ff188fc5b765a9dbf2808a89bfd2503c6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 26 Aug 2024 13:46:18 +0200 Subject: [PATCH 20/34] Add information about how quirks mode changes behavior --- .../html-api/class-wp-html-tag-processor.php | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 526bc6344bbb8..18b146de757b0 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4300,6 +4300,28 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { * > Explorer 5. This is essential in order to support websites that were * > built before the widespread adoption of web standards. 
* + * The behavior of the HTML API is impacted in the following ways when a document is in quirks mode: + * - The CSS class name methods on the Tag and HTML Processors such as `has_class` are + * in ASCII case-insensitive. In no-quirks mode, they are case-sensitive. + * - The HTML Processor may produce a different structure, namely when HTML attempts to nest + * a TABLE under a P. In no-quirks mode this is not allowed and a TABLE start tag closes + * open P elements. In quirks mode, TABLE is allowed to appear as a descendant of P so + * the tree structure will appear as authored. For example: + * - Quirks mode: + *

+ * Produces: + * HTML + *  └BODY + *   └P + *    └TABLE + * - No-quirks mode: + *

+ * Produces: + * HTML + *  └BODY + *   ├P + *   └TABLE + * * @since 6.7.0 * * @var string From 15d479ebdee92a824073f29b45378438fd66cf93 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 26 Aug 2024 18:21:38 +0200 Subject: [PATCH 21/34] Remove comparable_class_name function call from has_class loop --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 18b146de757b0..3e7d6a80dedfa 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1199,10 +1199,14 @@ public function has_class( $wanted_class ): ?bool { return null; } - $wanted_class = $this->comparable_class_name( $wanted_class ); + $case_insensitive = self::QUIRKS_MODE === $this->compat_mode; + $wanted_length = strlen( $wanted_class ); foreach ( $this->class_list() as $class_name ) { - if ( $this->comparable_class_name( $class_name ) === $wanted_class ) { + if ( + strlen( $class_name ) === $wanted_length && + 0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive ) + ) { return true; } } From 8dbf6abf3d2b8a99f649a3e718b70d163e6a4cd9 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 26 Aug 2024 18:51:06 +0200 Subject: [PATCH 22/34] Remove comparable_class_name method entirely --- .../html-api/class-wp-html-tag-processor.php | 42 +++++++------------ 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 3e7d6a80dedfa..82cba171bf015 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2308,8 +2308,12 @@ private function class_name_updates_to_attributes_updates(): void { break; 
} - $name = $this->comparable_class_name( substr( $existing_class, $at, $name_length ) ); - $at += $name_length; + $name = substr( $existing_class, $at, $name_length ); + if ( self::QUIRKS_MODE === $this->compat_mode ) { + $name = strtolower( $name ); + } + + $at += $name_length; // If this class is marked for removal, start processing the next one. $remove_class = ( @@ -3843,7 +3847,10 @@ public function add_class( $class_name ): bool { return false; } - $this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::ADD_CLASS; + if ( self::QUIRKS_MODE === $this->compat_mode ) { + $class_name = strtolower( $class_name ); + } + $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } @@ -3865,36 +3872,15 @@ public function remove_class( $class_name ): bool { } if ( null !== $this->tag_name_starts_at ) { - $this->classname_updates[ $this->comparable_class_name( $class_name ) ] = self::REMOVE_CLASS; + if ( self::QUIRKS_MODE === $this->compat_mode ) { + $class_name = strtolower( $class_name ); + } + $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; } return true; } - /** - * Transform a class name string to a comparable form. - * - * When the document is in quirks mode, class names are transformed to - * ASCII lowercase for comparison. In no quirks mode, class names are - * compared as they are (case-sensitive). - * - * > When matching against a document which is in quirks mode, class names must be matched - * > ASCII case-insensitively; class selectors are otherwise case-sensitive, only matching - * > class names they are identical to. - * - * @see https://www.w3.org/TR/selectors-4/#class-html - * - * @since 6.7.0 - * - * @param string $class_name The class name to transform. - * @return string The transformed class name. - */ - private function comparable_class_name( string $class_name ): string { - return self::QUIRKS_MODE === $this->compat_mode - ? 
strtolower( $class_name ) - : $class_name; - } - /** * Returns the string representation of the HTML Tag Processor. * From 926e7d6a49898782d75a5d2c42775ef1ab51783e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 26 Aug 2024 18:51:26 +0200 Subject: [PATCH 23/34] Add another test for quirks-mode add_class --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index fec30d3d3ef54..6d6401275f99b 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -610,7 +610,11 @@ public function test_add_class_quirks_mode() { $processor = WP_HTML_Processor::create_full_parser( '' ); $processor->next_tag( 'SPAN' ); $processor->add_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->add_class( 'ANOTHER-UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); } /** From f1f42241363500a630b04e19328f2f6f194217eb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 26 Aug 2024 18:52:00 +0200 Subject: [PATCH 24/34] Remove since tag from class_list --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 82cba171bf015..740fac279ba02 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1138,7 +1138,6 @@ public function paused_at_incomplete_token(): bool { * // Outputs: "free lang-en " * * @since 6.4.0 - * @since 6.7.0 Class names are no longer force lower-cased. 
*/ public function class_list() { if ( self::STATE_MATCHED_TAG !== $this->parser_state ) { From 559315d7fa4d55b4def3bbc9bd7863e100e8c186 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 2 Sep 2024 20:09:32 +0200 Subject: [PATCH 25/34] Lowerclass yielded class_list class names in quirks mode --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 +++++ tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 740fac279ba02..eeb54b4997cca 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1153,6 +1153,8 @@ public function class_list() { $seen = array(); + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + $at = 0; while ( $at < strlen( $class ) ) { // Skip past any initial boundary characters. @@ -1168,6 +1170,9 @@ public function class_list() { } $name = substr( $class, $at, $length ); + if ( $is_quirks ) { + $name = strtolower( $name ); + } $at += $length; /* diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 6d6401275f99b..b7d88232a967c 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -643,7 +643,7 @@ public function test_class_list_quirks_mode() { $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( - array( 'A', 'a', 'B', 'b', 'É', 'é' ), + array( 'a', 'b', 'É', 'é' ), $class_list ); } From df914154334e1d49be09d974c2485ee9925cb0a8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 2 Sep 2024 20:15:42 +0200 Subject: [PATCH 26/34] Fix modifying class case when removing another class --- .../html-api/class-wp-html-tag-processor.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 
deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index eeb54b4997cca..efcd084e81873 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2297,6 +2297,8 @@ private function class_name_updates_to_attributes_updates(): void { */ $modified = false; + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + // Remove unwanted classes by only copying the new ones. $existing_class_length = strlen( $existing_class ); while ( $at < $existing_class_length ) { @@ -2313,21 +2315,19 @@ private function class_name_updates_to_attributes_updates(): void { } $name = substr( $existing_class, $at, $name_length ); - if ( self::QUIRKS_MODE === $this->compat_mode ) { - $name = strtolower( $name ); - } + $comparable_class_name = $is_quirks ? strtolower( $name ) : $name; $at += $name_length; // If this class is marked for removal, start processing the next one. $remove_class = ( - isset( $this->classname_updates[ $name ] ) && - self::REMOVE_CLASS === $this->classname_updates[ $name ] + isset( $this->classname_updates[ $comparable_class_name ] ) && + self::REMOVE_CLASS === $this->classname_updates[ $comparable_class_name ] ); // If a class has already been seen then skip it; it should not be added twice. if ( ! 
$remove_class ) { - $this->classname_updates[ $name ] = self::SKIP_CLASS; + $this->classname_updates[ $comparable_class_name ] = self::SKIP_CLASS; } if ( $remove_class ) { From 0eaa9af49907b0a32bc7e4762e06aa2905c5278c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 3 Sep 2024 11:02:26 +0200 Subject: [PATCH 27/34] Fix equals sign alignment lint --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9948244462ad7..de20d4464a377 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1098,7 +1098,7 @@ private function step_initial(): bool { * > Anything else */ initial_anything_else: - $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; return $this->step( self::REPROCESS_CURRENT_NODE ); } From ab02ca48198df1e5d60276d4a643253770d7acf3 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 15:51:32 -0500 Subject: [PATCH 28/34] Reintroduce an explanatory comment on `compat_mode` property --- .../html-api/class-wp-html-tag-processor.php | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 85458da2c25a7..3255ab61676a9 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -514,6 +514,20 @@ class WP_HTML_Tag_Processor { /** * Indicates if the document is in quirks mode or no-quirks mode. 
* + * Impact on HTML parsing: + * + * - In `NO_QUIRKS_MODE` (also known as "standard mode"): + * - CSS class and ID selectors match byte-for-byte (case-sensitively). + * - A TABLE start tag `
` implicitly closes any open `P` element. + * + * - In `QUIRKS_MODE`: + * - CSS class and ID selectors match in an ASCII case-insensitive manner. + * - A TABLE start tag `
` opens a `TABLE` element as a child of a `P` + * element if one is open. + * + * Quirks and no-quirks mode are thus mostly about styling, but have an impact when + * tables are found inside paragraph elements. + * * @see self::QUIRKS_MODE * @see self::NO_QUIRKS_MODE * From cad5d622573d55162dc1c27a88eeb7934c922cc9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 16:06:29 -0500 Subject: [PATCH 29/34] Revert: Have `has_class()` return `null` for unsupported tokens. --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1a5d91eab194d..55b906136820f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -4938,11 +4938,15 @@ public function remove_class( $class_name ): bool { * * @since 6.6.0 Subclassed for the HTML Processor. * + * @todo When reconstructing active formatting elements with attributes, find a way + * to indicate if the virtually-reconstructed formatting elements contain the + * wanted class name. + * * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. * @return bool|null Whether the matched tag contains the given class name, or null if not matched. */ public function has_class( $wanted_class ): ?bool { - return $this->is_virtual() ? false : parent::has_class( $wanted_class ); + return $this->is_virtual() ? null : parent::has_class( $wanted_class ); } /** From 62cbb1d014a9fa590aa4279c04bb945a4a5653d0 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 17:30:13 -0500 Subject: [PATCH 30/34] Truncate explanatory comment on quirks mode constants and cite MDN. 
--- .../html-api/class-wp-html-tag-processor.php | 26 ++++--------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 3255ab61676a9..daa78bc08242a 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -4395,6 +4395,9 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { * > In no-quirks mode, the behavior is (hopefully) the desired behavior * > described by the modern HTML and CSS specifications. * + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode + * * @since 6.7.0 * * @var string @@ -4408,27 +4411,8 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { * > Explorer 5. This is essential in order to support websites that were * > built before the widespread adoption of web standards. * - * The behavior of the HTML API is impacted in the following ways when a document is in quirks mode: - * - The CSS class name methods on the Tag and HTML Processors such as `has_class` are - * in ASCII case-insensitive. In no-quirks mode, they are case-sensitive. - * - The HTML Processor may produce a different structure, namely when HTML attempts to nest - * a TABLE under a P. In no-quirks mode this is not allowed and a TABLE start tag closes - * open P elements. In quirks mode, TABLE is allowed to appear as a descendant of P so - * the tree structure will appear as authored. For example: - * - Quirks mode: - *

- * Produces: - * HTML - *  └BODY - *   └P - *    └TABLE - * - No-quirks mode: - *

- * Produces: - * HTML - *  └BODY - *   ├P - *   └TABLE + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode * * @since 6.7.0 * From 821730396bb38d46788f5cbdbf2e8cf79811b247 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 17:42:03 -0500 Subject: [PATCH 31/34] Modify tests --- .../tests/html-api/wpHtmlProcessor.php | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index b7d88232a967c..7f1ff415c9140 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -576,11 +576,22 @@ public function test_has_class_no_quirks_mode() { * @covers ::class_list */ public function test_class_list_no_quirks_mode() { - $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor = WP_HTML_Processor::create_full_parser( + /* + * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE + * U+0045 is LATIN CAPITAL LETTER E + * U+0301 is COMBINING ACUTE ACCENT + * + * This tests not only that the class matching deduplicates the É, but also + * that it treats the same character in different normalization forms as + * distinct, since matching occurs on a byte-for-byte basis. 
+ */ + "" + ); $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( - array( 'A', 'a', 'B', 'b', 'É', 'é' ), + array( 'A', 'a', 'B', 'b', 'É', "E\u{0301}", 'é' ), $class_list ); } @@ -614,7 +625,7 @@ public function test_add_class_quirks_mode() { $this->assertSame( '', $processor->get_updated_html() ); $processor->add_class( 'ANOTHER-UPPER' ); - $this->assertSame( '', $processor->get_updated_html() ); + $this->assertSame( '', $processor->get_updated_html() ); } /** @@ -639,11 +650,23 @@ public function test_has_class_quirks_mode() { * @covers ::class_list */ public function test_class_list_quirks_mode() { - $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor = WP_HTML_Processor::create_full_parser( + /* + * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE + * U+0045 is LATIN CAPITAL LETTER E + * U+0065 is LATIN SMALL LETTER E + * U+0301 is COMBINING ACUTE ACCENT + * + * This tests not only that the class matching deduplicates the É, but also + * that it treats the same character in different normalization forms as + * distinct, since matching occurs on a byte-for-byte basis. + */ + "" + ); $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( - array( 'a', 'b', 'É', 'é' ), + array( 'a', 'b', 'É', "E\u{301}", 'é' ), $class_list ); } From b417e11620a1f702b47e5274d444e16b664795ef Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 22:32:08 -0500 Subject: [PATCH 32/34] Preserve given casing of added and removed class names. 
--- .../html-api/class-wp-html-tag-processor.php | 48 ++++++++++++++++--- .../tests/html-api/wpHtmlProcessor.php | 2 +- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index daa78bc08242a..4171d0d7d56a9 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3965,11 +3965,29 @@ public function add_class( $class_name ): bool { return false; } - if ( self::QUIRKS_MODE === $this->compat_mode ) { - $class_name = strtolower( $class_name ); + if ( self::QUIRKS_MODE !== $this->compat_mode ) { + $this->classname_updates[ $class_name ] = self::ADD_CLASS; + return true; } - $this->classname_updates[ $class_name ] = self::ADD_CLASS; + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. 
+ */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::ADD_CLASS; + return true; + } + } + + $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } @@ -3989,13 +4007,29 @@ public function remove_class( $class_name ): bool { return false; } - if ( null !== $this->tag_name_starts_at ) { - if ( self::QUIRKS_MODE === $this->compat_mode ) { - $class_name = strtolower( $class_name ); - } + if ( self::QUIRKS_MODE !== $this->compat_mode ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; + return true; + } + + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. 
+ */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS; + return true; + } } + $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; return true; } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 7f1ff415c9140..4deee0e3a0358 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -666,7 +666,7 @@ public function test_class_list_quirks_mode() { $processor->next_tag( 'SPAN' ); $class_list = iterator_to_array( $processor->class_list() ); $this->assertSame( - array( 'a', 'b', 'É', "E\u{301}", 'é' ), + array( 'a', 'b', 'É', "e\u{301}", 'é' ), $class_list ); } From 48e00c9e7c176523bf4c99f2710c6896e0303329 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 23:06:07 -0500 Subject: [PATCH 33/34] Fix issue with classname updates, and change test to assert it. 
--- .../html-api/class-wp-html-tag-processor.php | 37 +++++++++++++------ .../tests/html-api/wpHtmlProcessor.php | 4 +- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 4171d0d7d56a9..1ea8066d97ade 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -2326,7 +2326,22 @@ private function class_name_updates_to_attributes_updates(): void { */ $modified = false; + $seen = array(); + $to_remove = array(); $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + if ( $is_quirks ) { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = strtolower( $updated_name ); + } + } + } else { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = $updated_name; + } + } + } // Remove unwanted classes by only copying the new ones. $existing_class_length = strlen( $existing_class ); @@ -2347,22 +2362,19 @@ private function class_name_updates_to_attributes_updates(): void { $comparable_class_name = $is_quirks ? strtolower( $name ) : $name; $at += $name_length; - // If this class is marked for removal, start processing the next one. - $remove_class = ( - isset( $this->classname_updates[ $comparable_class_name ] ) && - self::REMOVE_CLASS === $this->classname_updates[ $comparable_class_name ] - ); - - // If a class has already been seen then skip it; it should not be added twice. - if ( ! $remove_class ) { - $this->classname_updates[ $comparable_class_name ] = self::SKIP_CLASS; + // If this class is marked for removal, remove it and move on to the next one. 
+ if ( in_array( $comparable_class_name, $to_remove, true ) ) { + $modified = true; + continue; } - if ( $remove_class ) { - $modified = true; + // If a class has already been seen then skip it; it should not be added twice. + if ( in_array( $comparable_class_name, $seen, true ) ) { continue; } + $seen[] = $comparable_class_name; + /* * Otherwise, append it to the new "class" attribute value. * @@ -2383,7 +2395,8 @@ private function class_name_updates_to_attributes_updates(): void { // Add new classes by appending those which haven't already been seen. foreach ( $this->classname_updates as $name => $operation ) { - if ( self::ADD_CLASS === $operation ) { + $comparable_name = $is_quirks ? strtolower( $name ) : $name; + if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) { $modified = true; $class .= strlen( $class ) > 0 ? ' ' : ''; diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 4deee0e3a0358..e9b9063f77a7b 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -604,9 +604,9 @@ public function test_class_list_no_quirks_mode() { * @covers ::remove_class */ public function test_remove_class_quirks_mode() { - $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor = WP_HTML_Processor::create_full_parser( '' ); $processor->next_tag( 'SPAN' ); - $processor->remove_class( 'upper' ); + $processor->remove_class( 'upPer' ); $this->assertSame( '', $processor->get_updated_html() ); } From b09902608e3db4748608ad076d1a598253da2062 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 3 Sep 2024 23:07:37 -0500 Subject: [PATCH 34/34] Remove test helpers accidentally added (can't force-push) --- my.bootstrap.php | 16 ---------------- my.phpunit.xml | 23 ----------------------- 2 files changed, 39 deletions(-) delete mode 100644 my.bootstrap.php delete mode 100644 my.phpunit.xml diff --git 
a/my.bootstrap.php b/my.bootstrap.php deleted file mode 100644 index 83a781da4aaff..0000000000000 --- a/my.bootstrap.php +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - tests/phpunit/tests/formatting - - - tests/phpunit/tests/kses.php - - - tests/phpunit/tests/html-api - - -