From 02e9e524fefafa1f109a9cc197e75e132b7d46c4 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 14:53:33 -0700 Subject: [PATCH 01/11] HTML API: Support extra non-normative comment constructions --- .../tests/html-api/wpHtmlTagProcessor.php | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 6b0c263c5dad6..44e3a1a579a89 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1714,6 +1714,48 @@ public function data_next_tag_ignores_script_tag_contents() { ); } + /** + * Invalid tag names are comments on tag closers. + * + * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name + * + * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments + * + * @param $html_with_markers + * @param $before_tag + * @param $after_tag + * @return void + */ + public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers, $before_tag, $after_tag ) { + $p = new WP_HTML_Tag_Processor( $html_with_markers ); + $p->next_tag( $before_tag ); + $p->next_tag(); + + $this->assertSame( $after_tag, $p->get_tag() ); + } + + public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { + return array( + 'Invalid tag openers as normal text' => array( + '', + 'DIV', + 'IMG', + ), + + 'Invalid tag closers as comments' => array( + '', + 'DIV', + 'BR', + ), + + 'Unexpected question mark instead of tag name' => array( + '

', + 'DIV', + 'HR', + ), + ); + } + /** * @ticket 56299 * @@ -1766,6 +1808,47 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() { ); } + public function test_allows_incorrectly_closed_comments() { + $p = new WP_HTML_Tag_Processor( '-->' ); + + $p->next_tag(); + $this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' ); + + $p->next_tag(); + $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' ); + + $p->next_tag(); + $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' ); + } + + /** + * @dataProvider data_abruptly_closed_empty_comments + * + * @param $html + * @return void + */ + public function test_closes_abrupt_closing_of_empty_comment( $html ) { + $p = new WP_HTML_Tag_Processor( $html ); + $p->next_tag(); + $p->next_tag(); + + $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' ); + } + + public function data_abruptly_closed_empty_comments() { + return array( + 'Empty comment with two dashes only' => array( '

' ), + 'Empty comment with two dashes only, improperly closed' => array( '

' ), + 'Comment with two dashes only, improperly closed twice' => array( '

' ), + 'Empty comment with three dashes' => array( '

' ), + 'Empty comment with three dashes, improperly closed' => array( '

' ), + 'Comment with three dashes, improperly closed twice' => array( '

' ), + 'Empty comment with four dashes' => array( '

' ), + 'Empty comment with four dashes, improperly closed' => array( '

-->
' ), + 'Comment with four dashes, improperly closed twice' => array( '

--!>
' ), + ); + } + /** * @ticket 56299 * From 5900185cae8e80a08bc95c43d8ce04f0d5e2efa1 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 14:54:08 -0700 Subject: [PATCH 02/11] Fix newly introduced and broken tests --- .../html-api/class-wp-html-tag-processor.php | 66 +++++++++++++++++-- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ab0b88693ab2e..9dcaed92f1b53 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1039,13 +1039,41 @@ private function parse_next_tag() { '-' === $html[ $at + 2 ] && '-' === $html[ $at + 3 ] ) { - $closer_at = strpos( $html, '-->', $at + 4 ); - if ( false === $closer_at ) { - return false; + $closer_at = $at + 4; + + /* + * Abruptly-closed empty comments are a sequence of dashes followed by `>`. + */ + $span_of_dashes = strspn( $html, '-', $closer_at ); + if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { + $at = $closer_at + $span_of_dashes + 1; + continue; } - $at = $closer_at + 3; - continue; + /* + * Comments may be closed by either a --> or an invalid --!>. + * The first occurrence closes the comment. + * + * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment + */ + while ( $closer_at < strlen( $html ) ) { + $closer_at = strpos( $html, '--', $closer_at ); + if ( false === $closer_at ) { + return false; + } + + if ( '>' === $html[ $closer_at + 2 ] ) { + $at = $closer_at + 3; + continue 2; + } + + if ( '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { + $at = $closer_at + 4; + continue 2; + } + + $closer_at = $closer_at + 2; + } } /* @@ -1104,9 +1132,19 @@ private function parse_next_tag() { continue; } + /* + * is a missing end tag name, which is ignored. + * + * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name + */ + if ( '>' === $html[ $at + 1 ] ) { + $at++; + continue; + } + /* * - * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); @@ -1118,6 +1156,22 @@ private function parse_next_tag() { continue; } + /* + * If a non-alpha starts the tag name in a tag closer it's a comment. + * Find the first `>`, which closes the comment. + * + * See https://github.com/WordPress/wordpress-develop/pull/4256 + */ + if ( $this->is_closing_tag ) { + $closer_at = strpos( $html, '>', $at + 3 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 1; + continue; + } + ++$at; } From 46af17207ab192a4af0e2e7c9b6aaae5cd3985c7 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 16:43:28 -0700 Subject: [PATCH 03/11] Fix implementation detail, move by one not two dashes --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 9dcaed92f1b53..7b0a495df6dad 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1072,7 +1072,7 @@ private function parse_next_tag() { continue 2; } - $closer_at = $closer_at + 2; + $closer_at = $closer_at + 1; } } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 44e3a1a579a89..5fc23875425bc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1846,6 +1846,7 @@ public function data_abruptly_closed_empty_comments() { 'Empty comment with four dashes' => array( '

' ), 'Empty comment with four dashes, improperly closed' => array( '

-->
' ), 'Comment with four dashes, improperly closed twice' => array( '

--!>
' ), + 'Comment with almost-closer inside' => array( '

--!>
' ), ); } From 2a1e1beeb6e04c091fba0c2d709f687ae76883ce Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 16:53:45 -0700 Subject: [PATCH 04/11] Move pointer increment into loop condition to avoid infinite loops This is just a safety precaution to mitigate an accidental removal of the pointer increment. No actual problems motivated this change. --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 7b0a495df6dad..6b8dc632419ba 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1056,7 +1056,8 @@ private function parse_next_tag() { * * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment */ - while ( $closer_at < strlen( $html ) ) { + $closer_at--; // Pre-increment inside condition avoids risk of infinite looping. + while ( ++$closer_at < strlen( $html ) ) { $closer_at = strpos( $html, '--', $closer_at ); if ( false === $closer_at ) { return false; @@ -1071,8 +1072,6 @@ private function parse_next_tag() { $at = $closer_at + 4; continue 2; } - - $closer_at = $closer_at + 1; } } From f72414024d8b0f6117b70bc66b3a21b2377390db Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 28 Mar 2023 17:02:53 -0700 Subject: [PATCH 05/11] Add references to Trac ticket, annotate tests --- .../html-api/class-wp-html-tag-processor.php | 1 + .../tests/html-api/wpHtmlTagProcessor.php | 56 ++++++++++++------- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 6b8dc632419ba..ce95a45564bf5 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -971,6 +971,7 @@ private function skip_script_data() { * closing `>`; these are left for other methods. * * @since 6.2.0 + * @since 6.3.0 Passes over invalid-tag-closer-comments like "". * * @return bool Whether a tag was found before the end of the document. */ diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 5fc23875425bc..fd3e86101b62b 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1719,39 +1719,38 @@ public function data_next_tag_ignores_script_tag_contents() { * * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name * + * @ticket 58007 * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments * - * @param $html_with_markers - * @param $before_tag - * @param $after_tag - * @return void + * @param string $html_with_markers HTML containing an invalid tag closer whose element before and element after contain the "start" and "end" CSS classes. */ - public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers, $before_tag, $after_tag ) { + public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) { $p = new WP_HTML_Tag_Processor( $html_with_markers ); - $p->next_tag( $before_tag ); + $p->next_tag( array( 'class_name' => 'start' ) ); $p->next_tag(); - $this->assertSame( $after_tag, $p->get_tag() ); + $this->assertSame( 'end', $p->get_attribute( 'class' ) ); } + /** + * Data provider. + * + * @ticket 58007 + * + * @return array[] + */ public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { return array( 'Invalid tag openers as normal text' => array( - '
  • I <3 when outflow > inflow
', - 'DIV', - 'IMG', + '
  • I <3 when outflow > inflow
', ), 'Invalid tag closers as comments' => array( - '
  • I outflow
    inflow
', - 'DIV', - 'BR', + '
  • I outflow
    inflow
', ), 'Unexpected question mark instead of tag name' => array( - '

', - 'DIV', - 'HR', + '

', ), ); } @@ -1808,6 +1807,13 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() { ); } + /** + * Ensures that the invalid comment closing syntax "--!>" properly closes a comment. + * + * @ticket 58007 + * @covers WP_HTML_Tag_Processor::next_tag + * + */ public function test_allows_incorrectly_closed_comments() { $p = new WP_HTML_Tag_Processor( '-->' ); @@ -1822,19 +1828,29 @@ public function test_allows_incorrectly_closed_comments() { } /** + * Ensures that abruptly-closed empty comments are properly closed. + * + * @ticket 58007 + * @covers WP_HTML_Tag_Processor::next_tag * @dataProvider data_abruptly_closed_empty_comments * - * @param $html - * @return void + * @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment. */ - public function test_closes_abrupt_closing_of_empty_comment( $html ) { - $p = new WP_HTML_Tag_Processor( $html ); + public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) { + $p = new WP_HTML_Tag_Processor( $html_with_after_marker ); $p->next_tag(); $p->next_tag(); $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' ); } + /** + * Data provider. + * + * @ticket 58007 + * + * @return array[] + */ public function data_abruptly_closed_empty_comments() { return array( 'Empty comment with two dashes only' => array( '

' ), From 5f170ee6a8fd6e4a311650fc6ff28f9ecf1f5522 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 Mar 2023 08:38:17 -0700 Subject: [PATCH 06/11] Linting issues --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 6 ++---- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 6 +++--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ce95a45564bf5..020608b9e0c5a 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -971,7 +971,7 @@ private function skip_script_data() { * closing `>`; these are left for other methods. * * @since 6.2.0 - * @since 6.3.0 Passes over invalid-tag-closer-comments like "". + * @since 6.2.1 Passes over invalid-tag-closer-comments like "". * * @return bool Whether a tag was found before the end of the document. */ @@ -1042,9 +1042,7 @@ private function parse_next_tag() { ) { $closer_at = $at + 4; - /* - * Abruptly-closed empty comments are a sequence of dashes followed by `>`. - */ + // Abruptly-closed empty comments are a sequence of dashes followed by `>`. $span_of_dashes = strspn( $html, '-', $closer_at ); if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { $at = $closer_at + $span_of_dashes + 1; diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index fd3e86101b62b..aac5593309283 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1720,9 +1720,11 @@ public function data_next_tag_ignores_script_tag_contents() { * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name * * @ticket 58007 + * * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments * - * @param string $html_with_markers HTML containing an invalid tag closer whose element before and element after contain the "start" and "end" CSS classes. + * @param string $html_with_markers HTML containing an invalid tag closer whose element before and + * element after contain the "start" and "end" CSS classes. */ public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) { $p = new WP_HTML_Tag_Processor( $html_with_markers ); @@ -1735,8 +1737,6 @@ public function test_next_tag_ignores_invalid_first_character_of_tag_name_commen /** * Data provider. * - * @ticket 58007 - * * @return array[] */ public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { From ebeb8707a0ce8cc119036812c3d51279b670ecaa Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 Mar 2023 08:54:53 -0700 Subject: [PATCH 07/11] Correct unchecked indexing, feedback Props to @costdev for noting the unchecked indices. --- .../html-api/class-wp-html-tag-processor.php | 6 ++-- .../tests/html-api/wpHtmlTagProcessor.php | 34 +++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 020608b9e0c5a..8fe59d68c4b8d 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1055,19 +1055,19 @@ private function parse_next_tag() { * * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment */ - $closer_at--; // Pre-increment inside condition avoids risk of infinite looping. + $closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping. while ( ++$closer_at < strlen( $html ) ) { $closer_at = strpos( $html, '--', $closer_at ); if ( false === $closer_at ) { return false; } - if ( '>' === $html[ $closer_at + 2 ] ) { + if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) { $at = $closer_at + 3; continue 2; } - if ( '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { + if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { $at = $closer_at + 4; continue 2; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index aac5593309283..58d032329deff 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1827,10 +1827,42 @@ public function test_allows_incorrectly_closed_comments() { $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' ); } + /** + * Ensures that unclosed and invalid comments don't trigger warnings or errors. + * + * @ticket 58007 + * + * @covers WP_HTML_Tag_Processor::next_tag + * @dataProvider data_html_with_unclosed_comments + * + * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed + */ + public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) { + $p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close ); + + $this->assertFalse( $p->next_tag() ); + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_html_with_unclosed_comments() { + return array( + 'Basic truncated comment' => array( '