From 805fb5534f2bdff7493a057578cb0154f820aa18 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 24 Feb 2023 16:32:25 -0700 Subject: [PATCH 01/39] WIP: HTML API: Add HTML Spec class to convey information related to the HTML specification --- .../html-api/class-wp-html-spec.php | 292 ++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 src/wp-includes/html-api/class-wp-html-spec.php diff --git a/src/wp-includes/html-api/class-wp-html-spec.php b/src/wp-includes/html-api/class-wp-html-spec.php new file mode 100644 index 0000000000000..cef3e07c0bdff --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-spec.php @@ -0,0 +1,292 @@ + Date: Wed, 1 Mar 2023 18:13:43 -0700 Subject: [PATCH 02/39] WIP: HTML API: Expose self-closing flag in Tag Processor --- .../html-api/class-wp-html-tag-processor.php | 8 +++++ .../tests/html-api/wpHtmlTagProcessor.php | 32 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 0b3dab1ad2ae4..a8a1c14a1e69a 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1743,6 +1743,14 @@ public function get_tag() { return strtoupper( $tag_name ); } + public function has_self_closing_flag() { + if ( ! $this->tag_name_starts_at ) { + return false; + } + + return '/' === $this->html[ $this->tag_ends_at - 1 ]; + } + /** * Indicates if the current tag token is a tag closer. 
* diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index d006b3294c9ea..33692267f4f4d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -51,6 +51,38 @@ public function test_get_tag_returns_open_tag_name() { $this->assertSame( 'DIV', $p->get_tag(), 'Accessing an existing tag name did not return "div"' ); } + /** + * @ticket NEEDS TICKET + * + * @covers WP_HTML_Tag_Processor::has_self_closing_flag() + * + * @dataProvider data_has_self_closing_flag + * + * @param string $html input HTML whose first tag might contain the self-closing flag `/`. + * @param bool $flag_is_set whether the input HTML's first tag contains the self-closing flag. + */ + public function test_has_self_closing_flag_matches_input_html( $html, $flag_is_set ) { + $p = new WP_HTML_Tag_Processor( $html ); + $p->next_tag( array( 'tag_closers' => 'visit' ) ); + + if ( $flag_is_set ) { + $this->assertTrue( $p->has_self_closing_flag(), 'Did not find the self-closing tag when it was present.' ); + } else { + $this->assertFalse( $p->has_self_closing_flag(), 'Found the self-closing tag when it was absent.' ); + } + } + + public function data_has_self_closing_flag() { + return array( + array( '
', true ), + array( '
', false ), + array( '', true ), + array( '', false ), + array( '', false ), + array( '', true ), + ); + } + /** * @ticket 56299 * From 84e25cfabdbb99b2067e25cba37ea9d66ec624d1 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 19:31:14 -0700 Subject: [PATCH 03/39] Expand documentation --- .../html-api/class-wp-html-tag-processor.php | 17 ++++++++++++++ .../tests/html-api/wpHtmlTagProcessor.php | 23 ++++++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a8a1c14a1e69a..ec8855d472b78 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1743,6 +1743,23 @@ public function get_tag() { return strtoupper( $tag_name ); } + /** + * Indicates if the currently matched tag contains the self-closing flag. + * + * No HTML elements ought to have the self-closing flag and for those, the self-closing + * flag will be ignored. For void elements this is benign because they "self close" + * automatically. For non-void HTML elements though problems will appear if someone + * intends to use a self-closing element in place of that element with an empty body. + * For HTML foreign elements and custom elements the self-closing flag determines if + * they self-close or not. + * + * This function does not determine if a tag is self-closing, + * but only if the self-closing flag is present in the syntax. + * + * @since 6.3.0 + * + * @return bool Whether the currently matched tag contains the self-closing flag. + */ public function has_self_closing_flag() { if ( ! 
$this->tag_name_starts_at ) { return false; diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 33692267f4f4d..7402df600e17d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -72,14 +72,25 @@ public function test_has_self_closing_flag_matches_input_html( $html, $flag_is_s } } + /** + * Data provider. HTML tags which might have a self-closing flag, and an indicator if they do. + * + * @return array[] + */ public function data_has_self_closing_flag() { return array( - array( '
', true ), - array( '
', false ), - array( '', true ), - array( '', false ), - array( '', false ), - array( '', true ), + // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing. + 'Self-closing flag on non-void HTML element' => array( '
', true ), + 'No self-closing flag on non-void HTML element' => array( '
', false ), + // These should not have a self-closer, but are benign when used because the elements are void. + 'Self-closing flag on void HTML element' => array( '', true ), + 'No self-closing flag on void HTML element' => array( '', false ), + // These should not have a self-closer, but as part of a tag closer they are entirely ignored. + 'Self-closing flag on tag closer' => array( '', true ), + 'No self-closing flag on tag closer' => array( '', false ), + // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. + 'Self-closing flag on a foreign element' => array( '', true ), + 'No self-closing flag on a foreign element' => array( '', false ), ); } From c18a81acbadeddddc19625bd1ef0d1d12d14806a Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 20:20:53 -0700 Subject: [PATCH 04/39] Appease the linting gods --- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 7402df600e17d..7568bbfa5a759 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -83,14 +83,14 @@ public function data_has_self_closing_flag() { 'Self-closing flag on non-void HTML element' => array( '
', true ), 'No self-closing flag on non-void HTML element' => array( '
', false ), // These should not have a self-closer, but are benign when used because the elements are void. - 'Self-closing flag on void HTML element' => array( '', true ), - 'No self-closing flag on void HTML element' => array( '', false ), + 'Self-closing flag on void HTML element' => array( '', true ), + 'No self-closing flag on void HTML element' => array( '', false ), // These should not have a self-closer, but as part of a tag closer they are entirely ignored. - 'Self-closing flag on tag closer' => array( '', true ), - 'No self-closing flag on tag closer' => array( '', false ), + 'Self-closing flag on tag closer' => array( '', true ), + 'No self-closing flag on tag closer' => array( '', false ), // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. - 'Self-closing flag on a foreign element' => array( '', true ), - 'No self-closing flag on a foreign element' => array( '', false ), + 'Self-closing flag on a foreign element' => array( '', true ), + 'No self-closing flag on a foreign element' => array( '', false ), ); } From af8d49dc2471f88cf7813941e538c22254cd5ee8 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 20:27:53 -0700 Subject: [PATCH 05/39] Add extra tests for syntax peculiarities. --- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 7568bbfa5a759..ee8ec7a8b0e2f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -91,6 +91,11 @@ public function data_has_self_closing_flag() { // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. 
'Self-closing flag on a foreign element' => array( '', true ), 'No self-closing flag on a foreign element' => array( '', false ), + // These involve syntax peculiarities. + 'Self-closing flag after extra spaces' => array( '
', true ), + 'Self-closing flag after attribute' => array( '
', true ), + 'Self-closing flag after quoted attribute' => array( '
', true ), + 'Self-closing flag after boolean attribute' => array( '
', true ), ); } From 72e5de0646005bd133dcc0b854526db80f10715a Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 20:39:08 -0700 Subject: [PATCH 06/39] Appease the linting gods --- .../phpunit/tests/html-api/wpHtmlTagProcessor.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index ee8ec7a8b0e2f..e292afb730fea 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -80,21 +80,21 @@ public function test_has_self_closing_flag_matches_input_html( $html, $flag_is_s public function data_has_self_closing_flag() { return array( // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing. - 'Self-closing flag on non-void HTML element' => array( '
', true ), + 'Self-closing flag on non-void HTML element' => array( '
', true ), 'No self-closing flag on non-void HTML element' => array( '
', false ), // These should not have a self-closer, but are benign when used because the elements are void. - 'Self-closing flag on void HTML element' => array( '', true ), + 'Self-closing flag on void HTML element' => array( '', true ), 'No self-closing flag on void HTML element' => array( '', false ), // These should not have a self-closer, but as part of a tag closer they are entirely ignored. - 'Self-closing flag on tag closer' => array( '', true ), + 'Self-closing flag on tag closer' => array( '', true ), 'No self-closing flag on tag closer' => array( '', false ), // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. - 'Self-closing flag on a foreign element' => array( '', true ), + 'Self-closing flag on a foreign element' => array( '', true ), 'No self-closing flag on a foreign element' => array( '', false ), // These involve syntax peculiarities. - 'Self-closing flag after extra spaces' => array( '
', true ), - 'Self-closing flag after attribute' => array( '
', true ), - 'Self-closing flag after quoted attribute' => array( '
', true ), + 'Self-closing flag after extra spaces' => array( '
', true ), + 'Self-closing flag after attribute' => array( '
', true ), + 'Self-closing flag after quoted attribute' => array( '
', true ), 'Self-closing flag after boolean attribute' => array( '
', true ), ); } From 095a110510ac85a018e560d645892eb47d713039 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 20:42:05 -0700 Subject: [PATCH 07/39] Appease the linting gods --- .../tests/html-api/wpHtmlTagProcessor.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index e292afb730fea..c1f711eb7c6d4 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -80,22 +80,22 @@ public function test_has_self_closing_flag_matches_input_html( $html, $flag_is_s public function data_has_self_closing_flag() { return array( // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing. - 'Self-closing flag on non-void HTML element' => array( '
', true ), + 'Self-closing flag on non-void HTML element' => array( '
', true ), 'No self-closing flag on non-void HTML element' => array( '
', false ), // These should not have a self-closer, but are benign when used because the elements are void. 'Self-closing flag on void HTML element' => array( '', true ), - 'No self-closing flag on void HTML element' => array( '', false ), + 'No self-closing flag on void HTML element' => array( '', false ), // These should not have a self-closer, but as part of a tag closer they are entirely ignored. 'Self-closing flag on tag closer' => array( '', true ), - 'No self-closing flag on tag closer' => array( '', false ), + 'No self-closing flag on tag closer' => array( '', false ), // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. - 'Self-closing flag on a foreign element' => array( '', true ), - 'No self-closing flag on a foreign element' => array( '', false ), + 'Self-closing flag on a foreign element' => array( '', true ), + 'No self-closing flag on a foreign element' => array( '', false ), // These involve syntax peculiarities. - 'Self-closing flag after extra spaces' => array( '
', true ), - 'Self-closing flag after attribute' => array( '
', true ), - 'Self-closing flag after quoted attribute' => array( '
', true ), - 'Self-closing flag after boolean attribute' => array( '
', true ), + 'Self-closing flag after extra spaces' => array( '
', true ), + 'Self-closing flag after attribute' => array( '
', true ), + 'Self-closing flag after quoted attribute' => array( '
', true ), + 'Self-closing flag after boolean attribute' => array( '
', true ), ); } From 9b3f90d7bdf26c0c61ab63888f122b2bc196a83c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 1 Mar 2023 20:44:12 -0700 Subject: [PATCH 08/39] Appease the linting gods --- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index c1f711eb7c6d4..6ad80a29cf72f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -80,13 +80,13 @@ public function test_has_self_closing_flag_matches_input_html( $html, $flag_is_s public function data_has_self_closing_flag() { return array( // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing. - 'Self-closing flag on non-void HTML element' => array( '
', true ), + 'Self-closing flag on non-void HTML element' => array( '
', true ), 'No self-closing flag on non-void HTML element' => array( '
', false ), // These should not have a self-closer, but are benign when used because the elements are void. - 'Self-closing flag on void HTML element' => array( '', true ), + 'Self-closing flag on void HTML element' => array( '', true ), 'No self-closing flag on void HTML element' => array( '', false ), // These should not have a self-closer, but as part of a tag closer they are entirely ignored. - 'Self-closing flag on tag closer' => array( '', true ), + 'Self-closing flag on tag closer' => array( '', true ), 'No self-closing flag on tag closer' => array( '', false ), // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing. 'Self-closing flag on a foreign element' => array( '', true ), From 396285b0c9b9f4b32c1dd59e6d23667b458f5a92 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 6 Mar 2023 22:18:57 -0700 Subject: [PATCH 09/39] Add `ensure_support()` and tests --- .../html-api/class-wp-html-processor.php | 76 +++++++++++++++++++ .../html-api/wpHtmlTagProcessor-support.php | 72 ++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 src/wp-includes/html-api/class-wp-html-processor.php create mode 100644 tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php new file mode 100644 index 0000000000000..16dc92f61ec5f --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -0,0 +1,76 @@ +fully_supported_input ) { + return $this->fully_supported_input; + } + + $stack = array(); + + $p = new WP_HTML_Tag_Processor( $this->html ); + while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) { + $tag_name = $p->get_tag(); + + if ( ! $p->is_tag_closer() ) { + $element = WP_HTML_Spec::element_info( $tag_name ); + + $self_closes = $element::is_void || ( ! $element::is_html && $p->has_self_closing_flag() ); + if ( ! 
$self_closes ) { + $stack[] = $tag_name; + } + } else { + if ( end( $stack ) === $tag_name ) { + array_pop( $stack ); + continue; + } + + $this->fully_supported_input = false; + return false; + } + } + + $this->fully_supported_input = 0 === count( $stack ); + + return $this->fully_supported_input; + } + + public function next_tag( $query = null ) { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } + + return parent::next_tag( $query ); + } + + public function proceed_to_end_of_next_tag() { + if ( $this->fully_supported_input ) { + return false; + } + + $open_elements = array(); + + while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) ) { + $tag_name = $this->get_tag(); + $element = WP_HTML_Spec::element_info( $tag_name ); + + if ( $element::is_void ) { + return true; + } + + if ( $this->is_tag_closer() ) { + if ( 0 === count( $open_elements ) || $open_elements[ count( $open_elements ) - 1 ] !== $tag_name ) { + $this->fully_supported_input = true; + return false; + } + + array_pop( $open_elements ); + if ( 0 === count( $open_elements ) ) { + return true; + } + } + } + } +} diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php new file mode 100644 index 0000000000000..11b1eb8c418ca --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php @@ -0,0 +1,72 @@ +assertTrue( $p->ensure_support(), "Detected that supported HTML input isn't supported." ); + } + + /** + * @return array[] + */ + public function data_fully_balanced_html() { + return array( + 'Fully-nested balanced tags' => array( '

Test

' ), + 'Sibling nested tags' => array( '
  • One
  • Two
  • Three
' ), + 'Top-level siblings' => array( '
  • One
  • Two
  • Three
  • ' ), + 'Void tags' => array( '

    ' ), + 'Void tags with invalid self-closing flags' => array( '

    ' ), + 'Invalid self-closing non-void' => array( 'This
    is (not) empty.
    ' ), + 'Nested with void tags' => array( '

    Text
    More Text

    '), + 'HTML foreign elements' => array( ''), + ); + } + + /** + * @dataProvider data_not_fully_balanced_html + */ + public function test_detects_not_fully_balanced_html( $html ) { + $p = new WP_HTML_Processor( $html ); + + $this->assertFalse( $p->ensure_support(), 'Detected that unsupported HTML input is supported.' ); + } + + /** + * @return array[] + */ + public function data_not_fully_balanced_html() { + return array( + 'Unclosed tag' => array( '

    Unclosed paragraph' ), + 'Unclosed nested tag' => array( '

    Unclosed paragraph

    ' ), + 'Overlapping tags' => array( '

    Important

    ' ), + 'Overlapping nested tags' => array( '

    Important

    ' ), + 'Invalid self-closing non-void' => array( 'This
    is (not) empty.' ), + 'Un-closed HTML foreign self-closer' => array( ''), + 'Improperly-closed HTML foreign self-closer' => array( ''), + ); + } + + /** + * @dataProvider data_not_fully_balanced_html + */ + public function test_does_not_call_next_tag_for_unsupported_html( $html ) { + $p = new WP_HTML_Processor( $html ); + + $this->assertFalse( $p->next_tag(), "Advanced to '{$p->get_tag()}' even though input HTML isn't supported." ); + } +} From b2e6856b6604b272d647b1b4e2ab986eec0a3beb Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Mon, 6 Mar 2023 23:20:58 -0700 Subject: [PATCH 10/39] WIP: Work on next_sibling --- .../html-api/class-wp-html-processor.php | 55 +++++++++++++------ ...Tests_HtmlApi_wpHtmlProcessor_Support.php} | 35 ++++++++++++ 2 files changed, 73 insertions(+), 17 deletions(-) rename tests/phpunit/tests/html-api/{wpHtmlTagProcessor-support.php => Tests_HtmlApi_wpHtmlProcessor_Support.php} (66%) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 16dc92f61ec5f..c7de3079970f0 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -2,6 +2,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { public $fully_supported_input = null; + public $open_elements = array(); public function ensure_support() { if ( null !== $this->fully_supported_input ) { @@ -42,35 +43,55 @@ public function next_tag( $query = null ) { return false; } - return parent::next_tag( $query ); + if ( false === parent::next_tag( array( 'tag_closers' => 'visit' ) ) ) { + return false; + } + + $tag_name = $this->get_tag(); + $element = WP_HTML_Spec::element_info( $tag_name ); + + $self_closes = $element::is_void || ( ! 
$element::is_html && $this->has_self_closing_flag() ); + if ( $self_closes ) { + return true; + } + + if ( $this->is_tag_closer() ) { + array_pop( $this->open_elements ); + } else { + $this->open_elements[] = $tag_name; + } + + return true; } - public function proceed_to_end_of_next_tag() { + public function next_sibling() { if ( $this->fully_supported_input ) { return false; } - $open_elements = array(); + $starting_depth = count( $this->open_elements ); - while ( $this->next_tag( array( 'tag_closers' => 'visit' ) ) ) { - $tag_name = $this->get_tag(); - $element = WP_HTML_Spec::element_info( $tag_name ); + /* + * If we aren't already inside a tag then advance to the first one. + * If that tag is self-closing then we're done. Otherwise, open the + * stack with that tag name and prepare to close out the stack. + */ + if ( ! $this->get_tag() ) { + if ( ! $this->next_tag() ) { + return false; + } - if ( $element::is_void ) { + if ( $starting_depth === count( $this->open_elements ) ) { return true; } + } - if ( $this->is_tag_closer() ) { - if ( 0 === count( $open_elements ) || $open_elements[ count( $open_elements ) - 1 ] !== $tag_name ) { - $this->fully_supported_input = true; - return false; - } - - array_pop( $open_elements ); - if ( 0 === count( $open_elements ) ) { - return true; - } + while ( $this->next_tag() ) { + if ( $starting_depth === count( $this->open_elements ) ) { + return true; } } + + return false; } } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php similarity index 66% rename from tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php rename to tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 11b1eb8c418ca..90d11a7f60af9 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor-support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -6,6 +6,17 @@ * @subpackage HTML-API */ 
+require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-attribute-token.php'; +require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-span.php'; +require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-spec.php'; +require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-text-replacement.php'; +require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-tag-processor.php'; +require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php'; + +class WP_UnitTestCase extends PHPUnit\Framework\TestCase {} + +function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '<', '>', '"' ], $s ); } + /** * @group html-api * @@ -69,4 +80,28 @@ public function test_does_not_call_next_tag_for_unsupported_html( $html ) { $this->assertFalse( $p->next_tag(), "Advanced to '{$p->get_tag()}' even though input HTML isn't supported." ); } + + /** + * @dataProvider data_next_sibling + */ + public function test_finds_next_sibling( $html ) { + $p = new WP_HTML_Processor( $html ); + + while ( true !== $p->get_attribute( 'start' ) ) { + $p->next_tag(); + } + $p->next_sibling(); + + $this->assertTrue( $p->get_attribute( 'end' ) ); + } + + /** + * @return array[] + */ + public function data_next_sibling() { + return array( + 'Top-level siblings' => array( '' ), + 'List items' => array( '
    • One
    • Two
    • Three
    • Four
    ' ), + ); + } } From 65c9f5b37335d7888fd7b0831691ad2377b5996b Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 9 Mar 2023 17:47:18 -0700 Subject: [PATCH 11/39] Add the rest of it --- .../html-api/class-wp-html-processor.php | 182 ++++++++++++- .../Tests_HtmlApi_wpHtmlProcessor_Support.php | 248 +++++++++++++++++- 2 files changed, 413 insertions(+), 17 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index c7de3079970f0..29b6fe81955f6 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -43,6 +43,14 @@ public function next_tag( $query = null ) { return false; } + if ( 0 < count( $this->open_elements ) ) { + $element = WP_HTML_Spec::element_info( end( $this->open_elements ) ); + // @TODO: Handle self-closing HTML foreign elements: must convey self-closing flag on stack. + if ( $element::is_void ) { + array_pop( $this->open_elements ); + } + } + if ( false === parent::next_tag( array( 'tag_closers' => 'visit' ) ) ) { return false; } @@ -52,6 +60,7 @@ public function next_tag( $query = null ) { $self_closes = $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ); if ( $self_closes ) { + $this->open_elements[] = $tag_name; return true; } @@ -65,33 +74,186 @@ public function next_tag( $query = null ) { } public function next_sibling() { - if ( $this->fully_supported_input ) { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { return false; } $starting_depth = count( $this->open_elements ); - /* - * If we aren't already inside a tag then advance to the first one. - * If that tag is self-closing then we're done. Otherwise, open the - * stack with that tag name and prepare to close out the stack. - */ - if ( ! $this->get_tag() ) { - if ( ! 
$this->next_tag() ) { + while ( $this->next_tag() ) { + $current_depth = count( $this->open_elements ); + + if ( ! $this->is_tag_closer() && $current_depth === $starting_depth ) { + return true; + } + + if ( ! $this->is_tag_closer() && $current_depth < $starting_depth ) { return false; } + } + + return false; + } + + public function next_child() { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } - if ( $starting_depth === count( $this->open_elements ) ) { + $starting_depth = count( $this->open_elements ); + + while ( $this->next_tag() ) { + $current_depth = count( $this->open_elements ); + + if ( ! $this->is_tag_closer() && $current_depth === $starting_depth + 1 ) { return true; } } + return false; + } + + private function find_closing_tag() { + $starting_depth = count( $this->open_elements ); + while ( $this->next_tag() ) { - if ( $starting_depth === count( $this->open_elements ) ) { + $current_depth = count( $this->open_elements ); + + if ( $this->is_tag_closer() && $current_depth < $starting_depth ) { return true; } } return false; } + + public function get_inner_content() { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } + + if ( ! $this->get_tag() || $this->is_tag_closer() ) { + return false; + } + + $element = WP_HTML_Spec::element_info( $this->get_tag() ); + if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + return false; + } + + // @TODO: Unique bookmark names + $this->set_bookmark( 'start' ); + if ( ! 
$this->find_closing_tag() ) { + return false; + } + $this->set_bookmark( 'end' ); + + $start = $this->bookmarks['start']->end + 1; + $end = $this->bookmarks['end']->start - 1; + $inner_content = substr( $this->html, $start, $end - $start + 1 ); + + $this->release_bookmark( 'start' ); + $this->release_bookmark( 'end' ); + + return $inner_content; + } + + public function set_inner_content( $new_html ) { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } + + if ( ! $this->get_tag() || $this->is_tag_closer() ) { + return false; + } + + $element = WP_HTML_Spec::element_info( $this->get_tag() ); + if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + return false; + } + + // @TODO: Unique bookmark names + $this->set_bookmark( 'start' ); + if ( ! $this->find_closing_tag() ) { + return false; + } + $this->set_bookmark( 'end' ); + + $start = $this->bookmarks['start']->end + 1; + $end = $this->bookmarks['end']->start; + $this->lexical_updates[] = new WP_HTML_Text_Replacement( $start, $end, $new_html ); + $this->get_updated_html(); + $this->seek( 'start' ); + + $this->release_bookmark( 'start' ); + $this->release_bookmark( 'end' ); + } + + public function get_outer_content() { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } + + if ( ! $this->get_tag() || $this->is_tag_closer() ) { + return false; + } + + $element = WP_HTML_Spec::element_info( $this->get_tag() ); + if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + $this->set_bookmark( 'start' ); + $here = $this->bookmarks['start']; + return substr( $this->html, $here->start, $here->end - $here->start + 1 ); + } + + // @TODO: Unique bookmark names + $this->set_bookmark( 'start' ); + if ( ! 
$this->find_closing_tag() ) { + return false; + } + $this->set_bookmark( 'end' ); + + $start = $this->bookmarks['start']->start; + $end = $this->bookmarks['end']->end; + $inner_content = substr( $this->html, $start, $end - $start + 1 ); + $this->seek( 'start' ); + + $this->release_bookmark( 'start' ); + $this->release_bookmark( 'end' ); + + return $inner_content; + } + + public function set_outer_content( $new_html ) { + if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { + return false; + } + + if ( ! $this->get_tag() || $this->is_tag_closer() ) { + return false; + } + + $element = WP_HTML_Spec::element_info( $this->get_tag() ); + // @TODO: Replace void and self-closing tags. + if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + return false; + } + + // @TODO: Unique bookmark names + $this->set_bookmark( 'start' ); + if ( ! $this->find_closing_tag() ) { + return false; + } + $this->set_bookmark( 'end' ); + + $start = $this->bookmarks['start']->start; + $end = $this->bookmarks['end']->end + 1; + $this->lexical_updates[] = new WP_HTML_Text_Replacement( $start, $end, $new_html ); + $this->get_updated_html(); + $this->bookmarks['start']->start = $start; + $this->bookmarks['start']->end = $start; + $this->seek( 'start' ); + + $this->release_bookmark( 'start' ); + $this->release_bookmark( 'end' ); + } } diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 90d11a7f60af9..f83529f9f0d1c 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -23,6 +23,16 @@ function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '<', '> * @coversDefaultClass WP_HTML_Processor */ class Tests_HtmlApi_wpHtmlProcessor_Support extends WP_UnitTestCase { + private function html_processor_at_start( 
$html ) { + $p = new WP_HTML_Processor( $html ); + + while ( true !== $p->get_attribute( 'start' ) && $p->next_tag() ) { + continue; + } + + return $p; + } + /** * @dataProvider data_fully_balanced_html */ @@ -85,14 +95,10 @@ public function test_does_not_call_next_tag_for_unsupported_html( $html ) { * @dataProvider data_next_sibling */ public function test_finds_next_sibling( $html ) { - $p = new WP_HTML_Processor( $html ); - - while ( true !== $p->get_attribute( 'start' ) ) { - $p->next_tag(); - } + $p = $this->html_processor_at_start( $html ); $p->next_sibling(); - $this->assertTrue( $p->get_attribute( 'end' ) ); + $this->assertTrue( $p->get_attribute( 'end' ), 'Did not finding sibling tag.' ); } /** @@ -100,8 +106,236 @@ public function test_finds_next_sibling( $html ) { */ public function data_next_sibling() { return array( + 'Leading markup' => array( 'before' ), 'Top-level siblings' => array( '' ), - 'List items' => array( '
    • One
    • Two
    • Three
    • Four
    ' ), + 'Nested siblings' => array( '
    • One
    • Two
    • Three
    • Four
    ' ), + 'Nesting avalanche' => array( '
    '), + ); + } + + /** + * @dataProvider data_no_next_sibling + */ + public function test_finds_no_next_sibling_when_none_exists( $html ) { + $p = $this->html_processor_at_start( $html ); + $this->assertFalse( $p->next_sibling(), 'Found a sibling when none exists.' ); + } + + public function data_no_next_sibling() { + return array( + 'Leading markup' => array( 'before
    ' ), + 'No more siblings' => array( '
    ' ), + 'Tag-closing avalanche' => array( '
    '), + ); + } + + /** + * @dataProvider data_next_child + */ + public function test_finds_next_child( $html ) { + $p = $this->html_processor_at_start( $html ); + + $p->next_child(); + $this->assertTrue( $p->get_attribute( 'end' ), 'Did not find child tag.' ); + } + + public function data_next_child() { + return array( + 'Leading markup' => array( 'this is not tag content
    afterwards' ), + 'Normal nesting' => array( '
    • text

    ' ), + ); + } + + /** + * @dataProvider data_no_next_child + */ + public function test_finds_no_next_child( $html ) { + $p = $this->html_processor_at_start( $html ); + + $this->assertFalse( $p->next_child(), 'Did not find child tag.' ); + } + + public function data_no_next_child() { + return array( + 'Leading markup' => array( 'this is not tag content
    afterwards' ), + 'Already nested' => array( '
  • text

  • ', ), + ); + } + + public function test_finds_chain_of_elements() { + $p = new WP_HTML_Processor( << +

    Things I could be eating right now

    +
      +
    • Apples
    • +
    • Pears
    • +
    • Prickly pears
    • +
    • + +
      + Scwarzwälder Kirschtorte +
        +
      • Flour
      • +
      • Eggs
      • +
      • Sugar
      • +
      • Cream
      • +
      • Cherries
      • +
      • Chocolate
      • +
      +
      +
    • +
    + +HTML + ); + + $p->next_tag(); + $p->next_child(); + $p->next_sibling(); + $p->next_child(); + $p->next_sibling(); + $p->next_sibling(); + $p->next_sibling(); + $p->next_child(); + $p->next_sibling(); + $p->next_child(); + $p->next_sibling(); + $p->next_child(); + $p->next_sibling(); + $p->next_sibling(); + + $this->assertTrue( $p->get_attribute( 'this-one' ) ); + } + + /** + * @dataProvider data_inner_content + */ + public function test_get_inner_content( $before, $inner, $after ) { + $p = $this->html_processor_at_start( $before . $inner . $after ); + + $this->assertSame( $inner, $p->get_inner_content(), 'Found the wrong inner content.' ); + } + + public function data_inner_content() { + return array( + 'Leading text' => array( '
    ', 'text', '
    ' ), + 'Single tag' => array( '
    ', 'text', '
    ' ), + 'Nested tags' => array( '
    ', '
    • One
    • Two
    ', '
    ' ), + 'Complex HTML' => array( + << +

    Things I could be eating right now

    +
      +
    • Apples
    • +
    • Pears
    • +
    • Prickly pears
    • +
    • + +
      +HTML, + <<Scwarzwälder Kirschtorte +
        +
      • Flour
      • +
      • Eggs
      • +
      • Sugar
      • +
      • Cream
      • +
      • Cherries
      • +
      • Chocolate
      • +
      + +HTML, + << +
    • +
    + +HTML + ), + ); + } + + /** + * @dataProvider data_set_inner_content + */ + public function test_set_inner_content( $before, $old, $new, $after ) { + $p = $this->html_processor_at_start( $before . $old . $after ); + + $p->set_inner_content( $new ); + + $this->assertSame( $before . $new . $after, $p->get_updated_html(), 'Did not properly swap out inner content.' ); + } + + public function data_set_inner_content() { + return array( + 'Single tag' => array( '
    ', 'boring text', 'exciting text', '
    ' ), + 'Nested tags' => array( '
    • ', '

      This is neat

      ', 'this
      is
      not', '
    ' ), + ); + } + + /** + * @dataProvider data_outer_content + */ + public function test_get_outer_content( $before, $outer, $after ) { + $p = $this->html_processor_at_start( $before . $outer . $after ); + + $this->assertSame( $outer, $p->get_outer_content(), 'Found the wrong outer content.' ); + } + + public function data_outer_content() { + return array( + 'Leading text' => array( '', '
    text
    ', 'when will it end?' ), + 'Single tag' => array( '', '
    text
    ', '' ), + 'Nested tags' => array( '
    ', '
    • One
    • Two
    ', '
    ' ), + 'Complex HTML' => array( + << +

    Things I could be eating right now

    +
      +
    • Apples
    • +
    • Pears
    • +
    • Prickly pears
    • +
    • + + +HTML, + << + Scwarzwälder Kirschtorte +
        +
      • Flour
      • +
      • Eggs
      • +
      • Sugar
      • +
      • Cream
      • +
      • Cherries
      • +
      • Chocolate
      • +
      + +HTML, + << +
    + +HTML + ), + ); + } + + /** + * @dataProvider data_set_outer_content + */ + public function test_set_outer_content( $before, $old, $new, $after ) { + $p = $this->html_processor_at_start( $before . $old . $after ); + + $p->set_outer_content( $new ); + + $this->assertSame( $before . $new . $after, $p->get_updated_html(), 'Did not properly swap out outer content.' ); + } + + public function data_set_outer_content() { + return array( + 'Single tag' => array( '', '
    boring text
    ', 'exciting text', '' ), + 'Nested tags' => array( '
      ', '
    • This is neat

    • ', '
    • this is
      not
    • ', '
    ' ), ); } } From 247966b4c4b9fe35727f86e88e77f6b2c528f7a9 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 16:56:34 -0700 Subject: [PATCH 12/39] Rename "next_child" to "first_child" to better match its purpose --- .../html-api/class-wp-html-processor.php | 2 +- .../Tests_HtmlApi_wpHtmlProcessor_Support.php | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 29b6fe81955f6..bee001da42d88 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -95,7 +95,7 @@ public function next_sibling() { return false; } - public function next_child() { + public function first_child() { if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { return false; } diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index f83529f9f0d1c..85848c7f31f07 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -135,7 +135,7 @@ public function data_no_next_sibling() { public function test_finds_next_child( $html ) { $p = $this->html_processor_at_start( $html ); - $p->next_child(); + $p->first_child(); $this->assertTrue( $p->get_attribute( 'end' ), 'Did not find child tag.' ); } @@ -152,7 +152,7 @@ public function data_next_child() { public function test_finds_no_next_child( $html ) { $p = $this->html_processor_at_start( $html ); - $this->assertFalse( $p->next_child(), 'Did not find child tag.' ); + $this->assertFalse( $p->first_child(), 'Did not find child tag.' 
); } public function data_no_next_child() { @@ -190,17 +190,17 @@ public function test_finds_chain_of_elements() { ); $p->next_tag(); - $p->next_child(); + $p->first_child(); $p->next_sibling(); - $p->next_child(); + $p->first_child(); $p->next_sibling(); $p->next_sibling(); $p->next_sibling(); - $p->next_child(); + $p->first_child(); $p->next_sibling(); - $p->next_child(); + $p->first_child(); $p->next_sibling(); - $p->next_child(); + $p->first_child(); $p->next_sibling(); $p->next_sibling(); From f1ccd9be082b2c2c47e7a4ec72f86f1f7979f751 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 18:01:03 -0700 Subject: [PATCH 13/39] Ensure no children are found for void elements. --- .../tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 85848c7f31f07..7b3a97e549bfe 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -159,6 +159,7 @@ public function data_no_next_child() { return array( 'Leading markup' => array( 'this is not tag content
    afterwards' ), 'Already nested' => array( '
  • text

  • ', ), + 'Void element' => array( '' ), ); } From fb31ea1c0824fcfa2cca57bf05d213cafdd2ee93 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Tue, 21 Mar 2023 18:02:04 -0700 Subject: [PATCH 14/39] rename next_child to first_child in test names --- .../Tests_HtmlApi_wpHtmlProcessor_Support.php | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 7b3a97e549bfe..9acb589360356 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -130,16 +130,16 @@ public function data_no_next_sibling() { } /** - * @dataProvider data_next_child + * @dataProvider data_first_child */ - public function test_finds_next_child( $html ) { + public function test_finds_first_child( $html ) { $p = $this->html_processor_at_start( $html ); $p->first_child(); $this->assertTrue( $p->get_attribute( 'end' ), 'Did not find child tag.' ); } - public function data_next_child() { + public function data_first_child() { return array( 'Leading markup' => array( 'this is not tag content
    afterwards' ), 'Normal nesting' => array( '
    • text

    ' ), @@ -147,15 +147,15 @@ public function data_next_child() { } /** - * @dataProvider data_no_next_child + * @dataProvider data_no_first_child */ - public function test_finds_no_next_child( $html ) { + public function test_finds_no_first_child( $html ) { $p = $this->html_processor_at_start( $html ); $this->assertFalse( $p->first_child(), 'Did not find child tag.' ); } - public function data_no_next_child() { + public function data_no_first_child() { return array( 'Leading markup' => array( 'this is not tag content
    afterwards' ), 'Already nested' => array( '
  • text

  • ', ), From e945f16fb8843c209083d03375aa0d162f824f49 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 Mar 2023 05:35:30 -0700 Subject: [PATCH 15/39] Add Trac ticket reference to tests --- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 6ad80a29cf72f..f938a14441a58 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -52,7 +52,7 @@ public function test_get_tag_returns_open_tag_name() { } /** - * @ticket NEEDS TICKET + * @ticket 58009 TICKET * * @covers WP_HTML_Tag_Processor::has_self_closing_flag() * From 96053c9c737891287e68a1ead2113fa3e79284b4 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 Mar 2023 05:40:41 -0700 Subject: [PATCH 16/39] fixup! Add Trac ticket reference to tests --- tests/phpunit/tests/html-api/wpHtmlTagProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index f938a14441a58..3d12f065f5168 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -52,7 +52,7 @@ public function test_get_tag_returns_open_tag_name() { } /** - * @ticket 58009 TICKET + * @ticket 58009 * * @covers WP_HTML_Tag_Processor::has_self_closing_flag() * From 04a37b5e1fbfb333f584d37f3286889f65e4f082 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 Mar 2023 07:33:40 -0700 Subject: [PATCH 17/39] Linting issues --- .../html-api/class-wp-html-processor.php | 14 +- .../html-api/class-wp-html-spec.php | 894 +++++++++++++----- .../Tests_HtmlApi_wpHtmlProcessor_Support.php | 32 +- 3 files changed, 659 insertions(+), 281 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php 
b/src/wp-includes/html-api/class-wp-html-processor.php index bee001da42d88..e9e64647506e2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -18,7 +18,7 @@ public function ensure_support() { if ( ! $p->is_tag_closer() ) { $element = WP_HTML_Spec::element_info( $tag_name ); - $self_closes = $element::is_void || ( ! $element::is_html && $p->has_self_closing_flag() ); + $self_closes = $element::IS_VOID || ( ! $element::IS_HTML && $p->has_self_closing_flag() ); if ( ! $self_closes ) { $stack[] = $tag_name; } @@ -46,7 +46,7 @@ public function next_tag( $query = null ) { if ( 0 < count( $this->open_elements ) ) { $element = WP_HTML_Spec::element_info( end( $this->open_elements ) ); // @TODO: Handle self-closing HTML foreign elements: must convey self-closing flag on stack. - if ( $element::is_void ) { + if ( $element::IS_VOID ) { array_pop( $this->open_elements ); } } @@ -58,7 +58,7 @@ public function next_tag( $query = null ) { $tag_name = $this->get_tag(); $element = WP_HTML_Spec::element_info( $tag_name ); - $self_closes = $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ); + $self_closes = $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ); if ( $self_closes ) { $this->open_elements[] = $tag_name; return true; @@ -137,7 +137,7 @@ public function get_inner_content() { } $element = WP_HTML_Spec::element_info( $this->get_tag() ); - if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + if ( $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ) ) { return false; } @@ -168,7 +168,7 @@ public function set_inner_content( $new_html ) { } $element = WP_HTML_Spec::element_info( $this->get_tag() ); - if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + if ( $element::IS_VOID || ( ! 
$element::IS_HTML && $this->has_self_closing_flag() ) ) { return false; } @@ -199,7 +199,7 @@ public function get_outer_content() { } $element = WP_HTML_Spec::element_info( $this->get_tag() ); - if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + if ( $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ) ) { $this->set_bookmark( 'start' ); $here = $this->bookmarks['start']; return substr( $this->html, $here->start, $here->end - $here->start + 1 ); @@ -234,7 +234,7 @@ public function set_outer_content( $new_html ) { $element = WP_HTML_Spec::element_info( $this->get_tag() ); // @TODO: Replace void and self-closing tags. - if ( $element::is_void || ( ! $element::is_html && $this->has_self_closing_flag() ) ) { + if ( $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ) ) { return false; } diff --git a/src/wp-includes/html-api/class-wp-html-spec.php b/src/wp-includes/html-api/class-wp-html-spec.php index cef3e07c0bdff..a82826903a422 100644 --- a/src/wp-includes/html-api/class-wp-html-spec.php +++ b/src/wp-includes/html-api/class-wp-html-spec.php @@ -8,7 +8,7 @@ */ class WP_HTML_Spec { /** - * Returns meta information about an HTML tag. + * Returns class defining attributes of an element with the given name. * * @since 6.3.0 * @@ -16,277 +16,651 @@ class WP_HTML_Spec { * @return WP_HTML_Element_Meta::class */ public static function element_info( $tag_name ) { - switch ( strtolower( $tag_name ) ) { + // We have to force casing on this because someone might call it with mixed casing. 
+ switch ( strtoupper( $tag_name ) ) { // Normal elements - case 'a': return WP_HTMLAnchorElement::class; - case 'abbr': return WP_HTMLAbbrElement::class; - case 'address': return WP_HTMLAddressElement::class; - case 'area': return WP_HTMLAreaElement::class; - case 'article': return WP_HTMLArticleElement::class; - case 'aside': return WP_HTMLAsideElement::class; - case 'audio': return WP_HTMLAudioElement::class; - case 'b': return WP_HTMLBElement::class; - case 'base': return WP_HTMLBaseElement::class; - case 'bdi': return WP_HTMLBdiElement::class; - case 'bdo': return WP_HTMLBdoElement::class; - case 'blockquote': return WP_HTMLBlockquoteElement::class; - case 'body': return WP_HTMLBodyElement::class; - case 'br': return WP_HTMLBrElement::class; - case 'button': return WP_HTMLButtonElement::class; - case 'canvas': return WP_HTMLCanvasElement::class; - case 'caption': return WP_HTMLCaptionElement::class; - case 'cite': return WP_HTMLCiteElement::class; - case 'code': return WP_HTMLCodeElement::class; - case 'col': return WP_HTMLColElement::class; - case 'colgroup': return WP_HTMLColgroupElement::class; - case 'data': return WP_HTMLDataElement::class; - case 'datalist': return WP_HTMLDataListElement::class; - case 'dd': return WP_HTMLDdElement::class; - case 'del': return WP_HTMLDelElement::class; - case 'details': return WP_HTMLDetailsElement::class; - case 'dfn': return WP_HTMLDfnElement::class; - case 'dialog': return WP_HTMLDialogElement::class; - case 'div': return WP_HTMLDivElement::class; - case 'dl': return WP_HTMLDlElement::class; - case 'dt': return WP_HTMLDtElement::class; - case 'em': return WP_HTMLEmElement::class; - case 'embed': return WP_HTMLEmbedElement::class; - case 'fieldset': return WP_HTMLFieldsetElement::class; - case 'figcaption': return WP_HTMLFigcaptionElement::class; - case 'figure': return WP_HTMLFigureElement::class; - case 'footer': return WP_HTMLFooterElement::class; - case 'form': return WP_HTMLFormElement::class; - case 'h1': return 
WP_HTMLH1Element::class; - case 'h2': return WP_HTMLH2Element::class; - case 'h3': return WP_HTMLH3Element::class; - case 'h4': return WP_HTMLH4Element::class; - case 'h5': return WP_HTMLH5Element::class; - case 'h6': return WP_HTMLH6Element::class; - case 'head': return WP_HTMLHeadElement::class; - case 'header': return WP_HTMLHeaderElement::class; - case 'hgroup': return WP_HTMLHgropuElement::class; - case 'hr': return WP_HTMLHrElement::class; - case 'html': return WP_HTMLHtmlElement::class; - case 'i': return WP_HTMLIElement::class; - case 'iframe': return WP_HTMLIframeElement::class; - case 'img': return WP_HTMLImgElement::class; - case 'input': return WP_HTMLInputElement::class; - case 'ins': return WP_HTMLInsElement::class; - case 'kbd': return WP_HTMLKbdElement::class; - case 'label': return WP_HTMLLabelElement::class; - case 'legend': return WP_HTMLLegendElement::class; - case 'li': return WP_HTMLLiElement::class; - case 'link': return WP_HTMLLinkElement::class; - case 'main': return WP_HTMLMainElement::class; - case 'map': return WP_HTMLMapElement::class; - case 'mark': return WP_HTMLMarkElement::class; - case 'math': return WP_HTMLMathElement::class; - case 'menu': return WP_HTMLMenuElement::class; - case 'meta': return WP_HTMLMetaElement::class; - case 'meter': return WP_HTMLMeterElement::class; - case 'nav': return WP_HTMLNavElement::class; - case 'noscript': return WP_HTMLNoscriptElement::class; - case 'object': return WP_HTMLObjectElement::class; - case 'ol': return WP_HTMLOlElement::class; - case 'optgroup': return WP_HTMLOptgroupElement::class; - case 'option': return WP_HTMLOptionElement::class; - case 'output': return WP_HTMLOutputElement::class; - case 'p': return WP_HTMLPElement::class; - case 'picture': return WP_HTMLPictureElement::class; - case 'pre': return WP_HTMLPreElement::class; - case 'progress': return WP_HTMLProgressElement::class; - case 'q': return WP_HTMLQElement::class; - case 'rp': return WP_HTMLRpElement::class; - case 'rt': 
return WP_HTMLRtElement::class; - case 'ruby': return WP_HTMLRubyElement::class; - case 's': return WP_HTMLSElement::class; - case 'samp': return WP_HTMLSampElement::class; - case 'script': return WP_HTMLScriptElement::class; - case 'section': return WP_HTMLSectionElement::class; - case 'select': return WP_HTMLSelectElement::class; - case 'slot': return WP_HTMLSlotElement::class; - case 'small': return WP_HTMLSmallElement::class; - case 'source': return WP_HTMLSourceElement::class; - case 'span': return WP_HTMLSpanElement::class; - case 'strong': return WP_HTMLStrongElement::class; - case 'style': return WP_HTMLStyleElement::class; - case 'sub': return WP_HTMLSubElement::class; - case 'summary': return WP_HTMLSummaryElement::class; - case 'sup': return WP_HTMLSupElement::class; - case 'svg': return WP_HTMLSvgElement::class; - case 'table': return WP_HTMLTableElement::class; - case 'tbody': return WP_HTMLTbodyElement::class; - case 'td': return WP_HTMLTdElement::class; - case 'template': return WP_HTMLTemplateElement::class; - case 'textarea': return WP_HTMLTextareaElement::class; - case 'tfoot': return WP_HTMLTfootElement::class; - case 'th': return WP_HTMLThElement::class; - case 'thead': return WP_HTMLTheadElement::class; - case 'time': return WP_HTMLTimeElement::class; - case 'title': return WP_HTMLTitleElement::class; - case 'tr': return WP_HTMLTrElement::class; - case 'track': return WP_HTMLTrackElement::class; - case 'u': return WP_HTMLUElement::class; - case 'ul': return WP_HTMLUlElement::class; - case 'var': return WP_HTMLVarElement::class; - case 'video': return WP_HTMLVideoElement::class; - case 'wbr': return WP_HTMLWbrElement::class; + case 'A': + return WP_HTMLAnchorElement::class; + case 'ABBR': + return WP_HTMLAbbrElement::class; + case 'ADDRESS': + return WP_HTMLAddressElement::class; + case 'AREA': + return WP_HTMLAreaElement::class; + case 'ARTICLE': + return WP_HTMLArticleElement::class; + case 'ASIDE': + return WP_HTMLAsideElement::class; + case 
'AUDIO': + return WP_HTMLAudioElement::class; + case 'B': + return WP_HTMLBElement::class; + case 'BASE': + return WP_HTMLBaseElement::class; + case 'BDI': + return WP_HTMLBdiElement::class; + case 'BDO': + return WP_HTMLBdoElement::class; + case 'BLOCKQUOTE': + return WP_HTMLBlockquoteElement::class; + case 'BODY': + return WP_HTMLBodyElement::class; + case 'BR': + return WP_HTMLBrElement::class; + case 'BUTTON': + return WP_HTMLButtonElement::class; + case 'CANVAS': + return WP_HTMLCanvasElement::class; + case 'CAPTION': + return WP_HTMLCaptionElement::class; + case 'CITE': + return WP_HTMLCiteElement::class; + case 'CODE': + return WP_HTMLCodeElement::class; + case 'COL': + return WP_HTMLColElement::class; + case 'COLGROUP': + return WP_HTMLColgroupElement::class; + case 'DATA': + return WP_HTMLDataElement::class; + case 'DATALIST': + return WP_HTMLDataListElement::class; + case 'DD': + return WP_HTMLDdElement::class; + case 'DEL': + return WP_HTMLDelElement::class; + case 'DETAILS': + return WP_HTMLDetailsElement::class; + case 'DFN': + return WP_HTMLDfnElement::class; + case 'DIALOG': + return WP_HTMLDialogElement::class; + case 'DIV': + return WP_HTMLDivElement::class; + case 'DL': + return WP_HTMLDlElement::class; + case 'DT': + return WP_HTMLDtElement::class; + case 'EM': + return WP_HTMLEmElement::class; + case 'EMBED': + return WP_HTMLEmbedElement::class; + case 'FIELDSET': + return WP_HTMLFieldsetElement::class; + case 'FIGCAPTION': + return WP_HTMLFigcaptionElement::class; + case 'FIGURE': + return WP_HTMLFigureElement::class; + case 'FOOTER': + return WP_HTMLFooterElement::class; + case 'FORM': + return WP_HTMLFormElement::class; + case 'H1': + return WP_HTMLH1Element::class; + case 'H2': + return WP_HTMLH2Element::class; + case 'H3': + return WP_HTMLH3Element::class; + case 'H4': + return WP_HTMLH4Element::class; + case 'H5': + return WP_HTMLH5Element::class; + case 'H6': + return WP_HTMLH6Element::class; + case 'HEAD': + return 
WP_HTMLHeadElement::class; + case 'HEADER': + return WP_HTMLHeaderElement::class; + case 'HGROUP': + return WP_HTMLHgropuElement::class; + case 'HR': + return WP_HTMLHrElement::class; + case 'HTML': + return WP_HTMLHtmlElement::class; + case 'I': + return WP_HTMLIElement::class; + case 'IFRAME': + return WP_HTMLIframeElement::class; + case 'IMG': + return WP_HTMLImgElement::class; + case 'INPUT': + return WP_HTMLInputElement::class; + case 'INS': + return WP_HTMLInsElement::class; + case 'KBD': + return WP_HTMLKbdElement::class; + case 'LABEL': + return WP_HTMLLabelElement::class; + case 'LEGEND': + return WP_HTMLLegendElement::class; + case 'LI': + return WP_HTMLLiElement::class; + case 'LINK': + return WP_HTMLLinkElement::class; + case 'MAIN': + return WP_HTMLMainElement::class; + case 'MAP': + return WP_HTMLMapElement::class; + case 'MARK': + return WP_HTMLMarkElement::class; + case 'MATH': + return WP_HTMLMathElement::class; + case 'MENU': + return WP_HTMLMenuElement::class; + case 'META': + return WP_HTMLMetaElement::class; + case 'METER': + return WP_HTMLMeterElement::class; + case 'NAV': + return WP_HTMLNavElement::class; + case 'NOSCRIPT': + return WP_HTMLNoscriptElement::class; + case 'OBJECT': + return WP_HTMLObjectElement::class; + case 'OL': + return WP_HTMLOlElement::class; + case 'OPTGROUP': + return WP_HTMLOptgroupElement::class; + case 'OPTION': + return WP_HTMLOptionElement::class; + case 'OUTPUT': + return WP_HTMLOutputElement::class; + case 'P': + return WP_HTMLPElement::class; + case 'PICTURE': + return WP_HTMLPictureElement::class; + case 'PRE': + return WP_HTMLPreElement::class; + case 'PROGRESS': + return WP_HTMLProgressElement::class; + case 'Q': + return WP_HTMLQElement::class; + case 'RP': + return WP_HTMLRpElement::class; + case 'RT': + return WP_HTMLRtElement::class; + case 'RUBY': + return WP_HTMLRubyElement::class; + case 'S': + return WP_HTMLSElement::class; + case 'SAMP': + return WP_HTMLSampElement::class; + case 'SCRIPT': + return 
WP_HTMLScriptElement::class; + case 'SECTION': + return WP_HTMLSectionElement::class; + case 'SELECT': + return WP_HTMLSelectElement::class; + case 'SLOT': + return WP_HTMLSlotElement::class; + case 'SMALL': + return WP_HTMLSmallElement::class; + case 'SOURCE': + return WP_HTMLSourceElement::class; + case 'SPAN': + return WP_HTMLSpanElement::class; + case 'STRONG': + return WP_HTMLStrongElement::class; + case 'STYLE': + return WP_HTMLStyleElement::class; + case 'SUB': + return WP_HTMLSubElement::class; + case 'SUMMARY': + return WP_HTMLSummaryElement::class; + case 'SUP': + return WP_HTMLSupElement::class; + case 'SVG': + return WP_HTMLSvgElement::class; + case 'TABLE': + return WP_HTMLTableElement::class; + case 'TBODY': + return WP_HTMLTbodyElement::class; + case 'TD': + return WP_HTMLTdElement::class; + case 'TEMPLATE': + return WP_HTMLTemplateElement::class; + case 'TEXTAREA': + return WP_HTMLTextareaElement::class; + case 'TFOOT': + return WP_HTMLTfootElement::class; + case 'TH': + return WP_HTMLThElement::class; + case 'THEAD': + return WP_HTMLTheadElement::class; + case 'TIME': + return WP_HTMLTimeElement::class; + case 'TITLE': + return WP_HTMLTitleElement::class; + case 'TR': + return WP_HTMLTrElement::class; + case 'TRACK': + return WP_HTMLTrackElement::class; + case 'U': + return WP_HTMLUElement::class; + case 'UL': + return WP_HTMLUlElement::class; + case 'VAR': + return WP_HTMLVarElement::class; + case 'VIDEO': + return WP_HTMLVideoElement::class; + case 'WBR': + return WP_HTMLWbrElement::class; // Deprecated elements - case 'applet': - case 'bgsound': - case 'blink': - case 'isindex': - case 'keygen': - case 'multicol': - case 'nextid': - case 'spacer': + case 'APPLET': + case 'BLINK': + case 'ISINDEX': + case 'MULTICOL': + case 'NEXTID': + case 'SPACER': + return WP_HTMLUnknownHTMLElement::class; + + case 'BGSOUND': // may be self-closing return WP_HTMLUnknownElement::class; + case 'KEYGEN': + return WP_HTML_Void_Element::class; + // Neutralized 
elements - case 'acronym': - case 'basefont': - case 'big': - case 'center': - case 'nobr': - case 'noembed': - case 'noframes': - case 'plaintext': - case 'rb': - case 'rtc': - case 'strike': - case 'tt': + case 'ACRONYM': + case 'BIG': + case 'CENTER': + case 'NOBR': + case 'NOEMBED': + case 'NOFRAMES': + case 'PLAINTEXT': + case 'RB': + case 'RTC': + case 'STRIKE': + case 'TT': return WP_HTMLElement::class; + case 'BASEFONT': + return WP_HTML_Void_Element::class; + // Substitutions - case 'listing': - case 'xmp': + case 'LISTING': + case 'XMP': return WP_HTMLPreElement::class; - - default: - return WP_HTMLUnknownElement::class; } + + $is_valid_custom_name = false !== strpos( $tag_name, '-' ); + + return $is_valid_custom_name + ? WP_HTMLElement::class + : WP_HTMLUnknownElement::class; } } class WP_HTML_Element_Meta { - const is_void = false; - const is_html = true; -} - -class WP_HTMLUnknownElement extends WP_HTML_Element_Meta { const is_html = false; } -class WP_HTMLElement extends WP_HTML_Element_Meta {} - -class WP_HTMLAnchorElement extends WP_HTML_Element_Meta {} -class WP_HTMLAbbrElement extends WP_HTML_Element_Meta {} -class WP_HTMLAddressElement extends WP_HTML_Element_Meta {} -class WP_HTMLAreaElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLArticleElement extends WP_HTML_Element_Meta {} -class WP_HTMLAsideElement extends WP_HTML_Element_Meta {} -class WP_HTMLAudioElement extends WP_HTML_Element_Meta {} -class WP_HTMLBElement extends WP_HTML_Element_Meta {} -class WP_HTMLBaseElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLBdiElement extends WP_HTML_Element_Meta {} -class WP_HTMLBdoElement extends WP_HTML_Element_Meta {} -class WP_HTMLBlockquoteElement extends WP_HTML_Element_Meta {} -class WP_HTMLBodyElement extends WP_HTML_Element_Meta {} -class WP_HTMLBrElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLButtonElement extends WP_HTML_Element_Meta {} -class 
WP_HTMLCanvasElement extends WP_HTML_Element_Meta {} -class WP_HTMLCaptionElement extends WP_HTML_Element_Meta {} -class WP_HTMLCiteElement extends WP_HTML_Element_Meta {} -class WP_HTMLCodeElement extends WP_HTML_Element_Meta {} -class WP_HTMLColElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLColgroupElement extends WP_HTML_Element_Meta {} -class WP_HTMLDataElement extends WP_HTML_Element_Meta {} -class WP_HTMLDataListElement extends WP_HTML_Element_Meta {} -class WP_HTMLDdElement extends WP_HTML_Element_Meta {} -class WP_HTMLDelElement extends WP_HTML_Element_Meta {} -class WP_HTMLDetailsElement extends WP_HTML_Element_Meta {} -class WP_HTMLDfnElement extends WP_HTML_Element_Meta {} -class WP_HTMLDialogElement extends WP_HTML_Element_Meta {} -class WP_HTMLDivElement extends WP_HTML_Element_Meta {} -class WP_HTMLDlElement extends WP_HTML_Element_Meta {} -class WP_HTMLDtElement extends WP_HTML_Element_Meta {} -class WP_HTMLEmElement extends WP_HTML_Element_Meta {} -class WP_HTMLEmbedElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLFieldsetElement extends WP_HTML_Element_Meta {} -class WP_HTMLFigcaptionElement extends WP_HTML_Element_Meta {} -class WP_HTMLFigureElement extends WP_HTML_Element_Meta {} -class WP_HTMLFooterElement extends WP_HTML_Element_Meta {} -class WP_HTMLFormElement extends WP_HTML_Element_Meta {} -class WP_HTMLH1Element extends WP_HTML_Element_Meta {} -class WP_HTMLH2Element extends WP_HTML_Element_Meta {} -class WP_HTMLH3Element extends WP_HTML_Element_Meta {} -class WP_HTMLH4Element extends WP_HTML_Element_Meta {} -class WP_HTMLH5Element extends WP_HTML_Element_Meta {} -class WP_HTMLH6Element extends WP_HTML_Element_Meta {} -class WP_HTMLHeadElement extends WP_HTML_Element_Meta {} -class WP_HTMLHeaderElement extends WP_HTML_Element_Meta {} -class WP_HTMLHgropuElement extends WP_HTML_Element_Meta {} -class WP_HTMLHrElement extends WP_HTML_Element_Meta { const is_void = true; } -class 
WP_HTMLHtmlElement extends WP_HTML_Element_Meta {} -class WP_HTMLIElement extends WP_HTML_Element_Meta {} -class WP_HTMLIframeElement extends WP_HTML_Element_Meta {} -class WP_HTMLImgElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLInputElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLInsElement extends WP_HTML_Element_Meta {} -class WP_HTMLKbdElement extends WP_HTML_Element_Meta {} -class WP_HTMLLabelElement extends WP_HTML_Element_Meta {} -class WP_HTMLLegendElement extends WP_HTML_Element_Meta {} -class WP_HTMLLiElement extends WP_HTML_Element_Meta {} -class WP_HTMLLinkElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLMainElement extends WP_HTML_Element_Meta {} -class WP_HTMLMapElement extends WP_HTML_Element_Meta {} -class WP_HTMLMarkElement extends WP_HTML_Element_Meta {} -class WP_HTMLMathElement extends WP_HTML_Element_Meta {} -class WP_HTMLMenuElement extends WP_HTML_Element_Meta {} -class WP_HTMLMetaElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLMeterElement extends WP_HTML_Element_Meta {} -class WP_HTMLNavElement extends WP_HTML_Element_Meta {} -class WP_HTMLNoscriptElement extends WP_HTML_Element_Meta {} -class WP_HTMLObjectElement extends WP_HTML_Element_Meta {} -class WP_HTMLOlElement extends WP_HTML_Element_Meta {} -class WP_HTMLOptgroupElement extends WP_HTML_Element_Meta {} -class WP_HTMLOptionElement extends WP_HTML_Element_Meta {} -class WP_HTMLOutputElement extends WP_HTML_Element_Meta {} -class WP_HTMLPElement extends WP_HTML_Element_Meta {} -class WP_HTMLPictureElement extends WP_HTML_Element_Meta {} -class WP_HTMLPreElement extends WP_HTML_Element_Meta {} -class WP_HTMLProgressElement extends WP_HTML_Element_Meta {} -class WP_HTMLQElement extends WP_HTML_Element_Meta {} -class WP_HTMLRpElement extends WP_HTML_Element_Meta {} -class WP_HTMLRtElement extends WP_HTML_Element_Meta {} -class WP_HTMLRubyElement extends 
WP_HTML_Element_Meta {} -class WP_HTMLSElement extends WP_HTML_Element_Meta {} -class WP_HTMLSampElement extends WP_HTML_Element_Meta {} -class WP_HTMLScriptElement extends WP_HTML_Element_Meta {} -class WP_HTMLSectionElement extends WP_HTML_Element_Meta {} -class WP_HTMLSelectElement extends WP_HTML_Element_Meta {} -class WP_HTMLSlotElement extends WP_HTML_Element_Meta {} -class WP_HTMLSmallElement extends WP_HTML_Element_Meta {} -class WP_HTMLSourceElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLSpanElement extends WP_HTML_Element_Meta {} -class WP_HTMLStrongElement extends WP_HTML_Element_Meta {} -class WP_HTMLStyleElement extends WP_HTML_Element_Meta {} -class WP_HTMLSubElement extends WP_HTML_Element_Meta {} -class WP_HTMLSummaryElement extends WP_HTML_Element_Meta {} -class WP_HTMLSupElement extends WP_HTML_Element_Meta {} -class WP_HTMLSvgElement extends WP_HTML_Element_Meta {} -class WP_HTMLTableElement extends WP_HTML_Element_Meta {} -class WP_HTMLTbodyElement extends WP_HTML_Element_Meta {} -class WP_HTMLTdElement extends WP_HTML_Element_Meta {} -class WP_HTMLTemplateElement extends WP_HTML_Element_Meta {} -class WP_HTMLTextareaElement extends WP_HTML_Element_Meta {} -class WP_HTMLTfootElement extends WP_HTML_Element_Meta {} -class WP_HTMLThElement extends WP_HTML_Element_Meta {} -class WP_HTMLTheadElement extends WP_HTML_Element_Meta {} -class WP_HTMLTimeElement extends WP_HTML_Element_Meta {} -class WP_HTMLTitleElement extends WP_HTML_Element_Meta {} -class WP_HTMLTrElement extends WP_HTML_Element_Meta {} -class WP_HTMLTrackElement extends WP_HTML_Element_Meta { const is_void = true; } -class WP_HTMLUElement extends WP_HTML_Element_Meta {} -class WP_HTMLUlElement extends WP_HTML_Element_Meta {} -class WP_HTMLVarElement extends WP_HTML_Element_Meta {} -class WP_HTMLVideoElement extends WP_HTML_Element_Meta {} -class WP_HTMLWbrElement extends WP_HTML_Element_Meta { const is_void = true; } + const IS_VOID = false; + const 
IS_HTML = true; +} + +class WP_HTMLElement extends WP_HTML_Element_Meta { +} + +class WP_HTML_Void_Element extends WP_HTMLElement { + const IS_VOID = true; +} + +class WP_HTMLUnknownElement extends WP_HTML_Element_Meta { + const IS_HTML = false; +} + +// this one is a bit weird, but so are the deprecated HTML elements belonging to this category. +class WP_HTMLUnknownHTMLElement extends WP_HTMLUnknownElement { + const IS_HTML = true; +} + +class WP_HTMLAnchorElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLAbbrElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLAddressElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLAreaElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLArticleElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLAsideElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLAudioElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBaseElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLBdiElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBdoElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBlockquoteElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBodyElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLBrElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLButtonElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLCanvasElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLCaptionElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLCiteElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLCodeElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLColElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLColgroupElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDataElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDataListElement extends WP_HTML_Element_Meta { +} + +class 
WP_HTMLDdElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDelElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDetailsElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDfnElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDialogElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDivElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDlElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLDtElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLEmElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLEmbedElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLFieldsetElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLFigcaptionElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLFigureElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLFooterElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLFormElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLH1Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLH2Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLH3Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLH4Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLH5Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLH6Element extends WP_HTML_Element_Meta { +} + +class WP_HTMLHeadElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLHeaderElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLHgropuElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLHrElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLHtmlElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLIElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLIframeElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLImgElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLInputElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLInsElement extends 
WP_HTML_Element_Meta { +} + +class WP_HTMLKbdElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLLabelElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLLegendElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLLiElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLLinkElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLMainElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLMapElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLMarkElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLMathElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLMenuElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLMetaElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLMeterElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLNavElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLNoscriptElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLObjectElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLOlElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLOptgroupElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLOptionElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLOutputElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLPElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLPictureElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLPreElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLProgressElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLQElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLRpElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLRtElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLRubyElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSampElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLScriptElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSectionElement extends WP_HTML_Element_Meta 
{ +} + +class WP_HTMLSelectElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSlotElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSmallElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSourceElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLSpanElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLStrongElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLStyleElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSubElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSummaryElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSupElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLSvgElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTableElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTbodyElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTdElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTemplateElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTextareaElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTfootElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLThElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTheadElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTimeElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTitleElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTrElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLTrackElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} + +class WP_HTMLUElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLUlElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLVarElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLVideoElement extends WP_HTML_Element_Meta { +} + +class WP_HTMLWbrElement extends WP_HTML_Element_Meta { + const IS_VOID = true; +} diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 
9acb589360356..87c194ea1c637 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -13,9 +13,13 @@ require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-tag-processor.php'; require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php'; -class WP_UnitTestCase extends PHPUnit\Framework\TestCase {} +class WP_UnitTestCase extends PHPUnit\Framework\TestCase { -function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '<', '>', '"' ], $s ); } +} + +function esc_attr( $s ) { + return str_replace( array( '<', '>', '"' ), array( '<', '>', '"' ), $s ); +} /** * @group html-api @@ -53,8 +57,8 @@ public function data_fully_balanced_html() { 'Void tags' => array( '

    ' ), 'Void tags with invalid self-closing flags' => array( '

    ' ), 'Invalid self-closing non-void' => array( 'This
    is (not) empty.
    ' ), - 'Nested with void tags' => array( '

    Text
    More Text

    '), - 'HTML foreign elements' => array( ''), + 'Nested with void tags' => array( '

    Text
    More Text

    ' ), + 'HTML foreign elements' => array( '' ) ); } @@ -77,8 +81,8 @@ public function data_not_fully_balanced_html() { 'Overlapping tags' => array( '

    Important

    ' ), 'Overlapping nested tags' => array( '

    Important

    ' ), 'Invalid self-closing non-void' => array( 'This
    is (not) empty.' ), - 'Un-closed HTML foreign self-closer' => array( ''), - 'Improperly-closed HTML foreign self-closer' => array( ''), + 'Un-closed HTML foreign self-closer' => array( '' ), + 'Improperly-closed HTML foreign self-closer' => array( '' ) ); } @@ -109,7 +113,7 @@ public function data_next_sibling() { 'Leading markup' => array( 'before' ), 'Top-level siblings' => array( '' ), 'Nested siblings' => array( '
    • One
    • Two
    • Three
    • Four
    ' ), - 'Nesting avalanche' => array( '
    '), + 'Nesting avalanche' => array( '
    ' ) ); } @@ -125,7 +129,7 @@ public function data_no_next_sibling() { return array( 'Leading markup' => array( 'before
    ' ), 'No more siblings' => array( '
    ' ), - 'Tag-closing avalanche' => array( '
    '), + 'Tag-closing avalanche' => array( '
    ' ) ); } @@ -142,7 +146,7 @@ public function test_finds_first_child( $html ) { public function data_first_child() { return array( 'Leading markup' => array( 'this is not tag content
    afterwards' ), - 'Normal nesting' => array( '
    • text

    ' ), + 'Normal nesting' => array( '
    • text

    ' ) ); } @@ -159,7 +163,7 @@ public function data_no_first_child() { return array( 'Leading markup' => array( 'this is not tag content
    afterwards' ), 'Already nested' => array( '
  • text

  • ', ), - 'Void element' => array( '' ), + 'Void element' => array( '' ) ); } @@ -223,7 +227,7 @@ public function data_inner_content() { 'Single tag' => array( '
    ', 'text', '
    ' ), 'Nested tags' => array( '
    ', '
    • One
    • Two
    ', '
    ' ), 'Complex HTML' => array( - <<

    Things I could be eating right now

      @@ -234,7 +238,7 @@ public function data_inner_content() {
      HTML, - <<Scwarzwälder Kirschtorte
      • Flour
      • @@ -246,7 +250,7 @@ public function data_inner_content() {
      HTML, - <<
    @@ -318,7 +322,7 @@ public function data_outer_content() { HTML - ), + ) ); } From 0975e345e1ce2580148adf38ed9601ed756ba586 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 20 Apr 2023 15:42:04 +0200 Subject: [PATCH 18/39] Wrap bookmarking to create special internal bookmarks used in HTML traversal --- .../html-api/class-wp-html-processor.php | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e9e64647506e2..2e0cbe0291820 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -61,13 +61,16 @@ public function next_tag( $query = null ) { $self_closes = $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ); if ( $self_closes ) { $this->open_elements[] = $tag_name; + $this->set_bookmark( '__open_elements_' . count( $this->open_elements ) ); return true; } if ( $this->is_tag_closer() ) { + $this->release_bookmark( '__open_elements_' . count( $this->open_elements ) ); array_pop( $this->open_elements ); } else { $this->open_elements[] = $tag_name; + $this->set_bookmark( '__open_elements_' . 
count( $this->open_elements ) ); } return true; @@ -113,6 +116,16 @@ public function first_child() { return false; } + public function seek( $bookmark_name ) { + parent::seek( $bookmark_name ); + + foreach ( $this->bookmarks as $name => $mark ) { + if ( str_starts_with( $name, '__open_elements_' ) && $mark->start > $this->bookmarks[ $bookmark_name ]->start ) { + $this->release_bookmark( $name ); + } + } + } + private function find_closing_tag() { $starting_depth = count( $this->open_elements ); From cafee034c77216292b5206290300847f6aae5b4f Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 20 Apr 2023 15:55:30 +0200 Subject: [PATCH 19/39] Remove `ensure_support` --- .../html-api/class-wp-html-processor.php | 66 +------------------ .../Tests_HtmlApi_wpHtmlProcessor_Support.php | 18 ----- 2 files changed, 1 insertion(+), 83 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 2e0cbe0291820..717d692b703b0 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1,52 +1,12 @@ fully_supported_input ) { - return $this->fully_supported_input; - } - - $stack = array(); - - $p = new WP_HTML_Tag_Processor( $this->html ); - while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) { - $tag_name = $p->get_tag(); - - if ( ! $p->is_tag_closer() ) { - $element = WP_HTML_Spec::element_info( $tag_name ); - - $self_closes = $element::IS_VOID || ( ! $element::IS_HTML && $p->has_self_closing_flag() ); - if ( ! 
$self_closes ) { - $stack[] = $tag_name; - } - } else { - if ( end( $stack ) === $tag_name ) { - array_pop( $stack ); - continue; - } - - $this->fully_supported_input = false; - return false; - } - } - - $this->fully_supported_input = 0 === count( $stack ); - - return $this->fully_supported_input; - } - public function next_tag( $query = null ) { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - if ( 0 < count( $this->open_elements ) ) { $element = WP_HTML_Spec::element_info( end( $this->open_elements ) ); - // @TODO: Handle self-closing HTML foreign elements: must convey self-closing flag on stack. - if ( $element::IS_VOID ) { + if ( $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ) ) { array_pop( $this->open_elements ); } } @@ -77,10 +37,6 @@ public function next_tag( $query = null ) { } public function next_sibling() { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - $starting_depth = count( $this->open_elements ); while ( $this->next_tag() ) { @@ -99,10 +55,6 @@ public function next_sibling() { } public function first_child() { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - $starting_depth = count( $this->open_elements ); while ( $this->next_tag() ) { @@ -141,10 +93,6 @@ private function find_closing_tag() { } public function get_inner_content() { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - if ( ! $this->get_tag() || $this->is_tag_closer() ) { return false; } @@ -172,10 +120,6 @@ public function get_inner_content() { } public function set_inner_content( $new_html ) { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - if ( ! 
$this->get_tag() || $this->is_tag_closer() ) { return false; } @@ -203,10 +147,6 @@ public function set_inner_content( $new_html ) { } public function get_outer_content() { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - if ( ! $this->get_tag() || $this->is_tag_closer() ) { return false; } @@ -237,10 +177,6 @@ public function get_outer_content() { } public function set_outer_content( $new_html ) { - if ( false === $this->fully_supported_input || false === $this->ensure_support() ) { - return false; - } - if ( ! $this->get_tag() || $this->is_tag_closer() ) { return false; } diff --git a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php index 87c194ea1c637..92e29101b1ba1 100644 --- a/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php +++ b/tests/phpunit/tests/html-api/Tests_HtmlApi_wpHtmlProcessor_Support.php @@ -37,15 +37,6 @@ private function html_processor_at_start( $html ) { return $p; } - /** - * @dataProvider data_fully_balanced_html - */ - public function test_detects_fully_balanced_html( $html ) { - $p = new WP_HTML_Processor( $html ); - - $this->assertTrue( $p->ensure_support(), "Detected that supported HTML input isn't supported." ); - } - /** * @return array[] */ @@ -62,15 +53,6 @@ public function data_fully_balanced_html() { ); } - /** - * @dataProvider data_not_fully_balanced_html - */ - public function test_detects_not_fully_balanced_html( $html ) { - $p = new WP_HTML_Processor( $html ); - - $this->assertFalse( $p->ensure_support(), 'Detected that unsupported HTML input is supported.' 
); - } - /** * @return array[] */ From 67bef483c55187623aa41e0ec07fcb413e7506cc Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 20 Apr 2023 17:55:44 +0200 Subject: [PATCH 20/39] Introduce step function and insertion mode --- .../html-api/class-wp-html-processor.php | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 717d692b703b0..86566dbda20cf 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -3,7 +3,48 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { public $open_elements = array(); + /** + * Advance the parser by one step. + * + * Implements the HTML fragment parsing algorithm. + * See https://html.spec.whatwg.org/#parsing-html-fragments + * + * Only parts of the full algorithm are supported in this class. + * For cases where the input HTML doesn't conform to the supported + * domain of the fragment parsing algorithm this method will abort + * and return `false`. + * + * @param string $insertion_mode Starting insertion mode for parser, best to leave as the default value + * unless knowingly handling HTML that will be included inside known tags. + * + * @return boolean Whether an element was found. + */ + public function step( $insertion_mode = 'in-body' ) { + switch ( $insertion_mode ) { + case 'in-body': + return $this->step_in_body(); + + default: + return false; + } + } + + /** + * Parses next element in the 'in body' insertion mode. + * + * @return boolean Whether an element was found. + */ + private function step_in_body() { + return false; + } + public function next_tag( $query = null ) { + /* + * The first thing that needs to happen when stepping through the HTML is to + * close any void and self-closing elements. 
These appear on the open stack + * to support matching CSS selectors and gauging depths, but they don't + * truly have distinct openings and closings. + */ if ( 0 < count( $this->open_elements ) ) { $element = WP_HTML_Spec::element_info( end( $this->open_elements ) ); if ( $element::IS_VOID || ( ! $element::IS_HTML && $this->has_self_closing_flag() ) ) { From be6e9011e0d2d2fbbcc7daddc5a436cb8b2aee9d Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 3 May 2023 20:15:05 +0200 Subject: [PATCH 21/39] Create some IS_SPECIAL flags --- .../html-api/class-wp-html-spec.php | 71 ++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-spec.php b/src/wp-includes/html-api/class-wp-html-spec.php index a82826903a422..a714ad9bcb614 100644 --- a/src/wp-includes/html-api/class-wp-html-spec.php +++ b/src/wp-includes/html-api/class-wp-html-spec.php @@ -295,6 +295,7 @@ public static function element_info( $tag_name ) { class WP_HTML_Element_Meta { const IS_VOID = false; const IS_HTML = true; + const IS_SPECIAL = false; } class WP_HTMLElement extends WP_HTML_Element_Meta { @@ -320,16 +321,22 @@ class WP_HTMLAbbrElement extends WP_HTML_Element_Meta { } class WP_HTMLAddressElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } +// @TODO: Add deprecated special rule: APPLET + class WP_HTMLAreaElement extends WP_HTML_Element_Meta { const IS_VOID = true; + const IS_SPECIAL = true; } class WP_HTMLArticleElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLAsideElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLAudioElement extends WP_HTML_Element_Meta { @@ -339,6 +346,7 @@ class WP_HTMLBElement extends WP_HTML_Element_Meta { } class WP_HTMLBaseElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } @@ -349,22 +357,27 @@ class WP_HTMLBdoElement extends WP_HTML_Element_Meta { } class WP_HTMLBlockquoteElement extends 
WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLBodyElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLBrElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLButtonElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLCanvasElement extends WP_HTML_Element_Meta { } class WP_HTMLCaptionElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLCiteElement extends WP_HTML_Element_Meta { @@ -374,10 +387,12 @@ class WP_HTMLCodeElement extends WP_HTML_Element_Meta { } class WP_HTMLColElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLColgroupElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLDataElement extends WP_HTML_Element_Meta { @@ -387,12 +402,14 @@ class WP_HTMLDataListElement extends WP_HTML_Element_Meta { } class WP_HTMLDdElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLDelElement extends WP_HTML_Element_Meta { } class WP_HTMLDetailsElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLDfnElement extends WP_HTML_Element_Meta { @@ -402,81 +419,104 @@ class WP_HTMLDialogElement extends WP_HTML_Element_Meta { } class WP_HTMLDivElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLDlElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLDtElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLEmElement extends WP_HTML_Element_Meta { } class WP_HTMLEmbedElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLFieldsetElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLFigcaptionElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLFigureElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } 
class WP_HTMLFooterElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLFormElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH1Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH2Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH3Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH4Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH5Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLH6Element extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLHeadElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLHeaderElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } -class WP_HTMLHgropuElement extends WP_HTML_Element_Meta { +class WP_HTMLHgroupElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLHrElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLHtmlElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLIElement extends WP_HTML_Element_Meta { } class WP_HTMLIframeElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLImgElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLInputElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } @@ -493,13 +533,16 @@ class WP_HTMLLegendElement extends WP_HTML_Element_Meta { } class WP_HTMLLiElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLLinkElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } class WP_HTMLMainElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLMapElement extends WP_HTML_Element_Meta { @@ -512,9 +555,11 @@ class WP_HTMLMathElement 
extends WP_HTML_Element_Meta { } class WP_HTMLMenuElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLMetaElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } @@ -522,15 +567,18 @@ class WP_HTMLMeterElement extends WP_HTML_Element_Meta { } class WP_HTMLNavElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLNoscriptElement extends WP_HTML_Element_Meta { } class WP_HTMLObjectElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLOlElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLOptgroupElement extends WP_HTML_Element_Meta { @@ -543,12 +591,14 @@ class WP_HTMLOutputElement extends WP_HTML_Element_Meta { } class WP_HTMLPElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLPictureElement extends WP_HTML_Element_Meta { } class WP_HTMLPreElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLProgressElement extends WP_HTML_Element_Meta { @@ -573,12 +623,15 @@ class WP_HTMLSampElement extends WP_HTML_Element_Meta { } class WP_HTMLScriptElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLSectionElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLSelectElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLSlotElement extends WP_HTML_Element_Meta { @@ -588,6 +641,7 @@ class WP_HTMLSmallElement extends WP_HTML_Element_Meta { } class WP_HTMLSourceElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } @@ -598,12 +652,14 @@ class WP_HTMLStrongElement extends WP_HTML_Element_Meta { } class WP_HTMLStyleElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLSubElement extends WP_HTML_Element_Meta { } class WP_HTMLSummaryElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLSupElement extends 
WP_HTML_Element_Meta { @@ -613,39 +669,50 @@ class WP_HTMLSvgElement extends WP_HTML_Element_Meta { } class WP_HTMLTableElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTbodyElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTdElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTemplateElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTextareaElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTfootElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLThElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTheadElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTimeElement extends WP_HTML_Element_Meta { } class WP_HTMLTitleElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTrElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLTrackElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } @@ -653,6 +720,7 @@ class WP_HTMLUElement extends WP_HTML_Element_Meta { } class WP_HTMLUlElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; } class WP_HTMLVarElement extends WP_HTML_Element_Meta { @@ -662,5 +730,6 @@ class WP_HTMLVideoElement extends WP_HTML_Element_Meta { } class WP_HTMLWbrElement extends WP_HTML_Element_Meta { + const IS_SPECIAL = true; const IS_VOID = true; } From 2e874e4f67acc38e8954327f247d69c980f32f5c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 4 May 2023 10:17:09 +0200 Subject: [PATCH 22/39] Some bookmarking stuff --- .../html-api/class-wp-html-processor.php | 58 +++++++++++++++++-- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 86566dbda20cf..45fa3d70ffef4 100644 --- 
a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1,7 +1,7 @@ depth++; + + parent::set_bookmark( "{$this->depth}_{$element}" ); + } + + private function exit_element( $element ) { + parent::release_bookmark( "{$this->depth}_{$element}" ); + } + + private function opened_element() { + if ( 0 === $this->depth ) { + return false; + } + + $max_depth = 0; + foreach ( $this->bookmarks as $name => $bookmark ) { + if ( '_' === $name[0] ) { + continue; + } + + list( $depth, $element ) = explode( '_', $name ); + if ( $depth === "{$this->depth}" ) { + return $element; + } + } + + return false; + } + public function next_tag( $query = null ) { /* * The first thing that needs to happen when stepping through the HTML is to @@ -110,13 +148,25 @@ public function first_child() { } public function seek( $bookmark_name ) { - parent::seek( $bookmark_name ); + parent::seek( '_' . $bookmark_name ); + $max_depth = $this->depth; foreach ( $this->bookmarks as $name => $mark ) { - if ( str_starts_with( $name, '__open_elements_' ) && $mark->start > $this->bookmarks[ $bookmark_name ]->start ) { - $this->release_bookmark( $name ); + // Regular bookmarks are prefixed with "_" so they can be ignored here. + if ( '_' === $name[0] ) { + continue; + } + + // Element stack bookmarks are like "3_P" and "4_DIV". 
+ if ( $mark->start > $this->bookmarks[ $bookmark_name ]->start ) { + parent::release_bookmark( $name ); + } else { + $this_depth = (int) explode( '_', $name )[0]; + $max_depth = max( $max_depth, $this_depth ); } } + + $this->depth = $max_depth; } private function find_closing_tag() { From ece3a68c07be29c210fd2591d3d67f035811349c Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 4 May 2023 17:43:56 +0200 Subject: [PATCH 23/39] I think we need a separate actual stack for open elements --- .../html-api/class-wp-html-processor.php | 187 +++++++++++++++++- 1 file changed, 183 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 45fa3d70ffef4..cd55326b9dfd6 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1,7 +1,11 @@ 'visit' ); + private $insertion_mode = 'in-body'; /** * Advance the parser by one step. @@ -19,22 +23,158 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * @return boolean Whether an element was found. */ - public function step( $insertion_mode = 'in-body' ) { - switch ( $insertion_mode ) { + public function step( $insertion_mode = null ) { + switch ( $insertion_mode ?: $this->insertion_mode ) { case 'in-body': return $this->step_in_body(); default: - return false; + return self::NOT_IMPLEMENTED_YET; } } + /** + * Parses next element in the 'in head' insertion mode. + * + * Not yet implemented. + * + * @see https://html.spec.whatwg.org/#parsing-main-inhead + * + * @return false + */ + private function step_in_head() { + return self::NOT_IMPLEMENTED_YET; + } + /** * Parses next element in the 'in body' insertion mode. * + * @see https://html.spec.whatwg.org/#parsing-main-inbody + * * @return boolean Whether an element was found. */ private function step_in_body() { + ignored: + parent::set_bookmark( 'current' ); + if ( ! 
$this->next_tag( self::$query ) ) { + return false; + } + + $tag_name = $this->get_tag(); + $tag_type = $this->is_tag_closer() ? 'closer' : 'opener'; + + /* + * > A start tag whose tag name is "html" + */ + if ( 'HTML' === $tag_name && 'opener' === $tag_type ) { + goto ignored; + } + + /* + * > A start tag whose tag name is one of: "base", "basefont", "bgsound", + * > "link", "meta", "noframes", "script", "style", "template", "title" + * + * > An end tag whose tag name is "template" + */ + if ( + 'opener' === $tag_type && ( + 'BASE' === $tag_name || + 'BASEFONT' === $tag_name || + 'BGSOUND' === $tag_name || + 'LINK' === $tag_name || + 'META' === $tag_name || + 'NOFRAMES' === $tag_name || + 'SCRIPT' === $tag_name || + 'STYLE' === $tag_name || + 'TEMPLATE' === $tag_name || + 'TITLE' === $tag_name + ) || + ( + 'closer' === $tag_type && + 'TEMPLATE' === $tag_name + ) ) + { + parent::seek( 'current' ); + $this->insertion_mode = 'in-head'; + return $this->step(); + } + + /* + * > A start tag whose tag name is "body" + */ + if ( 'opener' === $tag_type && 'BODY' === $tag_name ) { + goto ignored; + } + + /* + * > A start tag whose tag name is "frameset" + */ + if ( 'opener' === $tag_type && 'FRAMESET' === $tag_name ) { + return self::NOT_IMPLEMENTED_YET; + } + + /* + * > An end-of-file token + * + * Stop parsing. + */ + + /* + * > An end tag whose tag name is "body" + * > An end tag whose tag name is "html" + */ + if ( 'closer' === $tag_type && ( 'BODY' === $tag_name || 'HTML' === $tag_name ) ) { + /* + * > If the stack of open elements does not have a body element in scope, this is a parse error; ignore the token. + * + * @TODO: We didn't construct an open HTML or BODY tag, but we have to make a choice here based on that. + * Probably need to create these _or_ assume this will always transfer to "after body". 
+ */ + $this->insertion_mode = 'after-body'; + return true; + } + + /* + * > A start tag whose tag name is one of: "address", "article", "aside", "blockquote", "center", + * > "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", + * > "header", "hgroup", "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul" + */ + if ( + 'opener' === $tag_type && ( + 'ADDRESS' === $tag_name || + 'ARTICLE' === $tag_name || + 'ASIDE' === $tag_name || + 'BLOCKQUOTE' === $tag_name || + 'CENTER' === $tag_name || + 'DETAILS' === $tag_name || + 'DIALOG' === $tag_name || + 'DIR' === $tag_name || + 'DIV' === $tag_name || + 'DL' === $tag_name || + 'FIELDSET' === $tag_name || + 'FIGCAPTION' === $tag_name || + 'FIGURE' === $tag_name || + 'FOOTER' === $tag_name || + 'HEADER' === $tag_name || + 'HGROUP' === $tag_name || + 'MAIN' === $tag_name || + 'MENU' === $tag_name || + 'NAV' === $tag_name || + 'OL' === $tag_name || + 'P' === $tag_name || + 'SEARCH' === $tag_name || + 'SECTION' === $tag_name || + 'SUMMARY' === $tag_name || + 'UL' === $tag_name + ) + ) { + if ( $this->has_in_scope( 'P', 'BUTTON' ) ) { + $this->close_p_element(); + } + + $this->enter_element( $tag_name ); + } + return false; } @@ -56,7 +196,35 @@ private function exit_element( $element ) { parent::release_bookmark( "{$this->depth}_{$element}" ); } - private function opened_element() { + /** + * @see https://html.spec.whatwg.org/#close-a-p-element + * @return void + */ + private function close_p_element() { + $this->generate_implied_end_tags( 'P' ); + + + } + + /** + * @TODO: Implement this + * + * @see https://html.spec.whatwg.org/#generate-implied-end-tags + * + * @param string|null $except_for_this_element Perform as if this element doesn't exist in the stack of open elements. + * @return void + */ + private function generate_implied_end_tags( $except_for_this_element = null ) { + + } + + /** + * The current node is the bottommost node in this stack of open elements. 
+ * + * @see https://html.spec.whatwg.org/#current-node + * @return false|mixed|string + */ + private function current_node() { if ( 0 === $this->depth ) { return false; } @@ -76,6 +244,17 @@ private function opened_element() { return false; } + /** + * Indicates if the stack of open elements has an element in a given scope. + * + * @param $element + * @param $scope + * @return false + */ + private function has_in_scope( $element, $scope ) { + return self::NOT_IMPLEMENTED_YET; + } + public function next_tag( $query = null ) { /* * The first thing that needs to happen when stepping through the HTML is to From 98180bc40373ec3178a182be7b8afacac73283a6 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 17 May 2023 16:58:36 +0200 Subject: [PATCH 24/39] Add WIP stack class, trap exceptions in `step()` to allow nested calls to escape --- .../class-wp-html-element-stack-item.php | 45 +++ .../html-api/class-wp-html-element-stack.php | 266 ++++++++++++++++++ .../html-api/class-wp-html-processor.php | 20 +- 3 files changed, 325 insertions(+), 6 deletions(-) create mode 100644 src/wp-includes/html-api/class-wp-html-element-stack-item.php create mode 100644 src/wp-includes/html-api/class-wp-html-element-stack.php diff --git a/src/wp-includes/html-api/class-wp-html-element-stack-item.php b/src/wp-includes/html-api/class-wp-html-element-stack-item.php new file mode 100644 index 0000000000000..a9cf9caf84b25 --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-element-stack-item.php @@ -0,0 +1,45 @@ +bookmark_name = $bookmark_name; + $this->element = $element; + $this->flags = $flags; + $this->related_item = $related_item; + } +} diff --git a/src/wp-includes/html-api/class-wp-html-element-stack.php b/src/wp-includes/html-api/class-wp-html-element-stack.php new file mode 100644 index 0000000000000..cff94e2b76aed --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-element-stack.php @@ -0,0 +1,266 @@ +bookmark_name = $bookmark_name; + $this->element = $element; + 
$this->flags = $flags; + } + + /** + * Add an item to the top of the stack. + * + * @TODO: Do we need to insertion-sort these? + * + * @param $stack_item + * @return void + */ + public function push( $stack_item ) { + $this->stack[] = $stack_item; + } + + public function count() { + return count( $this->stack ); + } + + /** + * Returns the bottom-most node on the stack. + * + * @return WP_HTML_Element_Stack_Item|null + */ + public function current_node() { + $count = $this->count(); + + return $this->count() > 0 + ? $this->stack[ $count - 1 ] + : null; + } + + /** + * Returns whether the given element is on the stack. + * + * @param string $element the ::class name of the element to check for. + * @return boolean whether the given element is on the stack. + */ + public function has_element( $element ) { + for ( $i = count( $this->stack ) - 1; $i > 0; $i++ ) { + if ( $this->stack[ $i ]->element === $element ) { + return true; + } + } + + return false; + } + + /** + * Returns whether an element is in a specific scope. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-the-specific-scope + * + * @param string $element The target node. + * @param string[] $termination_list List of elements that terminate the search. + * @return bool + */ + public function has_element_in_specific_scope( $element, $termination_list ) { + $i = $this->count(); + if ( $i === 0 ) { + return false; + } + + $node = $this->stack[ --$i ]; + + if ( $node->element === $element ) { + return true; + } + + if ( in_array( $element, $termination_list, true ) ) { + return false; + } + + while ( $i > 0 && null !== ( $node = $this->stack[ --$i ] ) ) { + if ( $node->element === $element ) { + return true; + } + } + + return false; + } + + /** + * Returns whether a given element is in a particular scope. 
+ * + * @see https://html.spec.whatwg.org/#has-an-element-in-scope + * + * @param string $element + * @return bool + */ + public function has_element_in_particular_scope( $element ) { + return $this->has_element_in_specific_scope( $element, array( + WP_HTMLAppletElement::class, + WP_HTMLCaptionElement::class, + WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTdElement::class, + WP_HTMLThElement::class, + WP_HTMLMarqueeElement::class, + WP_HTMLObjectElement::class, + WP_HTMLTemplateElement::class, + WP_MathML_Mi_Element::class, + WP_MathML_Mo_Element::class, + WP_MathML_Mn_Element::class, + WP_MathML_Ms_Element::class, + WP_MathML_Mtext_Element::class, + WP_MathML_Annotation_Xml_Element::class, + WP_SVG_ForeignObject_Element::class, + WP_SVG_Description_Element::class, + WP_SVG_Title_Element::class, + ) ); + } + + /** + * Returns whether a given element is in list item scope. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope + * + * @param string $element + * @return bool + */ + public function has_element_in_list_item_scope( $element ) { + return $this->has_element_in_specific_scope( $element, array( + WP_HTMLAppletElement::class, + WP_HTMLCaptionElement::class, + WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTdElement::class, + WP_HTMLThElement::class, + WP_HTMLMarqueeElement::class, + WP_HTMLObjectElement::class, + WP_HTMLTemplateElement::class, + WP_MathML_Mi_Element::class, + WP_MathML_Mo_Element::class, + WP_MathML_Mn_Element::class, + WP_MathML_Ms_Element::class, + WP_MathML_Mtext_Element::class, + WP_MathML_Annotation_Xml_Element::class, + WP_SVG_ForeignObject_Element::class, + WP_SVG_Description_Element::class, + WP_SVG_Title_Element::class, + + // Additionally these elements. + WP_HTMLOlElement::class, + WP_HTMLUlElement::class, + ) ); + } + + /** + * Returns whether a given element is in button scope.
+ * + * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope + * + * @param string $element + * @return boolean + */ + public function has_element_in_button_scope( $element ) { + return $this->has_element_in_specific_scope( $element, array( + WP_HTMLAppletElement::class, + WP_HTMLCaptionElement::class, + WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTdElement::class, + WP_HTMLThElement::class, + WP_HTMLMarqueeElement::class, + WP_HTMLObjectElement::class, + WP_HTMLTemplateElement::class, + WP_MathML_Mi_Element::class, + WP_MathML_Mo_Element::class, + WP_MathML_Mn_Element::class, + WP_MathML_Ms_Element::class, + WP_MathML_Mtext_Element::class, + WP_MathML_Annotation_Xml_Element::class, + WP_SVG_ForeignObject_Element::class, + WP_SVG_Description_Element::class, + WP_SVG_Title_Element::class, + + // Additionally these elements. + WP_HTMLButtonElement::class, + ) ); + } + + /** + * Returns whether the given element is in table scope. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope + * + * @param string $element + * @return bool + */ + public function has_element_in_table_scope( $element ) { + return $this->has_element_in_specific_scope( $element, array( + WP_HTMLAppletElement::class, + WP_HTMLCaptionElement::class, + WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTdElement::class, + WP_HTMLThElement::class, + WP_HTMLMarqueeElement::class, + WP_HTMLObjectElement::class, + WP_HTMLTemplateElement::class, + WP_MathML_Mi_Element::class, + WP_MathML_Mo_Element::class, + WP_MathML_Mn_Element::class, + WP_MathML_Ms_Element::class, + WP_MathML_Mtext_Element::class, + WP_MathML_Annotation_Xml_Element::class, + WP_SVG_ForeignObject_Element::class, + WP_SVG_Description_Element::class, + WP_SVG_Title_Element::class, + + // Additionally these elements. 
+ WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTemplateElement::class, + ) ); + } + + /** + * Returns whether a given element is in select scope. + * + * @see https://html.spec.whatwg.org/#has-an-element-in-select-scope + * + * @param string $element + * @return bool + */ + public function has_element_in_select_scope( $element ) { + return $this->has_element_in_specific_scope( $element, array( + WP_HTMLAppletElement::class, + WP_HTMLCaptionElement::class, + WP_HTMLHtmlElement::class, + WP_HTMLTableElement::class, + WP_HTMLTdElement::class, + WP_HTMLThElement::class, + WP_HTMLMarqueeElement::class, + WP_HTMLObjectElement::class, + WP_HTMLTemplateElement::class, + WP_MathML_Mi_Element::class, + WP_MathML_Mo_Element::class, + WP_MathML_Mn_Element::class, + WP_MathML_Ms_Element::class, + WP_MathML_Mtext_Element::class, + WP_MathML_Annotation_Xml_Element::class, + WP_SVG_ForeignObject_Element::class, + WP_SVG_Description_Element::class, + WP_SVG_Title_Element::class, + + // Additionally these elements. + WP_HTMLOptgroupElement::class, + WP_HTMLOptionElement::class, + ) ); + } +} diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index cd55326b9dfd6..3249ac3e07feb 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -24,12 +24,20 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return boolean Whether an element was found. 
*/ public function step( $insertion_mode = null ) { - switch ( $insertion_mode ?: $this->insertion_mode ) { - case 'in-body': - return $this->step_in_body(); + try { + switch ( $insertion_mode ?: $this->insertion_mode ) { + case 'in-body': + return $this->step_in_body(); - default: - return self::NOT_IMPLEMENTED_YET; + default: + return self::NOT_IMPLEMENTED_YET; + } + } catch ( Exception $e ) { + /* + * Exceptions are used in this class to escape deep call stacks that + * otherwise might involve messier calling and return conventions. + */ + return false; } } @@ -215,7 +223,7 @@ private function close_p_element() { * @return void */ private function generate_implied_end_tags( $except_for_this_element = null ) { - + } /** From 0e3ada551f91fa062d3afe5cb6c6408a36f6def7 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 17 May 2023 17:25:23 +0200 Subject: [PATCH 25/39] Play with an alternate matching syntax --- .../html-api/class-wp-html-element-stack.php | 6 - .../html-api/class-wp-html-processor.php | 135 +++++++++++++++++- 2 files changed, 134 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-element-stack.php b/src/wp-includes/html-api/class-wp-html-element-stack.php index cff94e2b76aed..3e9cd6f3fc1bd 100644 --- a/src/wp-includes/html-api/class-wp-html-element-stack.php +++ b/src/wp-includes/html-api/class-wp-html-element-stack.php @@ -8,12 +8,6 @@ class WP_HTML_Element_Stack { */ public $stack = array(); - public function __construct( $bookmark_name, $element, $flags ) { - $this->bookmark_name = $bookmark_name; - $this->element = $element; - $this->flags = $flags; - } - /** * Add an item to the top of the stack. 
* diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3249ac3e07feb..2ffb992eace58 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1,5 +1,7 @@ 'visit' ); private $insertion_mode = 'in-body'; + /** + * @var int Unique id for creating bookmarks. + */ + private $bookmark_id = 0; + + /** + * @var WP_HTML_Element_Stack Refers to element opening tags. + */ + private $tag_openers = null; + + /** + * @var WP_HTML_Element_Stack Refers to element closing tags. + */ + private $tag_closers = null; + + /** + * Create a new HTML Processor for reading and modifying HTML structure. + * + * @param string $html Input HTML document. + */ + public function __construct( $html ) { + parent::__construct( $html ); + + $this->tag_openers = new WP_HTML_Element_Stack(); + $this->tag_closers = new WP_HTML_Element_Stack(); + } + /** * Advance the parser by one step. * @@ -70,6 +99,110 @@ private function step_in_body() { $tag_name = $this->get_tag(); $tag_type = $this->is_tag_closer() ? 'closer' : 'opener'; + $op_sigil = $this->is_tag_closer() ?
'-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A start tag whose tag name is "html" + */ + case '+HTML': + goto ignored; + + /* + * > A start tag whose tag name is one of: "base", "basefont", "bgsound", + * > "link", "meta", "noframes", "script", "style", "template", "title" + * + * > An end tag whose tag name is "template" + */ + case '+BASE': + case '+BASEFONT': + case '+BGSOUND': + case '+LINK': + case '+META': + case '+NOFRAMES': + case '+SCRIPT': + case '+STYLE': + case '+TEMPLATE': + case '+TITLE': + case '-TEMPLATE': + parent::seek( 'current' ); + $this->insertion_mode = 'in-head'; + return $this->step(); + + /* + * > A start tag whose tag name is "body" + */ + case '+BODY': + goto ignored; + + + /* + * > A start tag whose tag name is "frameset" + */ + case '+FRAMESET': + throw new Exception( self::NOT_IMPLEMENTED_YET ); + + /* + * > An end tag whose tag name is "body" + * > An end tag whose tag name is "html" + */ + case '-BODY': + case '-HTML': + /* + * > If the stack of open elements does not have a body element in scope, this is a parse error; ignore the token. + * + * @TODO: We didn't construct an open HTML or BODY tag, but we have to make a choice here based on that. + * Probably need to create these _or_ assume this will always transfer to "after body". 
+ */ + $this->insertion_mode = 'after-body'; + return true; + + /* + * > A start tag whose tag name is one of: "address", "article", "aside", "blockquote", "center", + * > "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", + * > "header", "hgroup", "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul" + */ + case '+ADDRESS': + case '+ARTICLE': + case '+ASIDE': + case '+BLOCKQUOTE': + case '+CENTER': + case '+DETAILS': + case '+DIALOG': + case '+DIR': + case '+DIV': + case '+DL': + case '+FIELDSET': + case '+FIGCAPTION': + case '+FIGURE': + case '+FOOTER': + case '+HEADER': + case '+HGROUP': + case '+MAIN': + case '+MENU': + case '+NAV': + case '+OL': + case '+P': + case '+SEARCH': + case '+SECTION': + case '+SUMMARY': + case '+UL': + if ( $this->has_in_scope( 'P', 'BUTTON' ) ) { + $this->close_p_element(); + } + + $this->enter_element( $tag_name ); + return; + + /* + * > An end-of-file token + * + * Stop parsing. + */ + default: + return false; + } /* * > A start tag whose tag name is "html" @@ -260,7 +393,7 @@ private function current_node() { * @return false */ private function has_in_scope( $element, $scope ) { - return self::NOT_IMPLEMENTED_YET; + throw new Exception( self::NOT_IMPLEMENTED_YET ); } public function next_tag( $query = null ) { From 739b4aa7ca1b68315fc44420ebfc2022fa556578 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 18 May 2023 01:40:56 +0200 Subject: [PATCH 26/39] Add reset_insertion_mode_appropriately --- .../class-wp-html-element-stack-item.php | 1 + .../html-api/class-wp-html-element-stack.php | 16 +- .../html-api/class-wp-html-processor.php | 404 +++++++++++++----- .../html-api/class-wp-html-spec.php | 3 + 4 files changed, 305 insertions(+), 119 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-element-stack-item.php b/src/wp-includes/html-api/class-wp-html-element-stack-item.php index a9cf9caf84b25..527b526c0e02a 100644 --- 
a/src/wp-includes/html-api/class-wp-html-element-stack-item.php +++ b/src/wp-includes/html-api/class-wp-html-element-stack-item.php @@ -1,6 +1,7 @@ = $this->count() ) { + return null; + } + + return $this->stack[ $this->count() - $nth_from_top - 1 ]; + } + /** * Add an item to the top of the stack. * * @TODO: Do we need to insertion-sort these? * - * @param $stack_item + * @param WP_HTML_Element_Stack_Item $stack_item * @return void */ public function push( $stack_item ) { diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 2ffb992eace58..1f03d1045d456 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1,13 +1,44 @@ 'visit' ); - private $insertion_mode = 'in-body'; /** * @var int Unique id for creating bookmarks. @@ -24,16 +55,125 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $tag_closers = null; + /** + * @var string Tree construction insertion mode. + */ + private $insertion_mode = 'initial'; + + /** + * Context node initializing HTML fragment parsing, if in that mode. + * + * @var [string, array]|null + */ + private $context_node = null; + + /** + * @TODO: Implement this. + * + * @var null + */ + private $head_element_pointer = null; + + /** + * @TODO: Implement this. + * + * @var null + */ + private $form_element_pointer = null; + + /** + * Original insertion mode when entering 'text' or 'in-table-text' modes. + * + * Not implemented yet. + * + * @var string|null + */ + private $original_insertion_mode = null; + + /** + * Stack of template insertion modes. + * + * Not implemented yet. + * + * @var null + */ + private $template_insertion_mode_stack = null; + /** * Create a new HTML Processor for reading and modifying HTML structure. 
* + * ## Initial mode + * + * Most invocations of the HTML parser operate in the "fragment parsing" mode, + * which assumes that the given HTML document exists within an existing HTML + * document. For example, block HTML exists within a larger document, and some + * inner block HTML might exist within a TABLE element, which holds special + * parsing rules. + * + * The parser can operate in a full parsing mode or the fragment parsing mode, + * and it's important to indicate which is necessary when creating the HTML + * processor. + * + * Example + * // Parse an entire HTML document + * $p = new WP_HTML_Processor( $html, array( 'full', WP_HTML_Processor::INITIAL ) ); + * + * // Parse a full HTML document, but inside a BODY element. E.g. when parsing `post_content`. + * $p = new WP_HTML_Processor( $html, array( 'full', WP_HTML_Processor::IN_BODY ) ); + * + * // Parse a chunk of HTML provided inside a post's block content. + * $p = new WP_HTML_Processor( $html, array( 'fragment', '' ) ); + * + * // Parse a chunk of HTML provided inside a post's block content, using the default initial mode. + * $p = new WP_HTML_Processor( $html ); + * + * // Parse a chunk of HTML known to exist within a TEXTAREA element. E.g. when parsing code input. + * $p = new WP_HTML_Processor( $html, array( 'fragment', '