/**
 * Parses an HTML attribute list into per-attribute records.
 *
 * The attribute list is wrapped in a placeholder tag and handed to the HTML
 * Tag Processor. Values of attributes listed by wp_kses_uri_attributes() are
 * passed through wp_kses_bad_protocol() before being escaped.
 *
 * @param string   $attr              Attribute list from an HTML element.
 * @param string[] $allowed_protocols Protocols handed to wp_kses_bad_protocol().
 * @return array[] Attribute records keyed by attribute name. Each record holds
 *                 'name', 'value', 'whole' (the serialized attribute) and
 *                 'vless' ('y' for valueless attributes, 'n' otherwise).
 */
function wp_kses_hair( $attr, $allowed_protocols ) {
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	/*
	 * The Tag Processor only reads attributes from a tag, so wrap the bare
	 * attribute list in a placeholder element.
	 *
	 * NOTE(review): an unquoted `>` inside $attr would end the placeholder tag
	 * early — confirm callers never pass one.
	 */
	$p = new WP_HTML_Tag_Processor( "<wp {$attr}>" );

	if ( ! $p->next_tag() ) {
		return $attributes;
	}

	$names = $p->get_attribute_names_with_prefix( '' );
	if ( ! is_array( $names ) ) {
		return $attributes;
	}

	foreach ( $names as $name ) {
		$value = $p->get_attribute( $name );

		// Valueless (boolean) attribute such as "selected".
		if ( true === $value ) {
			$attributes[ $name ] = array(
				'name'  => $name,
				'value' => '',
				'whole' => $name,
				'vless' => 'y',
			);
			continue;
		}

		$value = (string) $value;

		// Case-insensitive lookup, as in the implementation this replaces.
		if ( in_array( strtolower( $name ), $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		$value = esc_attr( $value );

		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $value,
			'whole' => "{$name}=\"{$value}\"",
			// Keep the 'y'/'n' string flags of the implementation this replaces.
			'vless' => 'n',
		);
	}

	return $attributes;
}

/**
 * Splits a single HTML element into its opening, attributes, and closing.
 *
 * The returned pieces are meant to be concatenated back into an element: the
 * first item is the tag opener, the last item is the tag end, and every
 * attribute except the last carries a trailing space as separator.
 *
 * @param string $element HTML element.
 * @return array|false List of element pieces. Returns false when no tag is found.
 */
function wp_kses_attr_parse( $element ) {
	$p = new WP_HTML_Tag_Processor( $element );

	// NOTE(review): presumably next_tag() does not match closing tags by default,
	// so a lone closer such as "</div>" yields false here — confirm.
	if ( ! $p->next_tag() ) {
		return false;
	}

	$names = $p->get_attribute_names_with_prefix( '' );
	if ( ! is_array( $names ) ) {
		$names = array();
	}
	$names = array_values( $names );
	$last  = count( $names ) - 1;

	$tag_name   = strtolower( $p->get_tag() );
	$attributes = array( $last >= 0 ? "<{$tag_name} " : "<{$tag_name}" );

	foreach ( $names as $index => $name ) {
		$value = $p->get_attribute( $name );

		if ( true === $value ) {
			$whole = $name;
		} else {
			$value = esc_attr( (string) $value );
			$whole = "{$name}=\"{$value}\"";
		}

		// Separate attributes so that concatenating the pieces yields valid markup.
		$attributes[] = $index < $last ? "{$whole} " : $whole;
	}

	// Preserve an XHTML-style self-closing slash when the source tag had one.
	$attributes[] = $p->has_self_closing_flag() ? ' />' : '>';

	return $attributes;
}
/**
 * Checks the pieces returned by wp_kses_attr_parse(), ignoring whitespace
 * around each piece.
 *
 * @dataProvider data_attr_parse
 *
 * @param string      $input  Element passed to wp_kses_attr_parse().
 * @param array|false $output Expected result.
 */
public function test_attr_parse( $input, $output ) {
	$parsed = wp_kses_attr_parse( $input );

	// wp_kses_attr_parse() returns array|false; trim() only accepts strings,
	// so trim each piece and let a false result pass through untouched.
	// @TODO: Why has this test been hard-encoding the spaces at the end of the attribute?
	if ( is_array( $parsed ) ) {
		$parsed = array_map( 'trim', $parsed );
	}

	$this->assertSame( $output, $parsed );
}
-171,7 +171,7 @@ public function data_wp_kses_video() { public function test_wp_filter_post_kses_abbr( $content, $expected ) { global $allowedposttags; - $this->assertSame( $expected, wp_kses( $content, $allowedposttags ) ); + $this->assertEqualMarkup( $expected, wp_kses( $content, $allowedposttags ) ); } /** @@ -1712,7 +1712,7 @@ public function test_wp_kses_main_tag_standard_attributes() { * @param string $expected The expected result from KSES. */ public function test_wp_kses_object_tag_allowed( $html, $expected ) { - $this->assertSame( $expected, wp_kses_post( $html ) ); + $this->assertEqualMarkup( $expected, wp_kses_post( $html ) ); } /** @@ -1913,7 +1913,7 @@ public function filter_wp_kses_object_added_in_html_filter( $tags, $context ) { * @param array $allowed_html The allowed HTML to pass to KSES. */ public function test_wp_kses_allowed_values_list( $content, $expected, $allowed_html ) { - $this->assertSame( $expected, wp_kses( $content, $allowed_html ) ); + $this->assertEqualMarkup( $expected, wp_kses( $content, $allowed_html ) ); } /** @@ -1971,7 +1971,7 @@ static function ( $datum ) { * @param array $allowed_html The allowed HTML to pass to KSES. 
*/ public function test_wp_kses_required_attribute( $content, $expected, $allowed_html ) { - $this->assertSame( $expected, wp_kses( $content, $allowed_html ) ); + $this->assertEqualMarkup( $expected, wp_kses( $content, $allowed_html ) ); } /** diff --git a/tests/phpunit/tests/oembed/filterResult.php b/tests/phpunit/tests/oembed/filterResult.php index 543d336cb9a41..fb21832679d02 100644 --- a/tests/phpunit/tests/oembed/filterResult.php +++ b/tests/phpunit/tests/oembed/filterResult.php @@ -124,7 +124,7 @@ public function test_wp_filter_pre_oembed_custom_result( $html, $expected ) { 'html' => $html, ); $actual = _wp_oembed_get_object()->data2html( $data, 'https://untrusted.localhost' ); - $this->assertSame( $expected, $actual ); + $this->assertEqualMarkup( $expected, $actual ); } /** diff --git a/tests/phpunit/tests/post/filtering.php b/tests/phpunit/tests/post/filtering.php index c4d393be22ce6..4dd907d9a6559 100644 --- a/tests/phpunit/tests/post/filtering.php +++ b/tests/phpunit/tests/post/filtering.php @@ -70,7 +70,7 @@ public function test_post_content_disallowed_attr() { $id = self::factory()->post->create( array( 'post_content' => $content ) ); $post = get_post( $id ); - $this->assertSame( $expected, $post->post_content ); + $this->assertEqualMarkup( $expected, $post->post_content ); } /** @@ -90,7 +90,7 @@ public function test_post_content_xhtml_empty_elem() { $id = self::factory()->post->create( array( 'post_content' => $content ) ); $post = get_post( $id ); - $this->assertSame( $expected, $post->post_content ); + $this->assertEqualMarkup( $expected, $post->post_content ); } // Make sure unbalanced tags are untouched when the balance option is off. 
diff --git a/tests/phpunit/tests/shortcode.php b/tests/phpunit/tests/shortcode.php index 1c19cfb42d046..723b108b06449 100644 --- a/tests/phpunit/tests/shortcode.php +++ b/tests/phpunit/tests/shortcode.php @@ -532,7 +532,7 @@ public function test_spaces_around_shortcodes() { * @dataProvider data_escaping */ public function test_escaping( $input, $output ) { - return $this->assertSame( $output, do_shortcode( $input ) ); + return $this->assertEqualMarkup( $output, do_shortcode( $input ) ); } public function data_escaping() {