Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 72 additions & 32 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -3220,34 +3220,81 @@ function _split_str_by_whitespace( $text, $goal ) {
* @return string HTML A element with the added rel attribute.
*/
function wp_rel_callback( $matches, $rel ) {
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	/*
	 * The replacement function receives the entire matched tag and the
	 * requested keywords, so no attribute parsing is performed here.
	 */
	return wp_include_in_all_a_rel( $matches[0], $rel );
}

if ( ! empty( $atts['href'] ) && wp_is_internal_link( $atts['href']['value'] ) ) {
$rel = trim( str_replace( 'nofollow', '', $rel ) );
/**
* Ensures that all A elements in the given HTML contain
* the provided and unique “rel” keywords.
*
* Example:
*
* `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a>', 'nofollow' );
* `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a rel="nofollow">', 'nofollow' );
* `<a rel="pingback nofollow">` === wp_include_in_all_a_rel( '<a rel="pingback">', 'nofollow' );
* `<a rel="a b c">` === wp_include_in_all_a_rel( '<a rel="a a a">', 'a a a b b c' );
*
* @since {WP_VERSION}
*
* @param string $html Add the given `rel` keywords to every `A` tag in this HTML.
* @param string $space_separated_rel_keywords Each of these keywords will be present in the final HTML.
* @return string Modified HTML with all `A` tags containing the given `rel` keywords.
*/
function wp_include_in_all_a_rel( $html, $space_separated_rel_keywords ) {
if ( empty( $html ) || empty( $space_separated_rel_keywords ) ) {
return $html;
}

if ( ! empty( $atts['rel'] ) ) {
$parts = array_map( 'trim', explode( ' ', $atts['rel']['value'] ) );
$rel_array = array_map( 'trim', explode( ' ', $rel ) );
$parts = array_unique( array_merge( $parts, $rel_array ) );
$rel = implode( ' ', $parts );
unset( $atts['rel'] );
/*
* Internal links do not need the `nofollow` guard. These two variables
* record whether `nofollow` is among the keywords being added and hold
* the remaining keywords, so that internal links can receive everything
* except `nofollow`.
*/
$without_nofollow = $space_separated_rel_keywords;
$adding_no_follow = false;

$html = '';
foreach ( $atts as $name => $value ) {
if ( isset( $value['vless'] ) && 'y' === $value['vless'] ) {
$html .= $name . ' ';
/*
* Although this could falsely match on longer tokens like `nofollowers`,
* it’s safe to check generously since the parsing will ensure that only
* `nofollow` is removed; only a bit of unnecessary processing will occur.
*/
if ( str_contains( $without_nofollow, 'nofollow' ) ) {
$tokens = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $without_nofollow );
$without_nofollow = '';

foreach ( $tokens as $token ) {
if ( 'nofollow' === $token ) {
$adding_no_follow = true;
} else {
$html .= "{$name}=\"" . esc_attr( $value['value'] ) . '" ';
$without_nofollow .= " {$token}";
}
}
$text = trim( $html );
}

$rel_attr = $rel ? ' rel="' . esc_attr( $rel ) . '"' : '';
// Update the `rel` attributes in every `A` element.
$processor = new WP_HTML_Tag_Processor( $html );
while ( $processor->next_tag( 'A' ) ) {
$rel = $processor->get_attribute( 'rel' );
$rel = is_string( $rel ) ? $rel : '';

return "<a {$text}{$rel_attr}>";
$href = $adding_no_follow ? $processor->get_attribute( 'href' ) : null;
$skip_nofollow = is_string( $href ) && wp_is_internal_link( $href );

$combined = $skip_nofollow
? "{$rel} {$without_nofollow}"
: "{$rel} {$space_separated_rel_keywords}";

$tokens = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $combined );
$new_rel = empty( $tokens ) ? false : implode( ' ', $tokens );

$processor->set_attribute( 'rel', $new_rel );
}

return $processor->get_updated_html();
}

/**
Expand All @@ -3261,13 +3308,7 @@ function wp_rel_callback( $matches, $rel ) {
function wp_rel_nofollow( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );

	/*
	 * One pass over the whole text is sufficient. The earlier regex pass sent
	 * each match through the deprecated wp_rel_callback(), which only forwarded
	 * to this same function and triggered a deprecation notice per link.
	 */
	$text = wp_include_in_all_a_rel( $text, 'nofollow' );

	return wp_slash( $text );
}

Expand All @@ -3281,6 +3322,11 @@ static function ( $matches ) {
* @return string HTML A Element with `rel="nofollow"`.
*/
function wp_rel_nofollow_callback( $matches ) {
	_deprecated_function(
		__FUNCTION__,
		'{WP_VERSION}',
		'wp_include_in_all_a_rel()'
	);

	/*
	 * Call the replacement directly instead of going through wp_rel_callback(),
	 * which is itself deprecated and would report a second deprecation that
	 * the caller of this function did not cause.
	 */
	return wp_include_in_all_a_rel( $matches[0], 'nofollow' );
}

Expand All @@ -3295,13 +3341,7 @@ function wp_rel_nofollow_callback( $matches ) {
function wp_rel_ugc( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );

	/*
	 * One pass over the whole text is sufficient. The earlier regex pass sent
	 * each match through the deprecated wp_rel_callback(), which only forwarded
	 * to this same function and triggered a deprecation notice per link.
	 */
	$text = wp_include_in_all_a_rel( $text, 'nofollow ugc' );

	return wp_slash( $text );
}

Expand Down
64 changes: 64 additions & 0 deletions src/wp-includes/html-api/class-wp-html-attribute.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?php

class WP_HTML_Attribute {
/**
* Parses and returns an unordered set of space-separated tokens.
*
* Tokens in the returned array appear in the order of their first occurrence
* in the given attribute value string; later duplicates are dropped. When
* case-insensitive, output tokens will all be ASCII lowercase.
*
* Example:
*
* array( 'a', 'b', 'c' ) === WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( "a b a\t\nc" );
*
* > A set of space-separated tokens is a string containing zero or more
* > words (known as tokens) separated by one or more ASCII whitespace,
* > where words consist of any string of one or more characters, none
* > of which are ASCII whitespace.
*
* > An unordered set of unique space-separated tokens is a set of
* > space-separated tokens where none of the tokens are duplicated.
*
* > How tokens in a set of space-separated tokens are to be compared
* > (e.g. case-sensitively or not) is defined on a per-set basis.
*
* @see https://html.spec.whatwg.org/#unordered-set-of-unique-space-separated-tokens
*
* @since {WP_VERSION}
*
* @param string $attribute_value HTML-decoded attribute value to parse.
* @param string $case_sensitivity Optional. Constrain uniqueness with 'case-sensitive'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious why this is a string param and not a boolean?

If going with a string, then this could use 'case-sensitive'|'case-insensitive' as the type instead of string, in alignment with the proposal to adopt PHPStan. This would add static type checking for bad string values. If string values are used as well, maybe they should be added as constants to the class so that the literals aren't passed around everywhere.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they are there for discoverability in the code, plus I believe that in a very inconsequential amount the checking of string equality is faster than even booleans, because PHP short-circuits casting

100% it’s there because I find boolean parameters opaque and string parameters are explicit.

happy to update to use the string values in the types, but I thought WPCS nags yapped at me in the past because I tried that and it wanted string instead

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, performance is not my concern here. I'm more concerned with typos, and the DX of having to type in an exact string and what happens if you get it wrong.

Alternatively, this could take an $options array which has a case_sensitive key with a boolean value. This would seem more WordPressy and would allow for more flexibility in the future to add more options without adding additional positional params.

* or 'case-insensitive'. Default 'case-sensitive'.
* @return string[] Set of unique tokens parsed from attribute value.
*/
public static function from_unordered_set_of_space_separated_tokens( $attribute_value, $case_sensitivity = 'case-sensitive' ) {
	/*
	 * Only non-strings and the empty string contain no tokens. Avoid `empty()`
	 * here: it treats the string "0" as empty and would discard the valid
	 * single token `0`.
	 */
	if ( ! is_string( $attribute_value ) || '' === $attribute_value ) {
		return array();
	}

	if ( 'case-insensitive' === $case_sensitivity ) {
		/*
		 * Lowercase only the ASCII letters A-Z. `strtolower()` follows the
		 * current locale before PHP 8.2 and could alter non-ASCII bytes.
		 */
		$attribute_value = strtr( $attribute_value, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz' );
	}

	$tokens = array();

	// Space-delimited record of tokens already seen; the surrounding spaces prevent partial-word matches.
	$uniques = ' ';
	$at      = 0;
	$end     = strlen( $attribute_value );
	while ( $at < $end ) {
		// Skip the run of ASCII whitespace preceding the next token.
		$at += strspn( $attribute_value, " \t\f\r\n", $at );

		// The token extends to the next ASCII whitespace character or the end of the string.
		$word_length = strcspn( $attribute_value, " \t\f\r\n", $at );
		$word        = substr( $attribute_value, $at, $word_length );

		// A zero length means only trailing whitespace remained; otherwise keep the first occurrence of each token.
		if ( 0 < $word_length && ! str_contains( $uniques, " {$word} " ) ) {
			$uniques .= "{$word} ";
			$tokens[] = $word;
		}

		$at += $word_length;
	}

	return $tokens;
}
}
1 change: 1 addition & 0 deletions src/wp-settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-attribute.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';
Expand Down
Loading