diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
index 234d71a2a175a..e2d02d1b8f966 100644
--- a/src/wp-includes/formatting.php
+++ b/src/wp-includes/formatting.php
@@ -3220,34 +3220,81 @@ function _split_str_by_whitespace( $text, $goal ) {
  * @return string HTML A element with the added rel attribute.
  */
 function wp_rel_callback( $matches, $rel ) {
-	$text = $matches[1];
-	$atts = wp_kses_hair( $matches[1], wp_allowed_protocols() );
+	_deprecated_function(
+		__FUNCTION__,
+		'{WP_VERSION}',
+		'wp_include_in_all_a_rel()'
+	);
+	return wp_include_in_all_a_rel( $matches[0], $rel );
+}
 
-	if ( ! empty( $atts['href'] ) && wp_is_internal_link( $atts['href']['value'] ) ) {
-		$rel = trim( str_replace( 'nofollow', '', $rel ) );
+/**
+ * Ensures that all A elements in the given HTML contain
+ * the provided and unique “rel” keywords.
+ *
+ * Example:
+ *
+ *     `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a>', 'nofollow' );
+ *     `<a rel="nofollow">` === wp_include_in_all_a_rel( '<a rel="nofollow">', 'nofollow' );
+ *     `<a rel="noopener nofollow">` === wp_include_in_all_a_rel( '<a rel="noopener">', 'nofollow' );
+ *     `<a rel="a b c">` === wp_include_in_all_a_rel( '<a>', 'a a a b b c' );
+ *
+ * @since {WP_VERSION}
+ *
+ * @param string $html                         Add the given `rel` keywords to every `A` tag in this HTML.
+ * @param string $space_separated_rel_keywords Each of these keywords will be present in the final HTML.
+ * @return string Modified HTML with all `A` tags containing the given `rel` keywords.
+ */
+function wp_include_in_all_a_rel( $html, $space_separated_rel_keywords ) {
+	if ( ! is_string( $html ) || '' === $html || ! is_string( $space_separated_rel_keywords ) || '' === $space_separated_rel_keywords ) {
+		return $html;
 	}
 
-	if ( ! empty( $atts['rel'] ) ) {
-		$parts     = array_map( 'trim', explode( ' ', $atts['rel']['value'] ) );
-		$rel_array = array_map( 'trim', explode( ' ', $rel ) );
-		$parts     = array_unique( array_merge( $parts, $rel_array ) );
-		$rel       = implode( ' ', $parts );
-		unset( $atts['rel'] );
+	/*
+	 * It’s not necessary to add the `nofollow` guard to internal links;
+	 * these are used to only check and remove `nofollow` when adding it.
+	 */
+	$without_nofollow = $space_separated_rel_keywords;
+	$adding_no_follow = false;
 
-		$html = '';
-		foreach ( $atts as $name => $value ) {
-			if ( isset( $value['vless'] ) && 'y' === $value['vless'] ) {
-				$html .= $name . ' ';
+	/*
+	 * Although this could falsely match on longer tokens like `nofollowers`,
+	 * it’s safe to check generously since the parsing will ensure that only
+	 * `nofollow` is removed; only a bit of unnecessary processing will occur.
+	 */
+	if ( str_contains( $without_nofollow, 'nofollow' ) ) {
+		$tokens           = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $without_nofollow );
+		$without_nofollow = '';
+
+		foreach ( $tokens as $token ) {
+			if ( 'nofollow' === $token ) {
+				$adding_no_follow = true;
 			} else {
-				$html .= "{$name}=\"" . esc_attr( $value['value'] ) . '" ';
+				$without_nofollow .= " {$token}";
 			}
 		}
-		$text = trim( $html );
 	}
 
-	$rel_attr = $rel ? ' rel="' . esc_attr( $rel ) . '"' : '';
+	// Update the `rel` attributes in every `A` element.
+	$processor = new WP_HTML_Tag_Processor( $html );
+	while ( $processor->next_tag( 'A' ) ) {
+		$rel = $processor->get_attribute( 'rel' );
+		$rel = is_string( $rel ) ? $rel : '';
 
-	return "<a {$text}{$rel_attr}>";
+		$href          = $adding_no_follow ? $processor->get_attribute( 'href' ) : null;
+		$skip_nofollow = is_string( $href ) && wp_is_internal_link( $href );
+
+		$combined = $skip_nofollow
+			? "{$rel} {$without_nofollow}"
+			: "{$rel} {$space_separated_rel_keywords}";
+
+		$tokens  = WP_HTML_Attribute::from_unordered_set_of_space_separated_tokens( $combined );
+		$new_rel = empty( $tokens ) ? false : implode( ' ', $tokens );
+
+		$processor->set_attribute( 'rel', $new_rel );
+	}
+
+	return $processor->get_updated_html();
 }
 
 /**
@@ -3261,13 +3308,7 @@ function wp_rel_callback( $matches, $rel ) {
 function wp_rel_nofollow( $text ) {
 	// This is a pre-save filter, so text is already escaped.
 	$text = stripslashes( $text );
-	$text = preg_replace_callback(
-		'|<a (.+?)>|i',
-		static function ( $matches ) {
-			return wp_rel_callback( $matches, 'nofollow' );
-		},
-		$text
-	);
+	$text = wp_include_in_all_a_rel( $text, 'nofollow' );
 	return wp_slash( $text );
 }
 
@@ -3281,6 +3322,11 @@ static function ( $matches ) {
  * @return string HTML A Element with `rel="nofollow"`.
  */
 function wp_rel_nofollow_callback( $matches ) {
-	return wp_rel_callback( $matches, 'nofollow' );
+	_deprecated_function(
+		__FUNCTION__,
+		'{WP_VERSION}',
+		'wp_include_in_all_a_rel()'
+	);
+	return wp_include_in_all_a_rel( $matches[0], 'nofollow' );
 }
 
@@ -3295,13 +3341,7 @@ function wp_rel_nofollow_callback( $matches ) {
 function wp_rel_ugc( $text ) {
 	// This is a pre-save filter, so text is already escaped.
 	$text = stripslashes( $text );
-	$text = preg_replace_callback(
-		'|<a (.+?)>|i',
-		static function ( $matches ) {
-			return wp_rel_callback( $matches, 'nofollow ugc' );
-		},
-		$text
-	);
+	$text = wp_include_in_all_a_rel( $text, 'nofollow ugc' );
 	return wp_slash( $text );
 }
 
diff --git a/src/wp-includes/html-api/class-wp-html-attribute.php b/src/wp-includes/html-api/class-wp-html-attribute.php
new file mode 100644
index 0000000000000..fad2aadd09782
--- /dev/null
+++ b/src/wp-includes/html-api/class-wp-html-attribute.php
@@ -0,0 +1,64 @@
+<?php
+/**
+ * HTML API: WP_HTML_Attribute class
+ *
+ * @package WordPress
+ * @subpackage HTML-API
+ * @since {WP_VERSION}
+ */
+
+class WP_HTML_Attribute {
+	/**
+	 * Parses an attribute value as an unordered set of unique space-separated
+	 * tokens, keeping the first occurrence of each token in source order.
+	 *
+	 * > A set of space-separated tokens is a string containing zero or more
+	 * > words (known as tokens) separated by one or more ASCII whitespace,
+	 * > where words consist of any string of one or more characters, none
+	 * > of which are ASCII whitespace.
+	 *
+	 * > An unordered set of unique space-separated tokens is a set of
+	 * > space-separated tokens where none of the tokens are duplicated.
+	 *
+	 * > How tokens in a set of space-separated tokens are to be compared
+	 * > (e.g. case-sensitively or not) is defined on a per-set basis.
+	 *
+	 * @see https://html.spec.whatwg.org/#unordered-set-of-unique-space-separated-tokens
+	 *
+	 * @since {WP_VERSION}
+	 *
+	 * @param string $attribute_value  HTML-decoded attribute value to parse.
+	 * @param string $case_sensitivity Optional. Constrain uniqueness with 'case-sensitive'
+	 *                                 or 'case-insensitive'. Default 'case-sensitive'.
+	 * @return string[] Set of unique tokens parsed from attribute value.
+	 */
+	public static function from_unordered_set_of_space_separated_tokens( $attribute_value, $case_sensitivity = 'case-sensitive' ) {
+		if ( ! is_string( $attribute_value ) || '' === $attribute_value ) {
+			return array();
+		}
+
+		if ( 'case-insensitive' === $case_sensitivity ) {
+			$attribute_value = strtolower( $attribute_value );
+		}
+
+		$tokens  = array();
+		$uniques = ' ';
+		$at      = 0;
+		$end     = strlen( $attribute_value );
+		while ( $at < $end ) {
+			$at += strspn( $attribute_value, " \t\f\r\n", $at );
+
+			$word_length = strcspn( $attribute_value, " \t\f\r\n", $at );
+			$word        = substr( $attribute_value, $at, $word_length );
+
+			if ( 0 < $word_length && ! str_contains( $uniques, " {$word} " ) ) {
+				$uniques .= "{$word} ";
+				$tokens[] = $word;
+			}
+
+			$at += $word_length;
+		}
+
+		return $tokens;
+	}
+}
diff --git a/src/wp-settings.php b/src/wp-settings.php
index 60ffc307c5f6e..1f9f7aad0d4ed 100644
--- a/src/wp-settings.php
+++ b/src/wp-settings.php
@@ -266,6 +266,7 @@
 require ABSPATH . WPINC . '/html-api/class-wp-html-stack-event.php';
 require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
 require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-attribute.php';
 require ABSPATH . WPINC . '/class-wp-http.php';
 require ABSPATH . WPINC . '/class-wp-http-streams.php';
 require ABSPATH . WPINC . '/class-wp-http-curl.php';