@@ -1385,149 +1385,49 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
13851385 * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`).
13861386 *
13871387 * @since 1.0.0
1388+ * @since 6.9.0 Rebuilt on HTML API
13881389 *
13891390 * @param string $attr Attribute list from HTML element to closing HTML element tag.
13901391 * @param string[] $allowed_protocols Array of allowed URL protocols.
13911392 * @return array[] Array of attribute information after parsing.
13921393 */
13931394function wp_kses_hair ( $ attr , $ allowed_protocols ) {
1394- $ attrarr = array ();
1395- $ mode = 0 ;
1396- $ attrname = '' ;
1397- $ uris = wp_kses_uri_attributes ();
1395+ $ attributes = array ();
1396+ $ uris = wp_kses_uri_attributes ();
13981397
13991398 // Loop through the whole attribute list.
14001399
1401- while ( strlen ( $ attr ) !== 0 ) {
1402- $ working = 0 ; // Was the last operation successful?
1400+ $ processor = new WP_HTML_Tag_Processor ( " <wp { $ attr} > " );
1401+ $ processor -> next_token ();
14031402
1404- switch ( $ mode ) {
1405- case 0 :
1406- if ( preg_match ( '/^([_a-zA-Z][-_a-zA-Z0-9:.]*)/ ' , $ attr , $ match ) ) {
1407- $ attrname = $ match [1 ];
1408- $ working = 1 ;
1409- $ mode = 1 ;
1410- $ attr = preg_replace ( '/^[_a-zA-Z][-_a-zA-Z0-9:.]*/ ' , '' , $ attr );
1411- }
1412-
1413- break ;
1414-
1415- case 1 :
1416- if ( preg_match ( '/^\s*=\s*/ ' , $ attr ) ) { // Equals sign.
1417- $ working = 1 ;
1418- $ mode = 2 ;
1419- $ attr = preg_replace ( '/^\s*=\s*/ ' , '' , $ attr );
1420- break ;
1421- }
1422-
1423- if ( preg_match ( '/^\s+/ ' , $ attr ) ) { // Valueless.
1424- $ working = 1 ;
1425- $ mode = 0 ;
1426-
1427- if ( false === array_key_exists ( $ attrname , $ attrarr ) ) {
1428- $ attrarr [ $ attrname ] = array (
1429- 'name ' => $ attrname ,
1430- 'value ' => '' ,
1431- 'whole ' => $ attrname ,
1432- 'vless ' => 'y ' ,
1433- );
1434- }
1435-
1436- $ attr = preg_replace ( '/^\s+/ ' , '' , $ attr );
1437- }
1438-
1439- break ;
1440-
1441- case 2 :
1442- if ( preg_match ( '%^"([^"]*)"(\s+|/?$)% ' , $ attr , $ match ) ) {
1443- // "value"
1444- $ thisval = $ match [1 ];
1445- if ( in_array ( strtolower ( $ attrname ), $ uris , true ) ) {
1446- $ thisval = wp_kses_bad_protocol ( $ thisval , $ allowed_protocols );
1447- }
1448-
1449- if ( false === array_key_exists ( $ attrname , $ attrarr ) ) {
1450- $ attrarr [ $ attrname ] = array (
1451- 'name ' => $ attrname ,
1452- 'value ' => $ thisval ,
1453- 'whole ' => "$ attrname= \"$ thisval \"" ,
1454- 'vless ' => 'n ' ,
1455- );
1456- }
1457-
1458- $ working = 1 ;
1459- $ mode = 0 ;
1460- $ attr = preg_replace ( '/^"[^"]*"(\s+|$)/ ' , '' , $ attr );
1461- break ;
1462- }
1463-
1464- if ( preg_match ( "%^'([^']*)'(\s+|/?$)% " , $ attr , $ match ) ) {
1465- // 'value'
1466- $ thisval = $ match [1 ];
1467- if ( in_array ( strtolower ( $ attrname ), $ uris , true ) ) {
1468- $ thisval = wp_kses_bad_protocol ( $ thisval , $ allowed_protocols );
1469- }
1470-
1471- if ( false === array_key_exists ( $ attrname , $ attrarr ) ) {
1472- $ attrarr [ $ attrname ] = array (
1473- 'name ' => $ attrname ,
1474- 'value ' => $ thisval ,
1475- 'whole ' => "$ attrname=' $ thisval' " ,
1476- 'vless ' => 'n ' ,
1477- );
1478- }
1479-
1480- $ working = 1 ;
1481- $ mode = 0 ;
1482- $ attr = preg_replace ( "/^'[^']*'(\s+|$)/ " , '' , $ attr );
1483- break ;
1484- }
1485-
1486- if ( preg_match ( "%^([^\s \"']+)(\s+|/?$)% " , $ attr , $ match ) ) {
1487- // value
1488- $ thisval = $ match [1 ];
1489- if ( in_array ( strtolower ( $ attrname ), $ uris , true ) ) {
1490- $ thisval = wp_kses_bad_protocol ( $ thisval , $ allowed_protocols );
1491- }
1492-
1493- if ( false === array_key_exists ( $ attrname , $ attrarr ) ) {
1494- $ attrarr [ $ attrname ] = array (
1495- 'name ' => $ attrname ,
1496- 'value ' => $ thisval ,
1497- 'whole ' => "$ attrname= \"$ thisval \"" ,
1498- 'vless ' => 'n ' ,
1499- );
1500- }
1501-
1502- // We add quotes to conform to W3C's HTML spec.
1503- $ working = 1 ;
1504- $ mode = 0 ;
1505- $ attr = preg_replace ( "%^[^\s \"']+(\s+|$)% " , '' , $ attr );
1506- }
1403+ foreach ( $ processor ->get_attribute_names_with_prefix ( '' ) as $ name ) {
1404+ $ value = $ processor ->get_attribute ( $ name );
1405+ $ is_bool = true === $ value ;
1406+ if ( is_string ( $ value ) && in_array ( $ name , $ uris , true ) ) {
1407+ $ value = wp_kses_bad_protocol ( $ value , $ allowed_protocols );
1408+ }
15071409
1508- break ;
1509- } // End switch.
1410+ // Reconstruct and normalize the attribute value.
1411+ $ syntax_characters = array (
1412+ '& ' => '&amp; ' ,
1413+ '< ' => '&lt; ' ,
1414+ '> ' => '&gt; ' ,
1415+ "' " => '&apos; ' ,
1416+ '" ' => '&quot; ' ,
1417+ );
15101418
1511- if ( 0 === $ working ) { // Not well-formed, remove and try again.
1512- $ attr = wp_kses_html_error ( $ attr );
1513- $ mode = 0 ;
1514- }
1515- } // End while.
1419+ $ recoded = $ is_bool ? '' : strtr ( $ value , $ syntax_characters );
1420+ $ whole = $ is_bool ? $ name : "{$ name }= \"{$ recoded }\"" ;
15161421
1517- if ( 1 === $ mode && false === array_key_exists ( $ attrname , $ attrarr ) ) {
1518- /*
1519- * Special case, for when the attribute list ends with a valueless
1520- * attribute like "selected".
1521- */
1522- $ attrarr [ $ attrname ] = array (
1523- 'name ' => $ attrname ,
1524- 'value ' => '' ,
1525- 'whole ' => $ attrname ,
1526- 'vless ' => 'y ' ,
1422+ $ attributes [] = array (
1423+ 'name ' => $ name ,
1424+ 'value ' => $ recoded ,
1425+ 'whole ' => $ whole ,
1426+ 'vless ' => $ is_bool ,
15271427 );
15281428 }
15291429
1530- return $ attrarr ;
1430+ return $ attributes ;
15311431}
15321432
15331433/**
0 commit comments