@@ -1416,149 +1416,50 @@ function wp_kses_attr_check( &$name, &$value, &$whole, $vless, $element, $allowe
14161416 * attribute defined first (`foo='bar' foo='baz'` will result in `foo='bar'`).
14171417 *
14181418 * @since 1.0.0
1419+ * @since 6.9.0 Rebuilt on HTML API
14191420 *
14201421 * @param string $attr Attribute list from HTML element to closing HTML element tag.
14211422 * @param string[] $allowed_protocols Array of allowed URL protocols.
14221423 * @return array[] Array of attribute information after parsing.
14231424 */
function wp_kses_hair( $attr, $allowed_protocols ) {
	/*
	 * The attribute list is wrapped in a synthetic `<wp …>` tag so the HTML API
	 * can tokenize it. Each attribute it reports is then rebuilt into a record
	 * with `name`, `value`, `whole` and `vless` keys, indexed by attribute name.
	 */
	$attributes = array();
	$uris       = wp_kses_uri_attributes();

	/*
	 * Syntax characters which are replaced by character references when the
	 * attribute value is re-serialized inside double quotes. The table does
	 * not depend on the attribute, so it is built once rather than per loop.
	 */
	$syntax_characters = array(
		'&' => '&amp;',
		'<' => '&lt;',
		'>' => '&gt;',
		"'" => '&apos;',
		'"' => '&quot;',
	);

	$processor = new WP_HTML_Tag_Processor( "<wp {$attr}>" );

	/*
	 * When the synthetic tag cannot be parsed (e.g. the attribute list contains
	 * an unterminated quoted value) there is no matched tag to read attributes
	 * from. Return no attributes instead of iterating over a null result.
	 */
	if ( ! $processor->next_token() ) {
		return $attributes;
	}

	$names = $processor->get_attribute_names_with_prefix( '' );
	if ( ! is_array( $names ) ) {
		return $attributes;
	}

	// Loop through the whole attribute list.
	foreach ( $names as $name ) {
		$value   = $processor->get_attribute( $name );
		$is_bool = true === $value;

		// Only boolean (valueless) and string values can be serialized below.
		if ( ! $is_bool && ! is_string( $value ) ) {
			continue;
		}

		// URI-bearing attributes have disallowed protocols stripped.
		if ( is_string( $value ) && in_array( $name, $uris, true ) ) {
			$value = wp_kses_bad_protocol( $value, $allowed_protocols );
		}

		// Reconstruct and normalize the attribute value.
		$recoded = $is_bool ? '' : strtr( $value, $syntax_characters );
		$whole   = $is_bool ? $name : "{$name}=\"{$recoded}\"";

		// @todo Review whether the attribute names need additional security checks.
		$attributes[ $name ] = array(
			'name'  => $name,
			'value' => $recoded,
			'whole' => $whole,
			'vless' => $is_bool ? 'y' : 'n',
		);
	}

	return $attributes;
}
15631464
15641465/**
0 commit comments