Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
d8ac361
Add spawn_fragment_parser method
sirreal Sep 12, 2024
ad8f8db
Fix the processor context_node
sirreal Sep 12, 2024
e2efee4
Make it public
sirreal Sep 13, 2024
4f5249c
Fix spawn_fragment_parser method
sirreal Sep 13, 2024
eaed863
Process non-body context tests
sirreal Sep 13, 2024
25b18fa
Handle all the different document context in html5lib tests
sirreal Sep 13, 2024
9ac142f
lints
sirreal Sep 13, 2024
9ede14f
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 6, 2024
3f35886
Make spawned fragment parse have HTML > [context-node-tag] in breadcr…
sirreal Nov 6, 2024
ba9e218
Fallback to context node when checking namespace
sirreal Nov 6, 2024
fe48fa5
Add tests
sirreal Nov 6, 2024
fa4c5cb
Set the form element pointer on the fragment parser
sirreal Nov 6, 2024
fbb5c2f
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 6, 2024
336050d
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 6, 2024
943bbdd
Revert "Fallback to context node when checking namespace"
sirreal Nov 6, 2024
e3a0a86
Fix initial namespace on integration nodes
sirreal Nov 6, 2024
9d3b318
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 7, 2024
27a9781
Rename method, use static constructor, add comments
sirreal Nov 7, 2024
0789538
Update method name in tests
sirreal Nov 8, 2024
5e8b82e
Add ticket to tests
sirreal Nov 8, 2024
7eeec27
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 11, 2024
37f9ff4
Update method name in html5lib tests
sirreal Nov 12, 2024
80ae6f2
Handle null return from create_fragment
sirreal Nov 12, 2024
9866402
Use a cloned copy of the FORM element from the parent processor
sirreal Nov 12, 2024
00ed28c
Use create_fragement_at_node internally in create_fragment
sirreal Nov 12, 2024
c247869
Use create_fragment_at_node internally in create_fragment
sirreal Nov 12, 2024
bcebeba
Remove stale comment
sirreal Nov 12, 2024
9904524
Merge branch 'html-api/add-spawn-fragment-parser-method' into html-ap…
sirreal Nov 12, 2024
9e11f19
Improve method documentation with examples
sirreal Nov 12, 2024
05801bb
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 12, 2024
4618b90
Merge branch 'trunk' into html-api/add-spawn-fragment-parser-method
sirreal Nov 13, 2024
fc6ec54
Merge branch 'html-api/add-spawn-fragment-parser-method' into html-ap…
sirreal Nov 13, 2024
662a9b5
Use starts_with assertion for nul byte test
sirreal Nov 20, 2024
50a00a8
Add since tag, update comment
sirreal Nov 21, 2024
c5487b7
Add test
sirreal Nov 21, 2024
32f50b4
Prevent fragment creation on a tag closer
sirreal Nov 21, 2024
5216f21
Include non-empty fragment HTML in test
sirreal Nov 21, 2024
fd43a92
Merge branch 'trunk' into html-api/use-create-fragment-at-node-for-ma…
sirreal Nov 21, 2024
c45ada6
Merge branch 'html-api/disallow-fragment-creation-at-closer' into htm…
sirreal Nov 21, 2024
f9b5bea
Remove redundant early initialization of processor var
sirreal Nov 21, 2024
48e4738
Pass encoding from context into full processor
sirreal Nov 21, 2024
479c0b3
Update comments, remove "only <body> context" mentions
sirreal Nov 21, 2024
ed3bb54
Improve documentation for create_fragment
sirreal Nov 21, 2024
d2e4814
Add tests for unsupported contexts
sirreal Nov 21, 2024
3412256
Add _doing_it_wrong messages
sirreal Nov 21, 2024
f391255
Move fragment tests into the fragment test suite
sirreal Nov 21, 2024
75da0cc
Restore better comment from trunk
sirreal Nov 21, 2024
72e2c19
Merge branch 'trunk' into html-api/use-create-fragment-at-node-for-ma…
sirreal Nov 25, 2024
719dfe0
Merge branch 'trunk' into html-api/use-create-fragment-at-node-for-ma…
sirreal Nov 27, 2024
1461534
Check seeking to final_node success
sirreal Nov 27, 2024
f1ba41c
Merge branch 'trunk' into html-api/use-create-fragment-at-node-for-ma…
sirreal Nov 27, 2024
d6d5305
Update ticket numbers
sirreal Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 87 additions & 39 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -279,51 +279,62 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
* form is provided because a context element may have attributes that
* impact the parse, such as with a SCRIPT tag and its `type` attribute.
*
* ## Current HTML Support
* Example:
*
* // Usually, snippets of HTML ought to be processed in the default `<body>` context:
* $processor = WP_HTML_Processor::create_fragment( '<p>Hi</p>' );
*
* // Some fragments should be processed in the correct context like this SVG:
* $processor = WP_HTML_Processor::create_fragment( '<rect width="10" height="10" />', '<svg>' );
*
* // This fragment with TD tags should be processed in a TR context:
* $processor = WP_HTML_Processor::create_fragment(
* '<td>1<td>2<td>3',
* '<table><tbody><tr>'
* );
*
* - The only supported context is `<body>`, which is the default value.
* - The only supported document encoding is `UTF-8`, which is the default value.
* In order to create a fragment processor at the correct location, the
* provided fragment will be processed as part of a full HTML document.
* The processor will search for the last opener tag in the document and
* create a fragment processor at that location. The document will be
* forced into "no-quirks" mode by including the HTML5 doctype.
*
* For advanced usage and precise control over the context element, use
* `WP_HTML_Processor::create_full_processor()` and
* `WP_HTML_Processor::create_fragment_at_current_node()`.
*
* UTF-8 is the only allowed encoding. If working with a document that
* isn't UTF-8, first convert the document to UTF-8, then pass in the
* converted HTML.
*
* @since 6.4.0
* @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances.
* @since 6.8.0 Can create fragments with any context element.
*
* @param string $html Input HTML fragment to process.
* @param string $context Context element for the fragment, must be default of `<body>`.
* @param string $context Context element for the fragment. Defaults to `<body>`.
* @param string $encoding Text encoding of the document; must be the default of 'UTF-8'.
* @return static|null The created processor if successful, otherwise null.
*/
public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) {
if ( '<body>' !== $context || 'UTF-8' !== $encoding ) {
$context_processor = static::create_full_parser( "<!DOCTYPE html>{$context}", $encoding );
if ( null === $context_processor ) {
return null;
}

$processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
$processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
$processor->state->encoding = $encoding;
$processor->state->encoding_confidence = 'certain';

// @todo Create "fake" bookmarks for non-existent but implied nodes.
$processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 );
$processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );

$root_node = new WP_HTML_Token(
'root-node',
'HTML',
false
);

$processor->state->stack_of_open_elements->push( $root_node );

$context_node = new WP_HTML_Token(
'context-node',
'BODY',
false
);
while ( $context_processor->next_tag() ) {
$context_processor->set_bookmark( 'final_node' );
}

$processor->context_node = $context_node;
$processor->breadcrumbs = array( 'HTML', $context_node->node_name );
if (
! $context_processor->has_bookmark( 'final_node' ) ||
! $context_processor->seek( 'final_node' )
) {
_doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' ), '6.8.0' );
return null;
}

return $processor;
return $context_processor->create_fragment_at_current_node( $html );
}

/**
Expand All @@ -333,9 +344,9 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
* entire HTML document from start to finish. Consider a fragment parser with
* a context node of `<body>`.
*
* Since UTF-8 is the only currently-accepted charset, if working with a
* document that isn't UTF-8, it's important to convert the document before
* creating the processor: pass in the converted HTML.
* UTF-8 is the only allowed encoding. If working with a document that
* isn't UTF-8, first convert the document to UTF-8, then pass in the
* converted HTML.
*
* @param string $html Input HTML document to process.
* @param string|null $known_definite_encoding Optional. If provided, specifies the charset used
Expand Down Expand Up @@ -459,35 +470,72 @@ function ( WP_HTML_Token $token ): void {
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
*
* @since 6.8.0
*
* @param string $html Input HTML fragment to process.
* @return static|null The created processor if successful, otherwise null.
*/
public function create_fragment_at_current_node( string $html ) {
if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) {
_doing_it_wrong(
__METHOD__,
__( 'The context element must be a start tag.' ),
'6.8.0'
);
return null;
}

$tag_name = $this->current_element->token->node_name;
$namespace = $this->current_element->token->namespace;

if ( 'html' === $namespace && self::is_void( $tag_name ) ) {
_doing_it_wrong(
__METHOD__,
sprintf(
// translators: %s: A tag name like INPUT or BR.
__( 'The context element cannot be a void element, found "%s".' ),
$tag_name
),
'6.8.0'
);
return null;
}

/*
* Prevent creating fragments at nodes that require a special tokenizer state.
* This is unsupported by the HTML Processor.
*/
if (
'html' === $namespace &&
in_array( $this->current_element->token->node_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true )
in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true )
) {
_doing_it_wrong(
__METHOD__,
sprintf(
// translators: %s: A tag name like IFRAME or TEXTAREA.
__( 'The context element "%s" is not supported.' ),
$tag_name
),
'6.8.0'
);
return null;
}

$fragment_processor = static::create_fragment( $html );
if ( null === $fragment_processor ) {
return null;
}
$fragment_processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );

$fragment_processor->compat_mode = $this->compat_mode;

$fragment_processor->context_node = clone $this->state->current_token;
// @todo Create "fake" bookmarks for non-existent but implied nodes.
$fragment_processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 );
$root_node = new WP_HTML_Token(
'root-node',
'HTML',
false
);
$fragment_processor->state->stack_of_open_elements->push( $root_node );

$fragment_processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 );
$fragment_processor->context_node = clone $this->current_element->token;
$fragment_processor->context_node->bookmark_name = 'context-node';
$fragment_processor->context_node->on_destroy = null;

Expand Down
77 changes: 0 additions & 77 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -1043,83 +1043,6 @@ public function test_ensure_next_token_method_extensibility( $html, $expected_to
$this->assertEquals( $expected_token_counts, $processor->token_seen_count, 'Snapshot: ' . var_export( $processor->token_seen_count, true ) );
}

/**
* @ticket 62357
*/
public function test_create_fragment_at_current_node_in_foreign_content() {
	// Stop on the SVG opener so that it becomes the context element of the fragment.
	$svg_document = WP_HTML_Processor::create_full_parser( '<svg>' );
	$found_svg    = $svg_document->next_tag( 'SVG' );
	$this->assertTrue( $found_svg );

	$fragment_html = "\0preceded-by-nul-byte<rect /><circle></circle><foreignobject><div></div></foreignobject><g>";
	$svg_fragment  = $svg_document->create_fragment_at_current_node( $fragment_html );

	// Before any token has been visited the fragment reports the svg namespace.
	$this->assertSame( 'svg', $svg_fragment->get_namespace() );

	$has_first_token = $svg_fragment->next_token();
	$this->assertTrue( $has_first_token );

	/*
	 * A nul byte is dropped when parsing HTML content, whereas in SVG
	 * content it is expected to become a replacement character.
	 */
	$first_token_type = $svg_fragment->get_token_type();
	$this->assertSame( '#text', $first_token_type );
	$first_token_text = $svg_fragment->get_modifiable_text();
	$this->assertSame( "\u{FFFD}", $first_token_text );

	$found_rect = $svg_fragment->next_tag( 'RECT' );
	$this->assertTrue( $found_rect );
	$this->assertSame( 'svg', $svg_fragment->get_namespace() );

	// Breadcrumbs are expected to start at HTML followed by the SVG context element.
	$found_circle = $svg_fragment->next_tag( 'CIRCLE' );
	$this->assertTrue( $found_circle );
	$circle_breadcrumbs = $svg_fragment->get_breadcrumbs();
	$this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $circle_breadcrumbs );

	$found_foreign_object = $svg_fragment->next_tag( 'foreignObject' );
	$this->assertTrue( $found_foreign_object );
	$this->assertSame( 'svg', $svg_fragment->get_namespace() );
}

/**
* @ticket 62357
*/
public function test_create_fragment_at_current_node_in_foreign_content_integration_point() {
	// Stop on the foreignObject opener so that it becomes the context element of the fragment.
	$svg_document         = WP_HTML_Processor::create_full_parser( '<svg><foreignObject>' );
	$found_foreign_object = $svg_document->next_tag( 'foreignObject' );
	$this->assertTrue( $found_foreign_object );

	$fragment_html = "<image>\0not-preceded-by-nul-byte<rect />";
	$inner         = $svg_document->create_fragment_at_current_node( $fragment_html );

	// No token has been visited yet: as an integration point the fragment should start in the html namespace.
	$initial_namespace = $inner->get_namespace();
	$this->assertSame( 'html', $initial_namespace );

	// Under HTML parsing rules IMAGE is turned into IMG.
	$found_img = $inner->next_tag( 'IMG' );
	$this->assertTrue( $found_img );

	$has_following_token = $inner->next_token();
	$this->assertTrue( $has_following_token );

	// HTML parsing ignores the nul byte, so the text that follows it is what gets reported.
	$this->assertSame( '#text', $inner->get_token_type() );
	$this->assertSame( 'not-preceded-by-nul-byte', $inner->get_modifiable_text() );

	/*
	 * Because svg:foreignObject is an HTML integration point, RECT is
	 * handled as an HTML element: it can carry the self-closing flag
	 * yet it still expects a closing tag.
	 */
	$found_rect = $inner->next_tag( 'RECT' );
	$this->assertTrue( $found_rect );

	$rect_breadcrumbs = $inner->get_breadcrumbs();
	$this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $rect_breadcrumbs );

	$rect_namespace = $inner->get_namespace();
	$this->assertSame( 'html', $rect_namespace );

	$this->assertTrue( $inner->has_self_closing_flag() );
	$this->assertTrue( $inner->expects_closer() );
}

/**
* @ticket 62357
*/
public function test_prevent_fragment_creation_on_closers() {
	// Query that also visits tag closers, used to land on `</p>`.
	$closer_query = array(
		'tag_name'    => 'P',
		'tag_closers' => 'visit',
	);

	$document = WP_HTML_Processor::create_full_parser( '<p></p>' );

	// Move to the P opener first, then on to the next P token, closers included.
	$document->next_tag( 'P' );
	$document->next_tag( $closer_query );

	// Confirm the processor is paused on the P closer before attempting the fragment.
	$current_tag = $document->get_tag();
	$this->assertSame( 'P', $current_tag );
	$on_closer = $document->is_tag_closer();
	$this->assertTrue( $on_closer );

	// A tag closer must be rejected as a context element.
	$fragment = $document->create_fragment_at_current_node( '<i>fragment HTML</i>' );
	$this->assertNull( $fragment );
}

/**
* Ensure that lowercased tag_name query matches tags case-insensitively.
*
Expand Down
Loading
Loading