diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php
index e0469bea020e5..97f6da95a0012 100644
--- a/src/wp-includes/html-api/class-wp-html-processor-state.php
+++ b/src/wp-includes/html-api/class-wp-html-processor-state.php
@@ -428,6 +428,38 @@ class WP_HTML_Processor_State {
 	 */
 	public $context_node = null;
 
+	/**
+	 * The recognized encoding of the input byte stream.
+	 *
+	 * > The stream of code points that comprises the input to the tokenization
+	 * > stage will be initially seen by the user agent as a stream of bytes
+	 * > (typically coming over the network or from the local file system).
+	 * > The bytes encode the actual characters according to a particular character
+	 * > encoding, which the user agent uses to decode the bytes into characters.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @var string|null
+	 */
+	public $encoding = null;
+
+	/**
+	 * The parser's confidence in the input encoding.
+	 *
+	 * > When the HTML parser is decoding an input byte stream, it uses a character
+	 * > encoding and a confidence. The confidence is either tentative, certain, or
+	 * > irrelevant. The encoding used, and whether the confidence in that encoding
+	 * > is tentative or certain, is used during the parsing to determine whether to
+	 * > change the encoding. If no encoding is necessary, e.g. because the parser is
+	 * > operating on a Unicode stream and doesn't have to use a character encoding
+	 * > at all, then the confidence is irrelevant.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @var string
+	 */
+	public $encoding_confidence = 'tentative';
+
 	/**
 	 * HEAD element pointer.
 	 *
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 9f2662c9e4c48..51802ac558a60 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -256,21 +256,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
 	 */
 	private $context_node = null;
 
-	/**
-	 * Whether the parser has yet processed the context node,
-	 * if created as a fragment parser.
-	 *
-	 * The context node will be initially pushed onto the stack of open elements,
-	 * but when created as a fragment parser, this context element (and the implicit
-	 * HTML document node above it) should not be exposed as a matched token or node.
-	 *
-	 * This boolean indicates whether the processor should skip over the current
-	 * node in its initial search for the first node created from the input HTML.
-	 *
-	 * @var bool
-	 */
-	private $has_seen_context_node = false;
-
 	/*
 	 * Public Interface Functions
 	 */
@@ -312,9 +297,11 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
 			return null;
 		}
 
-		$processor                        = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
-		$processor->state->context_node   = array( 'BODY', array() );
-		$processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+		$processor                             = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
+		$processor->state->context_node        = array( 'BODY', array() );
+		$processor->state->insertion_mode      = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+		$processor->state->encoding            = $encoding;
+		$processor->state->encoding_confidence = 'certain';
 
 		// @todo Create "fake" bookmarks for non-existent but implied nodes.
 		$processor->bookmarks['root-node']    = new WP_HTML_Span( 0, 0 );
@@ -340,6 +327,34 @@ public static function create_fragment( $html, $context = '<body>', $encoding =
 		return $processor;
 	}
 
+	/**
+	 * Creates an HTML processor in the full parsing mode.
+	 *
+	 * It's likely that a fragment parser is more appropriate, unless sending an
+	 * entire HTML document from start to finish. Consider a fragment parser with
+	 * a context node of `<body>`.
+	 *
+	 * Since UTF-8 is the only currently-accepted charset, if working with a
+	 * document that isn't UTF-8, it's important to convert the document before
+	 * creating the processor: pass in the converted HTML.
+	 *
+	 * @param string      $html                    Input HTML document to process.
+	 * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used
+	 *                                             in the input byte stream. Currently must be UTF-8.
+	 * @return static|null The created processor if successful, otherwise null.
+	 */
+	public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) {
+		if ( 'UTF-8' !== $known_definite_encoding ) {
+			return null;
+		}
+
+		$processor                             = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE );
+		$processor->state->encoding            = $known_definite_encoding;
+		$processor->state->encoding_confidence = 'certain';
+
+		return $processor;
+	}
+
 	/**
 	 * Constructor.
 	 *
@@ -993,7 +1008,62 @@ public function get_current_depth(): int {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_initial(): bool {
-		$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_INITIAL . ' state.' );
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
+
+		switch ( $op ) {
+			/*
+			 * > A character token that is one of U+0009 CHARACTER TABULATION,
+			 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+			 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+			 *
+			 * Ignore the token.
+			 */
+			case '#text':
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					return $this->step();
+				}
+				goto initial_anything_else;
+				break;
+
+			/*
+			 * > A comment token
+			 */
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > A DOCTYPE token
+			 */
+			case 'html':
+				$contents = $this->get_modifiable_text();
+				if ( ' html' !== $contents ) {
+					/*
+					 * @todo When the HTML Tag Processor fully parses the DOCTYPE declaration,
+					 *       this code should examine the contents to set the compatibility mode.
+					 */
+					$this->bail( 'Cannot process any DOCTYPE other than a normative HTML5 doctype.' );
+				}
+
+				/*
+				 * > Then, switch the insertion mode to "before html".
+				 */
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML;
+				return true;
+		}
+
+		/*
+		 * > Anything else
+		 */
+		initial_anything_else:
+		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML;
+		return $this->step( self::REPROCESS_CURRENT_NODE );
 	}
 
 	/**
@@ -1002,7 +1072,7 @@ private function step_initial(): bool {
 	 * This internal function performs the 'before html' insertion mode
 	 * logic for the generalized WP_HTML_Processor::step() function.
 	 *
-	 * @since 6.7.0 Stub implementation.
+	 * @since 6.7.0
 	 *
 	 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 	 *
@@ -1012,7 +1082,86 @@ private function step_initial(): bool {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_before_html(): bool {
-		$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML . ' state.' );
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$is_closer  = parent::is_tag_closer();
+		$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
+
+		switch ( $op ) {
+			/*
+			 * > A DOCTYPE token
+			 */
+			case 'html':
+				// Parse error: ignore the token.
+				return $this->step();
+
+			/*
+			 * > A comment token
+			 */
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > A character token that is one of U+0009 CHARACTER TABULATION,
+			 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+			 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+			 *
+			 * Ignore the token.
+			 */
+			case '#text':
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					return $this->step();
+				}
+				goto before_html_anything_else;
+				break;
+
+			/*
+			 * > A start tag whose tag name is "html"
+			 */
+			case '+HTML':
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD;
+				return true;
+
+			/*
+			 * > An end tag whose tag name is one of: "head", "body", "html", "br"
+			 *
+			 * Closing BR tags are always reported by the Tag Processor as opening tags.
+			 */
+			case '-HEAD':
+			case '-BODY':
+			case '-HTML':
+				/*
+				 * > Act as described in the "anything else" entry below.
+				 */
+				goto before_html_anything_else;
+				break;
+		}
+
+		/*
+		 * > Any other end tag
+		 */
+		if ( $is_closer ) {
+			// Parse error: ignore the token.
+			return $this->step();
+		}
+
+		/*
+		 * > Anything else.
+		 *
+		 * > Create an html element whose node document is the Document object.
+		 * > Append it to the Document object. Put this element in the stack of open elements.
+		 * > Switch the insertion mode to "before head", then reprocess the token.
+		 */
+		before_html_anything_else:
+		$this->insert_virtual_node( 'HTML' );
+		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD;
+		return $this->step( self::REPROCESS_CURRENT_NODE );
 	}
 
 	/**
@@ -1031,7 +1180,86 @@ private function step_before_html(): bool {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_before_head(): bool {
-		$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD . ' state.' );
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$is_closer  = parent::is_tag_closer();
+		$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
+
+		switch ( $op ) {
+			/*
+			 * > A character token that is one of U+0009 CHARACTER TABULATION,
+			 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+			 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+			 *
+			 * Ignore the token.
+			 */
+			case '#text':
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					return $this->step();
+				}
+				goto before_head_anything_else;
+				break;
+
+			/*
+			 * > A comment token
+			 */
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > A DOCTYPE token
+			 */
+			case 'html':
+				// Parse error: ignore the token.
+				return $this->step();
+
+			/*
+			 * > A start tag whose tag name is "html"
+			 */
+			case '+HTML':
+				return $this->step_in_body();
+
+			/*
+			 * > A start tag whose tag name is "head"
+			 */
+			case '+HEAD':
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->head_element   = $this->state->current_token;
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
+				return true;
+
+			/*
+			 * > An end tag whose tag name is one of: "head", "body", "html", "br"
+			 * > Act as described in the "anything else" entry below.
+			 *
+			 * Closing BR tags are always reported by the Tag Processor as opening tags.
+			 */
+			case '-HEAD':
+			case '-BODY':
+			case '-HTML':
+				goto before_head_anything_else;
+				break;
+		}
+
+		if ( $is_closer ) {
+			// Parse error: ignore the token.
+			return $this->step();
+		}
+
+		/*
+		 * > Anything else
+		 *
+		 * > Insert an HTML element for a "head" start tag token with no attributes.
+		 */
+		before_head_anything_else:
+		$this->state->head_element   = $this->insert_virtual_node( 'HEAD' );
+		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
+		return $this->step( self::REPROCESS_CURRENT_NODE );
 	}
 
 	/**
@@ -1056,29 +1284,31 @@ private function step_in_head(): bool {
 		$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
 		$op         = "{$op_sigil}{$token_name}";
 
-		/*
-		 * > A character token that is one of U+0009 CHARACTER TABULATION,
-		 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
-		 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
-		 */
-		if ( '#text' === $op ) {
-			$text = $this->get_modifiable_text();
-			if ( '' === $text ) {
+		switch ( $op ) {
+			case '#text':
 				/*
-				 * If the text is empty after processing HTML entities and stripping
-				 * U+0000 NULL bytes then ignore the token.
+				 * > A character token that is one of U+0009 CHARACTER TABULATION,
+				 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+				 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
 				 */
-				return $this->step();
-			}
+				$text = $this->get_modifiable_text();
+				if ( '' === $text ) {
+					/*
+					 * If the text is empty after processing HTML entities and stripping
+					 * U+0000 NULL bytes then ignore the token.
+					 */
+					return $this->step();
+				}
 
-			if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
-				// Insert the character.
-				$this->insert_html_element( $this->state->current_token );
-				return true;
-			}
-		}
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					// Insert the character.
+					$this->insert_html_element( $this->state->current_token );
+					return true;
+				}
+
+				goto in_head_anything_else;
+				break;
 
-		switch ( $op ) {
 			/*
 			 * > A comment token
 			 */
@@ -1124,7 +1354,7 @@ private function step_in_head(): bool {
 				 * >     tentative, then change the encoding to the resulting encoding.
 				 */
 				$charset = $this->get_attribute( 'charset' );
-				if ( is_string( $charset ) ) {
+				if ( is_string( $charset ) && 'tentative' === $this->state->encoding_confidence ) {
 					$this->bail( 'Cannot yet process META tags with charset to determine encoding.' );
 				}
 
@@ -1141,7 +1371,8 @@ private function step_in_head(): bool {
 				if (
 					is_string( $http_equiv ) &&
 					is_string( $content ) &&
-					0 === strcasecmp( $http_equiv, 'Content-Type' )
+					0 === strcasecmp( $http_equiv, 'Content-Type' ) &&
+					'tentative' === $this->state->encoding_confidence
 				) {
 					$this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' );
 				}
@@ -1193,10 +1424,11 @@ private function step_in_head(): bool {
 
 			/*
 			 * > An end tag whose tag name is one of: "body", "html", "br"
+			 *
+			 * Closing BR tags are always reported by the Tag Processor as opening tags.
 			 */
 			case '-BODY':
 			case '-HTML':
-			case '-BR':
 				/*
 				 * > Act as described in the "anything else" entry below.
 				 */
@@ -1273,7 +1505,92 @@ private function step_in_head(): bool {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_in_head_noscript(): bool {
-		$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT . ' state.' );
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$is_closer  = parent::is_tag_closer();
+		$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
+
+		switch ( $op ) {
+			/*
+			 * > A character token that is one of U+0009 CHARACTER TABULATION,
+			 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+			 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+			 *
+			 * Process the token using the rules for the "in head" insertion mode.
+			 */
+			case '#text':
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					return $this->step_in_head();
+				}
+
+				goto in_head_noscript_anything_else;
+				break;
+
+			/*
+			 * > A DOCTYPE token
+			 */
+			case 'html':
+				// Parse error: ignore the token.
+				return $this->step();
+
+			/*
+			 * > A start tag whose tag name is "html"
+			 */
+			case '+HTML':
+				return $this->step_in_body();
+
+			/*
+			 * > An end tag whose tag name is "noscript"
+			 */
+			case '-NOSCRIPT':
+				$this->state->stack_of_open_elements->pop();
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
+				return true;
+
+			/*
+			 * > A comment token
+			 * >
+			 * > A start tag whose tag name is one of: "basefont", "bgsound",
+			 * > "link", "meta", "noframes", "style"
+			 */
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+			case '+BASEFONT':
+			case '+BGSOUND':
+			case '+LINK':
+			case '+META':
+			case '+NOFRAMES':
+			case '+STYLE':
+				return $this->step_in_head();
+
+			/*
+			 * > An end tag whose tag name is "br"
+			 *
+			 * This should never happen, as the Tag Processor prevents showing a BR closing tag.
+			 */
+		}
+
+		/*
+		 * > A start tag whose tag name is one of: "head", "noscript"
+		 * > Any other end tag
+		 */
+		if ( '+HEAD' === $op || '+NOSCRIPT' === $op || $is_closer ) {
+			// Parse error: ignore the token.
+			return $this->step();
+		}
+
+		/*
+		 * > Anything else
+		 *
+		 * Anything here is a parse error.
+		 */
+		in_head_noscript_anything_else:
+		$this->state->stack_of_open_elements->pop();
+		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
+		return $this->step( self::REPROCESS_CURRENT_NODE );
 	}
 
 	/**
@@ -1292,7 +1609,133 @@ private function step_in_head_noscript(): bool {
 	 * @return bool Whether an element was found.
 	 */
 	private function step_after_head(): bool {
-		$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD . ' state.' );
+		$token_name = $this->get_token_name();
+		$token_type = $this->get_token_type();
+		$is_closer  = parent::is_tag_closer();
+		$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
+		$op         = "{$op_sigil}{$token_name}";
+
+		switch ( $op ) {
+			/*
+			 * > A character token that is one of U+0009 CHARACTER TABULATION,
+			 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF),
+			 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+			 */
+			case '#text':
+				$text = $this->get_modifiable_text();
+				if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+					// Insert the character.
+					$this->insert_html_element( $this->state->current_token );
+					return true;
+				}
+				goto after_head_anything_else;
+				break;
+
+			/*
+			 * > A comment token
+			 */
+			case '#comment':
+			case '#funky-comment':
+			case '#presumptuous-tag':
+				$this->insert_html_element( $this->state->current_token );
+				return true;
+
+			/*
+			 * > A DOCTYPE token
+			 */
+			case 'html':
+				// Parse error: ignore the token.
+				return $this->step();
+
+			/*
+			 * > A start tag whose tag name is "html"
+			 */
+			case '+HTML':
+				return $this->step_in_body();
+
+			/*
+			 * > A start tag whose tag name is "body"
+			 */
+			case '+BODY':
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->frameset_ok    = false;
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+				return true;
+
+			/*
+			 * > A start tag whose tag name is "frameset"
+			 */
+			case '+FRAMESET':
+				$this->insert_html_element( $this->state->current_token );
+				$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET;
+				return true;
+
+			/*
+			 * > A start tag whose tag name is one of: "base", "basefont", "bgsound",
+			 * > "link", "meta", "noframes", "script", "style", "template", "title"
+			 *
+			 * Anything here is a parse error.
+			 */
+			case '+BASE':
+			case '+BASEFONT':
+			case '+BGSOUND':
+			case '+LINK':
+			case '+META':
+			case '+NOFRAMES':
+			case '+SCRIPT':
+			case '+STYLE':
+			case '+TEMPLATE':
+			case '+TITLE':
+				/*
+				 * > Push the node pointed to by the head element pointer onto the stack of open elements.
+				 * > Process the token using the rules for the "in head" insertion mode.
+				 * > Remove the node pointed to by the head element pointer from the stack of open elements. (It might not be the current node at this point.)
+				 */
+				$this->bail( 'Cannot process elements after HEAD which reopen the HEAD element.' );
+				/*
+				 * Do not leave this break in when adding support; it's here to prevent
+				 * WPCS from getting confused at the switch structure without a return,
+				 * because it doesn't know that `bail()` always throws.
+				 */
+				break;
+
+			/*
+			 * > An end tag whose tag name is "template"
+			 */
+			case '-TEMPLATE':
+				return $this->step_in_head();
+
+			/*
+			 * > An end tag whose tag name is one of: "body", "html", "br"
+			 *
+			 * Closing BR tags are always reported by the Tag Processor as opening tags.
+			 */
+			case '-BODY':
+			case '-HTML':
+				/*
+				 * > Act as described in the "anything else" entry below.
+				 */
+				goto after_head_anything_else;
+				break;
+		}
+
+		/*
+		 * > A start tag whose tag name is "head"
+		 * > Any other end tag
+		 */
+		if ( '+HEAD' === $op || $is_closer ) {
+			// Parse error: ignore the token.
+			return $this->step();
+		}
+
+		/*
+		 * > Anything else
+		 * > Insert an HTML element for a "body" start tag token with no attributes.
+		 */
+		after_head_anything_else:
+		$this->insert_virtual_node( 'BODY' );
+		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+		return $this->step( self::REPROCESS_CURRENT_NODE );
 	}
 
 	/**
@@ -4469,14 +4912,17 @@ private function insert_html_element( WP_HTML_Token $token ): void {
 	 * @param string      $token_name    Name of token to create and insert into the stack of open elements.
 	 * @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node.
 	 *                                   Defaults to auto-creating a bookmark name.
+	 * @return WP_HTML_Token Newly-created virtual token.
 	 */
-	private function insert_virtual_node( $token_name, $bookmark_name = null ): void {
+	private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
 		$here = $this->bookmarks[ $this->state->current_token->bookmark_name ];
 		$name = $bookmark_name ?? $this->bookmark_token();
 
 		$this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 );
 
-		$this->insert_html_element( new WP_HTML_Token( $name, $token_name, false ) );
+		$token = new WP_HTML_Token( $name, $token_name, false );
+		$this->insert_html_element( $token );
+		return $token;
 	}
 
 	/*
@@ -4633,6 +5079,53 @@ public static function is_void( $tag_name ): bool {
 		);
 	}
 
+	/**
+	 * Gets an encoding from a given string.
+	 *
+	 * This is an algorithm defined in the WHATWG Encoding specification.
+	 *
+	 * Example:
+	 *
+	 *     'UTF-8' === self::get_encoding( 'utf8' );
+	 *     'UTF-8' === self::get_encoding( "  \tUTF-8 " );
+	 *     null    === self::get_encoding( 'UTF-7' );
+	 *     null    === self::get_encoding( 'utf8; charset=' );
+	 *
+	 * @see https://encoding.spec.whatwg.org/#concept-encoding-get
+	 *
+	 * @todo As this parser only supports UTF-8, only the UTF-8
+	 *       encodings are detected. Add more as desired, but the
+	 *       parser will bail on non-UTF-8 encodings.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @param string $label A string which may specify a known encoding.
+	 * @return string|null Known encoding if matched, otherwise null.
+	 */
+	protected static function get_encoding( string $label ): ?string {
+		/*
+		 * > Remove any leading and trailing ASCII whitespace from label.
+		 */
+		$label = trim( $label, " \t\f\r\n" );
+
+		/*
+		 * > If label is an ASCII case-insensitive match for any of the labels listed in the
+		 * > table below, then return the corresponding encoding; otherwise return failure.
+		 */
+		switch ( strtolower( $label ) ) {
+			case 'unicode-1-1-utf-8':
+			case 'unicode11utf8':
+			case 'unicode20utf8':
+			case 'utf-8':
+			case 'utf8':
+			case 'x-unicode20utf8':
+				return 'UTF-8';
+
+			default:
+				return null;
+		}
+	}
+
 	/*
 	 * Constants that would pollute the top of the class if they were found there.
 	 */
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
index 0dbd45cfa0ead..1486769533e96 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
@@ -25,7 +25,7 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase {
 	public function test_navigates_into_normative_html_for_supported_elements( $html, $tag_name ) {
 		$processor = WP_HTML_Processor::create_fragment( $html );
 
-		$this->assertTrue( $processor->step(), "Failed to step into supported {$tag_name} element." );
+		$this->assertTrue( $processor->next_token(), "Failed to step into supported {$tag_name} element." );
 		$this->assertSame( $tag_name, $processor->get_tag(), "Misread {$tag_name} as a {$processor->get_tag()} element." );
 	}
 
@@ -90,6 +90,7 @@ public static function data_single_tag_of_supported_elements() {
 			'IMG',
 			'INS',
 			'LI',
+			'LINK',
 			'ISINDEX', // Deprecated.
 			'KBD',
 			'KEYGEN', // Deprecated.
@@ -108,6 +109,8 @@ public static function data_single_tag_of_supported_elements() {
 			'NAV',
 			'NEXTID', // Deprecated.
 			'NOBR', // Neutralized.
+			'NOEMBED', // Neutralized.
+			'NOFRAMES', // Neutralized.
 			'NOSCRIPT',
 			'OBJECT',
 			'OL',
@@ -122,6 +125,7 @@ public static function data_single_tag_of_supported_elements() {
 			'RTC', // Neutralized.
 			'RUBY',
 			'SAMP',
+			'SCRIPT',
 			'SEARCH',
 			'SECTION',
 			'SLOT',
@@ -130,21 +134,29 @@ public static function data_single_tag_of_supported_elements() {
 			'SPAN',
 			'STRIKE',
 			'STRONG',
+			'STYLE',
 			'SUB',
 			'SUMMARY',
 			'SUP',
 			'TABLE',
+			'TEXTAREA',
 			'TIME',
+			'TITLE',
 			'TT',
 			'U',
 			'UL',
 			'VAR',
 			'VIDEO',
+			'XMP', // Deprecated, use PRE instead.
 		);
 
 		$data = array();
 		foreach ( $supported_elements as $tag_name ) {
-			$data[ $tag_name ] = array( "<{$tag_name}>", $tag_name );
+			$closer = in_array( $tag_name, array( 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
+				? "</{$tag_name}>"
+				: '';
+
+			$data[ $tag_name ] = array( "<{$tag_name}>{$closer}", $tag_name );
 		}
 
 		$data['IMAGE (treated as an IMG)'] = array( '<image>', 'IMG' );
@@ -182,22 +194,9 @@ public function test_fails_when_encountering_unsupported_tag( $html ) {
 	 */
 	public static function data_unsupported_elements() {
 		$unsupported_elements = array(
-			'BODY',
-			'FRAME',
-			'FRAMESET',
-			'HEAD',
-			'HTML',
-			'IFRAME',
 			'MATH',
-			'NOEMBED', // Neutralized.
-			'NOFRAMES', // Neutralized.
 			'PLAINTEXT', // Neutralized.
-			'SCRIPT',
-			'STYLE',
 			'SVG',
-			'TEXTAREA',
-			'TITLE',
-			'XMP', // Deprecated, use PRE instead.
 		);
 
 		$data = array();
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index 69329f51321ba..cc9528c3ff083 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -25,35 +25,41 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
 	 * The HTML Processor only accepts HTML in document <body>.
 	 * Do not run tests that look for anything in document <head>.
 	 */
-	const SKIP_HEAD_TESTS = true;
+	const SKIP_HEAD_TESTS = false;
 
 	/**
 	 * Skip specific tests that may not be supported or have known issues.
 	 */
 	const SKIP_TESTS = array(
-		'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.',
-		'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.',
-		'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.',
-		'template/line0885'   => 'Unimplemented: no parsing of attributes on context node.',
-		'tests1/line0720'     => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests15/line0001'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests15/line0022'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests15/line0068'    => 'Unimplemented: no support outside of IN BODY yet.',
-		'tests2/line0650'     => 'Whitespace only test never enters "in body" parsing mode.',
-		'tests19/line0965'    => 'Unimplemented: no support outside of IN BODY yet.',
-		'tests23/line0001'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests23/line0041'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests23/line0069'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests23/line0101'    => 'Unimplemented: Reconstruction of active formatting elements.',
-		'tests26/line0263'    => 'Bug: An active formatting element should be created for a trailing text node.',
-		'webkit01/line0231'   => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
-		'webkit02/line0013'   => "Asserting behavior with scripting flag enabled, which this parser doesn't support.",
-		'webkit01/line0300'   => 'Unimplemented: no support outside of IN BODY yet.',
-		'webkit01/line0310'   => 'Unimplemented: no support outside of IN BODY yet.',
-		'webkit01/line0336'   => 'Unimplemented: no support outside of IN BODY yet.',
-		'webkit01/line0349'   => 'Unimplemented: no support outside of IN BODY yet.',
-		'webkit01/line0362'   => 'Unimplemented: no support outside of IN BODY yet.',
-		'webkit01/line0375'   => 'Unimplemented: no support outside of IN BODY yet.',
+		'tests1/line0537'        => 'Bug: Investigate',
+
+		'adoption01/line0046'    => 'Unimplemented: Reconstruction of active formatting elements.',
+		'adoption01/line0159'    => 'Unimplemented: Reconstruction of active formatting elements.',
+		'adoption01/line0318'    => 'Unimplemented: Reconstruction of active formatting elements.',
+		'comments01/line0155'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
+		'comments01/line0169'    => 'Unimplemented: Need to access raw comment text on non-normative comments.',
+		'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
+		'noscript01/line0014'    => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'template/line0885'      => 'Unimplemented: no parsing of attributes on context node.',
+		'tests1/line0692'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
+		'tests1/line0720'        => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests14/line0022'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests14/line0055'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests15/line0001'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests15/line0022'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests19/line1079'       => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests19/line0965'       => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
+		'tests2/line0207'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0686'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests2/line0709'        => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+		'tests23/line0001'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests23/line0041'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests23/line0069'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests23/line0101'       => 'Unimplemented: Reconstruction of active formatting elements.',
+		'tests5/line0013'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
+		'tests5/line0077'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
+		'tests5/line0091'        => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
+		'webkit01/line0231'      => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
 	);
 
 	/**
@@ -68,14 +74,43 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
 	 * @param string $html             Given test HTML.
 	 * @param string $expected_tree    Tree structure of parsed HTML.
 	 */
-	public function test_parse( $fragment_context, $html, $expected_tree ) {
+	public function test_parse( ?string $fragment_context, string $html, string $expected_tree ) {
 		$processed_tree = self::build_tree_representation( $fragment_context, $html );
 
 		if ( null === $processed_tree ) {
 			$this->markTestSkipped( 'Test includes unsupported markup.' );
 		}
+		$fragment_detail = $fragment_context ? " in context <{$fragment_context}>" : '';
 
-		$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly:\n{$html}" );
+		/*
+		 * The HTML processor does not produce html, head, body tags if the processor does not reach them.
+		 * These should all be produced when reaching the end-of-file.
+		 * For now, append the missing tags when necessary.
+		 *
+		 * @todo remove this section when the processor handles this.
+		 */
+		$auto_generated_html_head_body = "<html>\n  <head>\n  <body>\n\n";
+		$auto_generated_head_body      = "  <head>\n  <body>\n\n";
+		$auto_generated_body           = "  <body>\n\n";
+		if ( str_ends_with( $expected_tree, $auto_generated_html_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_html_head_body ) ) {
+			if ( str_ends_with( $processed_tree, "<html>\n  <head>\n\n" ) ) {
+				$processed_tree = substr_replace( $processed_tree, "  <body>\n\n", -1 );
+			} elseif ( str_ends_with( $processed_tree, "<html>\n\n" ) ) {
+				$processed_tree = substr_replace( $processed_tree, "  <head>\n  <body>\n\n", -1 );
+			} else {
+				$processed_tree = substr_replace( $processed_tree, $auto_generated_html_head_body, -1 );
+			}
+		} elseif ( str_ends_with( $expected_tree, $auto_generated_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_head_body ) ) {
+			if ( str_ends_with( $processed_tree, "<head>\n\n" ) ) {
+				$processed_tree = substr_replace( $processed_tree, "  <body>\n\n", -1 );
+			} else {
+				$processed_tree = substr_replace( $processed_tree, $auto_generated_head_body, -1 );
+			}
+		} elseif ( str_ends_with( $expected_tree, $auto_generated_body ) && ! str_ends_with( $processed_tree, $auto_generated_body ) ) {
+			$processed_tree = substr_replace( $processed_tree, $auto_generated_body, -1 );
+		}
+
+		$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly{$fragment_detail}:\n{$html}" );
 	}
 
 	/**
@@ -100,7 +135,9 @@ public function data_external_html5lib_tests() {
 				$line       = str_pad( strval( $test[0] ), 4, '0', STR_PAD_LEFT );
 				$test_name  = "{$test_suite}/line{$line}";
 
-				if ( self::should_skip_test( $test_name, $test[3] ) ) {
+				$test_context_element = $test[1];
+
+				if ( self::should_skip_test( $test_context_element, $test_name, $test[3] ) ) {
 					continue;
 				}
 
@@ -118,7 +155,11 @@ public function data_external_html5lib_tests() {
 	 *
 	 * @return bool True if the test case should be skipped. False otherwise.
 	 */
-	private static function should_skip_test( $test_name, $expected_tree ): bool {
+	private static function should_skip_test( ?string $test_context_element, string $test_name, string $expected_tree ): bool {
+		if ( null !== $test_context_element && 'body' !== $test_context_element ) {
+			return true;
+		}
+
 		if ( self::SKIP_HEAD_TESTS ) {
 			$html_start = "<html>\n  <head>\n  <body>\n";
 			if (
@@ -146,15 +187,18 @@ private static function should_skip_test( $test_name, $expected_tree ): bool {
 	private static function build_tree_representation( ?string $fragment_context, string $html ) {
 		$processor = $fragment_context
 			? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
-			: WP_HTML_Processor::create_fragment( $html );
+			: WP_HTML_Processor::create_full_parser( $html );
 		if ( null === $processor ) {
 			return null;
 		}
 
-		$output = "<html>\n  <head>\n  <body>\n";
-
-		// Initially, assume we're 2 levels deep at: html > body > [position]
-		$indent_level = 2;
+		/*
+		 * The fragment parser starts 2 levels deep at: html > body > [position]
+		 * and requires adjusting the initial output and indent level.
+		 * The full parser does not.
+		 */
+		$output       = $fragment_context ? "<html>\n  <head>\n  <body>\n" : '';
+		$indent_level = $fragment_context ? 2 : 0;
 		$indent       = '  ';
 		$was_text     = null;
 		$text_node    = '';
@@ -238,6 +282,11 @@ private static function build_tree_representation( ?string $fragment_context, st
 					$text_node .= $processor->get_modifiable_text();
 					break;
 
+				case '#funky-comment':
+					// Comments must be "<" then "!-- " then the data then " -->".
+					$output .= str_repeat( $indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
+					break;
+
 				case '#comment':
 					switch ( $processor->get_comment_type() ) {
 						case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
@@ -250,6 +299,10 @@ private static function build_tree_representation( ?string $fragment_context, st
 							$comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
 							break;
 
+						case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
+							$comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
+							break;
+
 						default:
 							throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
 					}
@@ -301,6 +354,7 @@ public static function parse_html5_dat_testfile( $filename ) {
 		$test_html            = '';
 		$test_dom             = '';
 		$test_context_element = null;
+		$test_script_flag     = false;
 		$test_line_number     = 0;
 
 		while ( false !== ( $line = fgets( $handle ) ) ) {
@@ -309,8 +363,12 @@ public static function parse_html5_dat_testfile( $filename ) {
 			if ( '#' === $line[0] ) {
 				// Finish section.
 				if ( "#data\n" === $line ) {
-					// Yield when switching from a previous state.
-					if ( $state ) {
+					/*
+					 * Yield when switching from a previous state.
+					 * Do not yield tests with the scripting flag enabled. The scripting flag
+					 * is always disabled in the HTML API.
+					 */
+					if ( $state && ! $test_script_flag ) {
 						yield array(
 							$test_line_number,
 							$test_context_element,
@@ -325,6 +383,10 @@ public static function parse_html5_dat_testfile( $filename ) {
 					$test_html            = '';
 					$test_dom             = '';
 					$test_context_element = null;
+					$test_script_flag     = false;
+				}
+				if ( "#script-on\n" === $line ) {
+					$test_script_flag = true;
 				}
 
 				$state = trim( substr( $line, 1 ) );
@@ -376,7 +438,15 @@ public static function parse_html5_dat_testfile( $filename ) {
 				 */
 				case 'document':
 					if ( '|' === $line[0] ) {
-						$test_dom .= substr( $line, 2 );
+						/*
+						 * The next_token() method these tests rely on does not stop
+						 * at doctype nodes. Strip doctypes from output.
+						 * @todo Restore this line if and when the processor
+						 * exposes doctypes.
+						 */
+						if ( '| <!DOCTYPE ' !== substr( $line, 0, 12 ) ) {
+							$test_dom .= substr( $line, 2 );
+						}
 					} else {
 						// This is a text node that includes unescaped newlines.
 						// Everything else should be singles lines starting with "| ".