Skip to content

Commit fe81fae

Browse files
committed
HTML-API: Introduce minimal HTML Processor.
1 parent 6281ce4 commit fe81fae

11 files changed

+3007
-4
lines changed
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
<?php
2+
/**
3+
* HTML API: WP_HTML_Active_Formatting_Elements class
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
* @since 6.4.0
8+
*/
9+
10+
/**
11+
* Core class used by the HTML processor during HTML parsing
12+
* for managing the stack of active formatting elements.
13+
*
14+
* This class is designed for internal use by the HTML processor.
15+
*
16+
* > Initially, the list of active formatting elements is empty.
17+
* > It is used to handle mis-nested formatting element tags.
18+
* >
19+
* > The list contains elements in the formatting category, and markers.
20+
* > The markers are inserted when entering applet, object, marquee,
21+
* > template, td, th, and caption elements, and are used to prevent
22+
* > formatting from "leaking" into applet, object, marquee, template,
23+
* > td, th, and caption elements.
24+
* >
25+
* > In addition, each element in the list of active formatting elements
26+
* > is associated with the token for which it was created, so that
27+
* > further elements can be created for that token if necessary.
28+
*
29+
* @since 6.4.0
30+
*
31+
* @access private
32+
*
33+
* @see https://html.spec.whatwg.org/#list-of-active-formatting-elements
34+
* @see WP_HTML_Processor
35+
*/
36+
class WP_HTML_Active_Formatting_Elements {
	/**
	 * Backing list for the active formatting elements.
	 *
	 * Index 0 holds the element that was added first; the last index
	 * holds the element that was added most recently.
	 *
	 * @since 6.4.0
	 *
	 * @var WP_HTML_Token[]
	 */
	private $stack = array();

	/**
	 * Checks whether the given node is present in the stack of active formatting elements.
	 *
	 * Nodes are matched by comparing their `bookmark_name` properties.
	 *
	 * @since 6.4.0
	 *
	 * @param WP_HTML_Token $token Node to search for.
	 * @return bool True if a node with the same bookmark name is in the stack, otherwise false.
	 */
	public function contains_node( $token ) {
		foreach ( $this->walk_up() as $candidate ) {
			$is_same_node = $token->bookmark_name === $candidate->bookmark_name;

			if ( $is_same_node ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Reports the number of nodes currently held in the stack of active formatting elements.
	 *
	 * @since 6.4.0
	 *
	 * @return int How many nodes are in the stack of active formatting elements.
	 */
	public function count() {
		$total = count( $this->stack );

		return $total;
	}

	/**
	 * Returns the most-recently added node in the stack of active formatting
	 * elements, or null when the stack holds nothing.
	 *
	 * @since 6.4.0
	 *
	 * @return WP_HTML_Token|null Last node in the stack, if one exists, otherwise null.
	 */
	public function current_node() {
		// end() hands back false for an empty array; callers are promised null instead.
		$last = end( $this->stack );

		if ( ! $last ) {
			return null;
		}

		return $last;
	}

	/**
	 * Appends a node to the end of the stack of active formatting elements.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
	 *
	 * @param WP_HTML_Token $token Node to add to the stack.
	 */
	public function push( $token ) {
		/*
		 * > If there are already three elements in the list of active formatting elements after the last marker,
		 * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
		 * > attributes as element, then remove the earliest such element from the list of active formatting
		 * > elements. For these purposes, the attributes must be compared as they were when the elements were
		 * > created by the parser; two elements have the same attributes if all their parsed attributes can be
		 * > paired such that the two attributes in each pair have identical names, namespaces, and values
		 * > (the order of the attributes does not matter).
		 *
		 * @TODO: Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack.
		 */

		// > Add element to the list of active formatting elements.
		$this->stack[] = $token;
	}

	/**
	 * Removes a node from the stack of active formatting elements.
	 *
	 * The stack is searched from its most-recently added node backwards, and
	 * only the first node whose `bookmark_name` matches is removed.
	 *
	 * @since 6.4.0
	 *
	 * @param WP_HTML_Token $token Node to remove from the stack, if it is present.
	 * @return bool True if a matching node was found and removed, otherwise false.
	 */
	public function remove_node( $token ) {
		foreach ( $this->walk_up() as $distance_from_end => $candidate ) {
			if ( $token->bookmark_name === $candidate->bookmark_name ) {
				// The generator numbers its items from the end of the stack; convert to an index from the start.
				$index_from_start = $this->count() - $distance_from_end - 1;
				array_splice( $this->stack, $index_from_start, 1 );

				return true;
			}
		}

		return false;
	}

	/**
	 * Iterates over the stack of active formatting elements in insertion
	 * order: from the top element (added first) down to the one added last.
	 *
	 * This is a generator and is meant to be consumed with "foreach".
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_down() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > EM -> STRONG -> A ->
	 *
	 * To visit the most-recently added element first and move towards the top,
	 * @see WP_HTML_Active_Formatting_Elements::walk_up
	 *
	 * @since 6.4.0
	 */
	public function walk_down() {
		// The length is captured once, before the first item is yielded.
		$total = count( $this->stack );
		$index = 0;

		while ( $index < $total ) {
			yield $this->stack[ $index ];
			++$index;
		}
	}

	/**
	 * Iterates over the stack of active formatting elements in reverse
	 * insertion order: from the bottom element (added last) up to the one
	 * added first.
	 *
	 * This is a generator and is meant to be consumed with "foreach".
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_up() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > A -> STRONG -> EM ->
	 *
	 * To visit the first added element first and move towards the bottom,
	 * @see WP_HTML_Active_Formatting_Elements::walk_down
	 *
	 * @since 6.4.0
	 */
	public function walk_up() {
		$index = count( $this->stack );

		while ( --$index >= 0 ) {
			yield $this->stack[ $index ];
		}
	}
}

0 commit comments

Comments
 (0)