Skip to content

Commit 70d501a

Browse files
committed
comments, minor edits
1 parent f650143 commit 70d501a

File tree

1 file changed

+28
-8
lines changed

1 file changed

+28
-8
lines changed

Lib/difflib.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2318,7 +2318,7 @@ class _LCSUBAutomaton:
23182318
eposs: list[int], # index of last match position
23192319
]
23202320
2321-
Next logic:
2321+
Next logic (Memory optimization, as > 50% of states have only 1 transition):
23222322
next2 == -1 -> empty
23232323
next2 == -3 -> next1: dict
23242324
next2 >= 0 -> next2 - index, next1 - key
@@ -2374,7 +2374,7 @@ def build(self, start2=0, stop2=None):
23742374

23752375
def findall(self, seq1, start1=0, stop1=None, start2=0, stop2=None, *,
23762376
mink=1, maxk=None, maximal=False):
2377-
"""
2377+
"""Find all common substrings from single O(n) scan
23782378
Args:
23792379
mink : int
23802380
filter out shorter length matches
@@ -2528,12 +2528,13 @@ def _make_nodes(self, n):
25282528
return lengths, links, next1s, next2s, eposs
25292529

25302530
def _build(self, start2, stop2):
2531+
"""Automaton builder"""
25312532
seq2 = self.seq2
25322533
junk = self.junk
25332534
# Make Nodes
25342535
size = (stop2 - start2)
2535-
n_nodes = 4 * size // 3 + 1
2536-
inc = size // 10 + 1
2536+
n_nodes = 4 * size // 3 + 1 # Maximum 25% overallocation
2537+
inc = size // 10 + 1 # Then, 10% increments
25372538
nodes = self._make_nodes(n_nodes)
25382539
lengths, links, next1s, next2s, eposs = nodes
25392540
nstates = 1
@@ -2645,12 +2646,18 @@ def _build(self, start2, stop2):
26452646
return nodes
26462647

26472648
def _finditer(self, seq1, start1, stop1, best=False):
2648-
"""
2649+
"""Core scanning routine
26492650
Args:
26502651
best : bool
26512652
False - return all matches, including non-maximal
26522653
True - return all matches of maximum length
26532654
all these will naturally be maximal
2655+
Returns:
2656+
generator of tuples (e1, e2, k), where
2657+
e1, e2 are ending positions in seq1 and seq2 respectively
2658+
k is the length of the match
2659+
Thus, starting position is: e1 + 1 - k
2660+
And stop for a slice is: e1 + 1
26542661
"""
26552662
if best not in (0, 1):
26562663
raise ValueError(f'{best=} not in (0, 1)')
@@ -2731,7 +2738,9 @@ def __repr__(self):
27312738

27322739

27332740
def _calc_skew(i, j, k, alo, ahi, blo, bhi):
2734-
# NOTE: -1 <= skew <= 1
2741+
"""Difference in normalized positions
2742+
Returns skew : float, where -1 < skew < 1
2743+
"""
27352744
k_div_2 = k // 2
27362745
apos = (i + k_div_2 - alo) / (ahi - alo)
27372746
bpos = (j + k_div_2 - blo) / (bhi - blo)
@@ -2836,6 +2845,9 @@ def _modifier(self, depth, block, alo, ahi, blo, bhi):
28362845
RESULTBLOCKS - List of blocks that terminate recursion
28372846
e.g. (RESULTBLOCKS, [(0, 0, 10), (10, 10, 10)])
28382847
2848+
If data contains no blocks or only blocks of 0 length,
2849+
the algorithm does not recurse further.
2850+
28392851
Note, one can get `a`, `b`, `automaton`, etc from self
28402852
"""
28412853
pass
@@ -2908,10 +2920,11 @@ def _get_matching_blocks(self):
29082920
i0 = i + k
29092921
j0 = j + k
29102922

2923+
if not validated:
2924+
continue
2925+
29112926
# 2.1.3. Apply action
29122927
if mtype is REPLACEBLOCK:
2913-
if not validated:
2914-
continue
29152928
i, j, k = init_block = validated[0]
29162929

29172930
elif mtype is ANCHORBLOCKS:
@@ -2968,6 +2981,10 @@ def _get_matching_blocks(self):
29682981
for triple in triples:
29692982
triple[0] = job_results[triple[0]]
29702983
triple[2] = job_results[triple[2]]
2984+
# k**1.3 is empirically tuned
2985+
# prefers one long match to many small ones
2986+
# but not too aggressively, so as to be able to jump
2987+
# out of skewed positions
29712988
total = sum(t[2]**1.3 for t in triple)
29722989
skew = _calc_skew(*triple[1], *bounds)
29732990
triple.append((total, -abs(skew)))
@@ -2988,6 +3005,9 @@ def _get_matching_blocks(self):
29883005
q_tail.append((_RANGE, (i0, ii, j0, jj)))
29893006
q_tail.append((_BLOCK, block))
29903007
i0, j0 = ii + kk, jj + kk
3008+
if not q_tail:
3009+
# No blocks identified. Do not recurse further.
3010+
continue
29913011
q_tail.append((_RANGE, (i0, ahi, j0, bhi)))
29923012

29933013
# 3.2. Yield what is possible straight away

0 commit comments

Comments
 (0)