Skip to content

Commit 522e42b

Browse files
akx and stkao05 committed
Improve extract performance via ignoring directories early during os.walk
Co-authored-by: Steven Kao <st.kao.05@gmail.com>
1 parent d249286 commit 522e42b

File tree

2 files changed

+46
-6
lines changed

2 files changed

+46
-6
lines changed

babel/messages/extract.py

Lines changed: 38 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import os
2424
import sys
2525
import tokenize
26+
import warnings
2627
from collections.abc import (
2728
Callable,
2829
Collection,
@@ -114,7 +115,33 @@ def _strip(line: str):
114115
comments[:] = [_strip(c) for c in comments]
115116

116117

117-
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
118+
def make_default_directory_filter(
    method_map: Iterable[tuple[str, str]],
    root_dir: str | os.PathLike[str],
) -> Callable[[str | os.PathLike[str]], bool]:
    """Build the default directory filter for a walk rooted at ``root_dir``.

    The returned callable takes a directory path and returns ``False`` when
    the directory should be skipped, ``True`` otherwise. A directory is
    skipped when:

    * its base name starts with ``.`` or ``_`` (legacy default behavior), or
    * its path relative to ``root_dir`` (with ``/`` separators) is matched by
      ``pathmatch`` against any pattern whose method is ``"ignore"``.

    :param method_map: ``(pattern, method)`` pairs; only the entries whose
                       method equals ``"ignore"`` are used here. The iterable
                       is read exactly once, when the filter is built.
    :param root_dir: directory that the patterns are relative to
    :return: a predicate suitable for pruning directories during a walk
    """
    # Collect the ignore patterns once instead of re-scanning ``method_map``
    # for every directory visited; this also keeps the filter working when
    # ``method_map`` is an iterable that can only be consumed a single time.
    ignore_patterns = tuple(
        pattern for pattern, method in method_map if method == "ignore"
    )

    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
        subdir = os.path.basename(dirpath)
        # Legacy default behavior: ignore dot and underscore directories
        if subdir.startswith(('.', '_')):
            return False

        # Nothing to match against: skip the relative-path computation.
        if not ignore_patterns:
            return True

        # Patterns are written with forward slashes, so normalize separators.
        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
        return not any(pathmatch(pattern, dir_rel) for pattern in ignore_patterns)

    return directory_filter
137+
138+
139+
def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:  # pragma: no cover
    """Deprecated directory filter that skips dot and underscore directories.

    Emits a ``DeprecationWarning`` on every call, attributed to the caller
    (``stacklevel=2``).

    :param dirpath: path of the directory being considered
    :return: ``False`` if the directory's base name starts with ``.`` or
             ``_``, ``True`` otherwise
    """
    deprecation_message = (
        "`default_directory_filter` is deprecated and will be removed in a future version of Babel."
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    leaf_name = os.path.basename(dirpath)
    # Legacy default behavior: ignore dot and underscore directories
    is_hidden = leaf_name.startswith(('.', '_'))
    return not is_hidden
@@ -201,13 +228,19 @@ def extract_from_dir(
201228
"""
202229
if dirname is None:
203230
dirname = os.getcwd()
231+
204232
if options_map is None:
205233
options_map = {}
234+
235+
dirname = os.path.abspath(dirname)
236+
206237
if directory_filter is None:
207-
directory_filter = default_directory_filter
238+
directory_filter = make_default_directory_filter(
239+
method_map=method_map,
240+
root_dir=dirname,
241+
)
208242

209-
absname = os.path.abspath(dirname)
210-
for root, dirnames, filenames in os.walk(absname):
243+
for root, dirnames, filenames in os.walk(dirname):
211244
dirnames[:] = [
212245
subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir))
213246
]
@@ -224,7 +257,7 @@ def extract_from_dir(
224257
keywords,
225258
comment_tags,
226259
strip_comment_tags,
227-
dirpath=absname,
260+
dirpath=dirname,
228261
)
229262

230263

tests/messages/frontend/test_extract.py

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -213,9 +213,16 @@ def test_extraction_with_mapping_file(self):
213213

214214
@freeze_time("1994-11-11")
def test_extraction_with_mapping_dict(self):
    # Ignore rule expressed as a glob over files under an ``ignored``
    # directory; the shared helper does the actual extraction and checks.
    file_glob = '**/ignored/**.*'
    self._test_extraction_with_mapping_dict(ignore_pattern=file_glob)
217+
218+
@freeze_time("1994-11-11")
def test_extraction_with_entire_directory_ignored(self):
    # Ignore rule that names the directory itself rather than the files in
    # it; the shared helper does the actual extraction and checks.
    directory_name = 'ignored'
    self._test_extraction_with_mapping_dict(ignore_pattern=directory_name)
221+
222+
def _test_extraction_with_mapping_dict(self, *, ignore_pattern):
216223
self.dist.message_extractors = {
217224
'project': [
218-
('**/ignored/**.*', 'ignore', None),
225+
(ignore_pattern, 'ignore', None),
219226
('**.py', 'python', None),
220227
],
221228
}

0 commit comments

Comments
 (0)