Skip to content

Commit b186393

Browse files
committed
Revert "Extractor: move overlay-changes check from traverser to worker"
This reverts commit 2ed8dc7.
1 parent feb4c3a commit b186393

File tree

2 files changed

+31
-36
lines changed

2 files changed

+31
-36
lines changed

python/extractor/semmle/traverser.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,19 @@ def __init__(self, options, modulenames, logger):
3131
if not os.path.exists(p) and not options.ignore_missing_modules:
3232
raise FileNotFoundError("'%s' does not exist." % p)
3333
self.paths.add(p)
34+
# During overlay extraction, only traverse the files that were changed.
35+
self.overlay_changes = None
36+
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os.environ:
37+
overlay_changes_file = os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES']
38+
logger.info("Overlay extraction mode: only extracting files changed according to '%s'", overlay_changes_file)
39+
try:
40+
with open(overlay_changes_file, 'r', encoding='utf-8') as f:
41+
data = json.load(f)
42+
changed_paths = data.get('changes', [])
43+
self.overlay_changes = { os.path.abspath(p) for p in changed_paths }
44+
except (IOError, ValueError) as e:
45+
logger.warn("Failed to read overlay changes from '%s' (falling back to full extraction): %s", overlay_changes_file, e)
46+
self.overlay_changes = None
3447
self.exclude_paths = set([ os.path.abspath(f) for f in options.exclude_file ])
3548
self.exclude = exclude_filter_from_options(options)
3649
self.filter = filter_from_options_and_environment(options)
@@ -49,11 +62,20 @@ def __iter__(self):
4962
if mod is None:
5063
self.logger.error("No module named '%s'.", name)
5164
raise ExtractorFailure()
65+
if self.overlay_changes is not None and mod.path not in self.overlay_changes:
66+
self.logger.debug("Skipping module '%s' as it was not changed in overlay extraction.", name)
67+
continue
5268
yield mod.get_extractable()
5369
for path in self.paths:
70+
if self.overlay_changes is not None and path not in self.overlay_changes:
71+
self.logger.debug("Skipping path '%s' as it was not changed in overlay extraction.", path)
72+
continue
5473
yield Extractable.from_path(path)
5574
for path in self.recurse_files:
5675
for modpath in self._treewalk(path):
76+
if self.overlay_changes is not None and modpath not in self.overlay_changes:
77+
self.logger.debug("Skipping file '%s' as it was not changed in overlay extraction.", modpath)
78+
continue
5779
yield Extractable.from_path(modpath)
5880
for name in self.recurse_packages:
5981
mod = self.finder.find(name)
@@ -67,6 +89,9 @@ def __iter__(self):
6789
self.logger.error("Package '%s' does not have a path.", name)
6890
raise ExtractorFailure()
6991
for modpath in self._treewalk(path):
92+
if self.overlay_changes is not None and modpath not in self.overlay_changes:
93+
self.logger.debug("Skipping package '%s' as it was not changed in overlay extraction.", modpath)
94+
continue
7095
yield Extractable.from_path(modpath)
7196

7297
def _treewalk(self, path):

python/extractor/semmle/worker.py

Lines changed: 6 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from semmle.profiling import get_profiler
1212
from semmle.path_rename import renamer_from_options_and_env
1313
from semmle.logging import WARN, recursion_error_message, internal_error_message, Logger
14-
from semmle.util import FileExtractable, FolderExtractable
1514

1615
class ExtractorFailure(Exception):
1716
'Generic exception representing the failure of an extractor.'
@@ -20,32 +19,17 @@ class ExtractorFailure(Exception):
2019

2120
class ModuleImportGraph(object):
2221

23-
def __init__(self, max_depth, logger: Logger):
22+
def __init__(self, max_depth):
2423
self.modules = {}
2524
self.succ = defaultdict(set)
2625
self.todo = set()
2726
self.done = set()
2827
self.max_depth = max_depth
29-
self.logger = logger
30-
31-
# During overlay extraction, only traverse the files that were changed.
32-
self.overlay_changes = None
33-
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os.environ:
34-
overlay_changes_file = os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES']
35-
logger.info("Overlay extraction mode: only extracting files changed according to '%s'", overlay_changes_file)
36-
try:
37-
with open(overlay_changes_file, 'r', encoding='utf-8') as f:
38-
data = json.load(f)
39-
changed_paths = data.get('changes', [])
40-
self.overlay_changes = { os.path.abspath(p) for p in changed_paths }
41-
except (IOError, ValueError) as e:
42-
logger.warn("Failed to read overlay changes from '%s' (falling back to full extraction): %s", overlay_changes_file, e)
43-
self.overlay_changes = None
4428

4529
def add_root(self, mod):
4630
self.modules[mod] = 0
4731
if mod not in self.done:
48-
self.add_todo(mod)
32+
self.todo.add(mod)
4933

5034
def add_import(self, mod, imported):
5135
assert mod in self.modules
@@ -55,7 +39,7 @@ def add_import(self, mod, imported):
5539
self._reduce_depth(imported, self.modules[mod] + 1)
5640
else:
5741
if self.modules[mod] < self.max_depth and imported not in self.done:
58-
self.add_todo(imported)
42+
self.todo.add(imported)
5943
self.modules[imported] = self.modules[mod] + 1
6044

6145
def _reduce_depth(self, mod, depth):
@@ -64,7 +48,7 @@ def _reduce_depth(self, mod, depth):
6448
if depth > self.max_depth:
6549
return
6650
if mod not in self.done:
67-
self.add_todo(mod)
51+
self.todo.add(mod)
6852
self.modules[mod] = depth
6953
for imp in self.succ[mod]:
7054
self._reduce_depth(imp, depth+1)
@@ -77,25 +61,11 @@ def get(self):
7761

7862
def push_back(self, mod):
7963
self.done.remove(mod)
80-
self.add_todo(mod)
64+
self.todo.add(mod)
8165

8266
def empty(self):
8367
return not self.todo
8468

85-
def add_todo(self, mod):
86-
if not self._module_in_overlay_changes(mod):
87-
self.logger.debug("Skipping module '%s' as it was not changed in overlay extraction.", mod)
88-
return
89-
self.todo.add(mod)
90-
91-
def _module_in_overlay_changes(self, mod):
92-
if self.overlay_changes is not None:
93-
if isinstance(mod, FileExtractable):
94-
return mod.path in self.overlay_changes
95-
if isinstance(mod, FolderExtractable):
96-
return mod.path + '/__init__.py' in self.overlay_changes
97-
return True
98-
9969
class ExtractorPool(object):
10070
'''Pool of worker processes running extractors'''
10171

@@ -120,7 +90,7 @@ def __init__(self, outdir, archive, proc_count, options, logger: Logger):
12090
self.enqueued = set()
12191
self.done = set()
12292
self.requirements = {}
123-
self.import_graph = ModuleImportGraph(options.max_import_depth, logger)
93+
self.import_graph = ModuleImportGraph(options.max_import_depth)
12494
logger.debug("Source archive: %s", archive)
12595
self.logger = logger
12696
DiagnosticsWriter.create_output_dir()

0 commit comments

Comments
 (0)