Skip to content

Commit 02ec191

Browse files
authored
Use Hierarchical Machine Config (#711)
* Make all existing MachineConfig keys fully optional * Added 'instrument_type' as a MachineConfig key, for use in hierarchical config files * If MachineConfig.rsync_basepath or MachineConfig.default_model is None, default to 'Path().resolve()' * Renamed 'from_file' to 'machine_config_from_file' for clarity * Removed 'get_machine_config_for_instrument' from 'session_shared' and replaced it with 'get_machine_config' from 'murfey.util.config' instead * Updated 'machine_config_from_file' function to parse and construct instrument machine configs hierarchically; it can load all the machine configs or just the specified one
1 parent 8b4803a commit 02ec191

File tree

14 files changed

+525
-84
lines changed

14 files changed

+525
-84
lines changed

src/murfey/server/api/clem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def validate_and_sanitise(
8383
machine_config = get_machine_config(instrument_name=instrument_name)[
8484
instrument_name
8585
]
86-
rsync_basepath = machine_config.rsync_basepath.resolve()
86+
rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve()
8787

8888
# Check that full file path doesn't contain unallowed characters
8989
# Currently allows only:

src/murfey/server/api/file_io_frontend.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
process_gain as _process_gain,
1515
)
1616
from murfey.server.murfey_db import murfey_db
17+
from murfey.util import secure_path
1718
from murfey.util.config import get_machine_config
1819
from murfey.util.db import Session
1920

@@ -50,10 +51,23 @@ async def create_symlink(
5051
machine_config = get_machine_config(instrument_name=instrument_name)[
5152
instrument_name
5253
]
53-
symlink_full_path = machine_config.rsync_basepath / symlink_params.symlink
54+
rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve()
55+
symlink_full_path = secure_path(
56+
rsync_basepath / symlink_params.symlink, keep_spaces=True
57+
)
58+
# Verify that the symlink provided does not lead elsewhere
59+
if not symlink_full_path.resolve().is_relative_to(rsync_basepath):
60+
logger.warning(
61+
"Symlink rejected because it will be created in a forbidden location"
62+
)
63+
return ""
64+
# Remove and replace symlink if it exists and 'override' is set
5465
if symlink_full_path.is_symlink() and symlink_params.override:
5566
symlink_full_path.unlink()
67+
# If a file/folder already exists using the desired symlink name, return empty string
5668
if symlink_full_path.exists():
5769
return ""
58-
symlink_full_path.symlink_to(machine_config.rsync_basepath / symlink_params.target)
70+
symlink_full_path.symlink_to(
71+
secure_path(rsync_basepath / symlink_params.target, keep_spaces=True)
72+
)
5973
return str(symlink_params.symlink)

src/murfey/server/api/file_io_instrument.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def suggest_path(
5757
)
5858

5959
# Construct the full path to where the dataset is to be saved
60-
check_path = machine_config.rsync_basepath / base_path
60+
rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve()
61+
check_path = rsync_basepath / base_path
6162

6263
# Check previous year to account for the year rolling over during data collection
6364
if not check_path.parent.exists():
@@ -69,7 +70,7 @@ def suggest_path(
6970
base_path_parts[year_idx] = str(int(part) - 1)
7071
base_path = "/".join(base_path_parts)
7172
check_path_prev = check_path
72-
check_path = machine_config.rsync_basepath / base_path
73+
check_path = rsync_basepath / base_path
7374

7475
# If it's not in the previous year either, it's a genuine error
7576
if not check_path.parent.exists():
@@ -88,7 +89,7 @@ def suggest_path(
8889
check_path.mkdir(mode=0o750)
8990
if params.extra_directory:
9091
(check_path / secure_filename(params.extra_directory)).mkdir(mode=0o750)
91-
return {"suggested_path": check_path.relative_to(machine_config.rsync_basepath)}
92+
return {"suggested_path": check_path.relative_to(rsync_basepath)}
9293

9394

9495
class Dest(BaseModel):
@@ -107,7 +108,9 @@ def make_rsyncer_destination(session_id: int, destination: Dest, db=murfey_db):
107108
]
108109
if not machine_config:
109110
raise ValueError("No machine configuration set when making rsyncer destination")
110-
full_destination_path = machine_config.rsync_basepath / destination_path
111+
full_destination_path = (
112+
machine_config.rsync_basepath or Path("")
113+
).resolve() / destination_path
111114
for parent_path in full_destination_path.parents:
112115
parent_path.mkdir(mode=0o750, exist_ok=True)
113116
return destination
@@ -151,7 +154,7 @@ async def write_eer_fractionation_file(
151154
) / secure_filename(fractionation_params.fractionation_file_name)
152155
else:
153156
file_path = (
154-
Path(machine_config.rsync_basepath)
157+
(machine_config.rsync_basepath or Path("")).resolve()
155158
/ str(datetime.now().year)
156159
/ secure_filename(visit_name)
157160
/ machine_config.gain_directory_name

src/murfey/server/api/file_io_shared.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@ async def process_gain(
3737
executables = machine_config.external_executables
3838
env = machine_config.external_environment
3939
safe_path_name = secure_filename(gain_reference_params.gain_ref.name)
40+
rsync_basepath = machine_config.rsync_basepath or Path("")
4041
filepath = (
41-
Path(machine_config.rsync_basepath)
42+
rsync_basepath
4243
/ str(datetime.now().year)
4344
/ secure_filename(visit_name)
4445
/ machine_config.gain_directory_name
@@ -48,7 +49,7 @@ async def process_gain(
4849
if not filepath.exists():
4950
filepath_prev = filepath
5051
filepath = (
51-
Path(machine_config.rsync_basepath)
52+
rsync_basepath
5253
/ str(datetime.now().year - 1)
5354
/ secure_filename(visit_name)
5455
/ machine_config.gain_directory_name
@@ -80,14 +81,12 @@ async def process_gain(
8081
)
8182
if new_gain_ref and new_gain_ref_superres:
8283
return {
83-
"gain_ref": new_gain_ref.relative_to(Path(machine_config.rsync_basepath)),
84-
"gain_ref_superres": new_gain_ref_superres.relative_to(
85-
Path(machine_config.rsync_basepath)
86-
),
84+
"gain_ref": new_gain_ref.relative_to(rsync_basepath),
85+
"gain_ref_superres": new_gain_ref_superres.relative_to(rsync_basepath),
8786
}
8887
elif new_gain_ref:
8988
return {
90-
"gain_ref": new_gain_ref.relative_to(Path(machine_config.rsync_basepath)),
89+
"gain_ref": new_gain_ref.relative_to(rsync_basepath),
9190
"gain_ref_superres": None,
9291
}
9392
else:

src/murfey/server/api/session_control.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,14 @@
2424
get_foil_holes_from_grid_square as _get_foil_holes_from_grid_square,
2525
get_grid_squares as _get_grid_squares,
2626
get_grid_squares_from_dcg as _get_grid_squares_from_dcg,
27-
get_machine_config_for_instrument,
2827
get_tiff_file as _get_tiff_file,
2928
get_upstream_file as _get_upstream_file,
3029
remove_session_by_id,
3130
)
3231
from murfey.server.ispyb import DB as ispyb_db, get_all_ongoing_visits
3332
from murfey.server.murfey_db import murfey_db
3433
from murfey.util import sanitise
35-
from murfey.util.config import MachineConfig
34+
from murfey.util.config import get_machine_config
3635
from murfey.util.db import (
3736
AutoProcProgram,
3837
ClientEnvironment,
@@ -80,8 +79,8 @@ async def get_current_timestamp():
8079

8180

8281
@router.get("/instruments/{instrument_name}/machine")
83-
def machine_info_by_instrument(instrument_name: str) -> Optional[MachineConfig]:
84-
return get_machine_config_for_instrument(instrument_name)
82+
def machine_info_by_instrument(instrument_name: str):
83+
return get_machine_config(instrument_name)[instrument_name]
8584

8685

8786
@router.get("/new_client_id/")

src/murfey/server/api/session_info.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,14 @@
2424
get_foil_holes_from_grid_square as _get_foil_holes_from_grid_square,
2525
get_grid_squares as _get_grid_squares,
2626
get_grid_squares_from_dcg as _get_grid_squares_from_dcg,
27-
get_machine_config_for_instrument,
2827
get_tiff_file as _get_tiff_file,
2928
get_upstream_file as _get_upstream_file,
3029
remove_session_by_id,
3130
)
3231
from murfey.server.ispyb import DB as ispyb_db, get_all_ongoing_visits
3332
from murfey.server.murfey_db import murfey_db
3433
from murfey.util import sanitise
35-
from murfey.util.config import MachineConfig
34+
from murfey.util.config import get_machine_config
3635
from murfey.util.db import (
3736
ClassificationFeedbackParameters,
3837
ClientEnvironment,
@@ -78,8 +77,8 @@ def connections_check():
7877
@router.get("/instruments/{instrument_name}/machine")
7978
def machine_info_by_instrument(
8079
instrument_name: MurfeyInstrumentName,
81-
) -> Optional[MachineConfig]:
82-
return get_machine_config_for_instrument(instrument_name)
80+
):
81+
return get_machine_config(instrument_name)[instrument_name]
8382

8483

8584
@router.get("/instruments/{instrument_name}/visits_raw", response_model=List[Visit])

src/murfey/server/api/session_shared.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
import logging
2-
from functools import lru_cache
32
from pathlib import Path
4-
from typing import Dict, List, Optional
3+
from typing import Dict, List
54

65
from sqlmodel import select
76
from sqlmodel.orm.session import Session as SQLModelSession
87
from werkzeug.utils import secure_filename
98

109
import murfey.server.prometheus as prom
1110
from murfey.util import safe_run, sanitise, secure_path
12-
from murfey.util.config import MachineConfig, from_file, get_machine_config, settings
11+
from murfey.util.config import get_machine_config
1312
from murfey.util.db import (
1413
DataCollection,
1514
DataCollectionGroup,
@@ -23,15 +22,6 @@
2322
logger = logging.getLogger("murfey.server.api.shared")
2423

2524

26-
@lru_cache(maxsize=5)
27-
def get_machine_config_for_instrument(instrument_name: str) -> Optional[MachineConfig]:
28-
if settings.murfey_machine_configuration:
29-
return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
30-
instrument_name
31-
]
32-
return None
33-
34-
3525
def remove_session_by_id(session_id: int, db):
3626
session = db.exec(select(MurfeySession).where(MurfeySession.id == session_id)).one()
3727
sessions_for_visit = db.exec(

src/murfey/server/api/workflow.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -245,15 +245,14 @@ def start_dc(
245245
machine_config = get_machine_config(instrument_name=instrument_name)[
246246
instrument_name
247247
]
248+
rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve()
248249
logger.info(
249250
f"Starting data collection on microscope {instrument_name!r} "
250-
f"with basepath {sanitise(str(machine_config.rsync_basepath))} and directory {sanitise(dc_params.image_directory)}"
251+
f"with basepath {sanitise(str(rsync_basepath))} and directory {sanitise(dc_params.image_directory)}"
251252
)
252253
dc_parameters = {
253254
"visit": visit_name,
254-
"image_directory": str(
255-
machine_config.rsync_basepath / dc_params.image_directory
256-
),
255+
"image_directory": str(rsync_basepath / dc_params.image_directory),
257256
"start_time": str(datetime.now()),
258257
"voltage": dc_params.voltage,
259258
"pixel_size": str(float(dc_params.pixel_size_on_image) * 1e9),
@@ -744,7 +743,10 @@ async def request_tomography_preprocessing(
744743
"fm_dose": proc_file.dose_per_frame,
745744
"frame_count": proc_file.frame_count,
746745
"gain_ref": (
747-
str(machine_config.rsync_basepath / proc_file.gain_ref)
746+
str(
747+
(machine_config.rsync_basepath or Path("")).resolve()
748+
/ proc_file.gain_ref
749+
)
748750
if proc_file.gain_ref and machine_config.data_transfer_enabled
749751
else proc_file.gain_ref
750752
),
@@ -1060,7 +1062,7 @@ async def make_gif(
10601062
instrument_name
10611063
]
10621064
output_dir = (
1063-
Path(machine_config.rsync_basepath)
1065+
(machine_config.rsync_basepath or Path("")).resolve()
10641066
/ secure_filename(year)
10651067
/ secure_filename(visit_name)
10661068
/ "processed"

src/murfey/server/demo_api.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@
4343
from murfey.util import sanitise_path
4444
from murfey.util.config import (
4545
MachineConfig,
46-
from_file,
4746
get_hostname,
47+
machine_config_from_file,
4848
security_from_file,
4949
)
5050
from murfey.util.db import (
@@ -93,7 +93,9 @@ class Settings(BaseSettings):
9393
machine_config: dict[str, MachineConfig] = {}
9494
if settings.murfey_machine_configuration:
9595
microscope = get_microscope()
96-
machine_config = from_file(Path(settings.murfey_machine_configuration), microscope)
96+
machine_config = machine_config_from_file(
97+
Path(settings.murfey_machine_configuration), microscope
98+
)
9799

98100

99101
# This will be the homepage for a given microscope.
@@ -114,19 +116,19 @@ async def root(request: Request):
114116
def machine_info() -> Optional[MachineConfig]:
115117
instrument_name = os.getenv("BEAMLINE")
116118
if settings.murfey_machine_configuration and instrument_name:
117-
return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
118-
instrument_name
119-
]
119+
return machine_config_from_file(
120+
Path(settings.murfey_machine_configuration), instrument_name
121+
)[instrument_name]
120122
return None
121123

122124

123125
@lru_cache(maxsize=5)
124126
@router.get("/instruments/{instrument_name}/machine")
125127
def machine_info_by_name(instrument_name: str) -> Optional[MachineConfig]:
126128
if settings.murfey_machine_configuration:
127-
return from_file(Path(settings.murfey_machine_configuration), instrument_name)[
128-
instrument_name
129-
]
129+
return machine_config_from_file(
130+
Path(settings.murfey_machine_configuration), instrument_name
131+
)[instrument_name]
130132
return None
131133

132134

src/murfey/server/feedback.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,9 @@ def _register_class_selection(message: dict, _db, demo: bool = False):
11001100
def _find_initial_model(visit: str, machine_config: MachineConfig) -> Path | None:
11011101
if machine_config.initial_model_search_directory:
11021102
visit_directory = (
1103-
machine_config.rsync_basepath / str(datetime.now().year) / visit
1103+
(machine_config.rsync_basepath or Path("")).resolve()
1104+
/ str(datetime.now().year)
1105+
/ visit
11041106
)
11051107
possible_models = [
11061108
p
@@ -1512,7 +1514,10 @@ def _flush_tomography_preprocessing(message: dict, _db):
15121514
"fm_dose": proc_params.dose_per_frame,
15131515
"frame_count": proc_params.frame_count,
15141516
"gain_ref": (
1515-
str(machine_config.rsync_basepath / proc_params.gain_ref)
1517+
str(
1518+
(machine_config.rsync_basepath or Path("")).resolve()
1519+
/ proc_params.gain_ref
1520+
)
15161521
if proc_params.gain_ref
15171522
else proc_params.gain_ref
15181523
),
@@ -2042,7 +2047,10 @@ def feedback_callback(header: dict, message: dict, _db=murfey_db) -> None:
20422047
angpix=float(message["pixel_size_on_image"]) * 1e10,
20432048
dose_per_frame=message["dose_per_frame"],
20442049
gain_ref=(
2045-
str(machine_config.rsync_basepath / message["gain_ref"])
2050+
str(
2051+
(machine_config.rsync_basepath or Path("")).resolve()
2052+
/ message["gain_ref"]
2053+
)
20462054
if message["gain_ref"] and machine_config.data_transfer_enabled
20472055
else message["gain_ref"]
20482056
),

0 commit comments

Comments
 (0)