From 2e669e0e95658bfaff06534adab13ed707083abe Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 8 Dec 2025 12:56:33 +0000 Subject: [PATCH 01/14] Removed unused and broken CLI for sending SPA workflow ISPyB messages --- pyproject.toml | 1 - src/murfey/cli/spa_ispyb_messages.py | 443 --------------------------- 2 files changed, 444 deletions(-) delete mode 100644 src/murfey/cli/spa_ispyb_messages.py diff --git a/pyproject.toml b/pyproject.toml index 31c733ec..2978b131 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,6 @@ GitHub = "https://github.com/DiamondLightSource/python-murfey" "murfey.sessions" = "murfey.cli.db_sessions:run" "murfey.simulate" = "murfey.cli.dummy:run" "murfey.spa_inject" = "murfey.cli.inject_spa_processing:run" -"murfey.spa_ispyb_entries" = "murfey.cli.spa_ispyb_messages:run" "murfey.transfer" = "murfey.cli.transfer:run" [project.entry-points."murfey.config.extraction"] "murfey_machine" = "murfey.util.config:get_extended_machine_config" diff --git a/src/murfey/cli/spa_ispyb_messages.py b/src/murfey/cli/spa_ispyb_messages.py deleted file mode 100644 index a183616c..00000000 --- a/src/murfey/cli/spa_ispyb_messages.py +++ /dev/null @@ -1,443 +0,0 @@ -import argparse -import os -import time -from datetime import datetime -from pathlib import Path - -import requests -import workflows -import xmltodict -import zocalo -from ispyb.sqlalchemy._auto_db_schema import ( - AutoProcProgram, - DataCollection, - DataCollectionGroup, - ProcessingJob, -) -from sqlmodel import Session as MurfeySession, create_engine, select - -from murfey.client.contexts.spa import _get_xml_list_index -from murfey.server.feedback import _murfey_id, _register -from murfey.server.ispyb import ISPyBSession, TransportManager, get_session_id -from murfey.server.murfey_db import url -from murfey.util import db -from murfey.util.api import url_path_for -from murfey.util.config import get_machine_config, get_microscope, get_security_config - - -def run(): - parser = 
argparse.ArgumentParser( - description="Inject movies for SPA processing from Murfey database movie store" - ) - - parser.add_argument( - "--tag", - dest="tag", - type=str, - required=True, - help="Tag from Murfey database Movie table", - ) - parser.add_argument( - "-s", - "--session-id", - dest="session_id", - required=True, - type=int, - help="Murfey session ID", - ) - parser.add_argument( - "-u", - "--url", - dest="url", - required=True, - type=str, - help="URL of Murfey server", - ) - parser.add_argument( - "-v", - "--visit", - dest="visit", - required=True, - type=str, - help="Visit name", - ) - parser.add_argument( - "--image-directory", - dest="image_directory", - required=True, - type=str, - help="Path to directory containing image files", - ) - parser.add_argument( - "--suffix", - dest="suffix", - required=True, - type=str, - help="Movie suffix", - ) - parser.add_argument( - "--metadata-file", - dest="metadata_file", - required=True, - type=str, - help="Path to metadata file", - ) - parser.add_argument( - "-m", - "--microscope", - dest="microscope", - type=str, - required=True, - help="Microscope as specified in the Murfey machine configuration", - ) - parser.add_argument( - "--flush-preprocess", - dest="flush_preprocess", - default=False, - action="store_true", - help="Flush Murfey preprocess stash after creating ISPyB entries", - ) - parser.add_argument( - "--eer-fractionation-file", - dest="eer_fractionation_file", - default=None, - help="Path to EER fractionation file if relevant", - ) - parser.add_argument( - "--dose-per-frame", - dest="dose_per_frame", - default=None, - help="Dose per frame overwrite", - ) - parser.add_argument( - "--injection-delay", - dest="injection_delay", - default=1, - type=float, - help="Time spacing between processing requests in seconds", - ) - - zc = zocalo.configuration.from_file() - zc.activate() - zc.add_command_line_options(parser) - workflows.transport.add_command_line_options(parser, transport_argument=True) - - args 
= parser.parse_args() - - if args.microscope: - os.environ["BEAMLINE"] = args.microscope - - machine_config = get_machine_config() - _url = url(machine_config) - engine = create_engine(_url) - murfey_db = MurfeySession(engine) - - with open(args.metadata_file, "r") as xml: - data = xmltodict.parse(xml.read()) - - metadata = {} - metadata["voltage"] = ( - float(data["MicroscopeImage"]["microscopeData"]["gun"]["AccelerationVoltage"]) - / 1000 - ) - metadata["image_size_x"] = data["MicroscopeImage"]["microscopeData"]["acquisition"][ - "camera" - ]["ReadoutArea"]["a:width"] - metadata["image_size_y"] = data["MicroscopeImage"]["microscopeData"]["acquisition"][ - "camera" - ]["ReadoutArea"]["a:height"] - metadata["pixel_size_on_image"] = float( - data["MicroscopeImage"]["SpatialScale"]["pixelSize"]["x"]["numericValue"] - ) - magnification = data["MicroscopeImage"]["microscopeData"]["optics"][ - "TemMagnification" - ]["NominalMagnification"] - metadata["magnification"] = magnification - try: - dose_index = _get_xml_list_index( - "Dose", - data["MicroscopeImage"]["CustomData"]["a:KeyValueOfstringanyType"], - ) - metadata["total_exposed_dose"] = round( - float( - data["MicroscopeImage"]["CustomData"]["a:KeyValueOfstringanyType"][ - dose_index - ]["a:Value"]["#text"] - ) - * (1e-20), - 2, - ) # convert e / m^2 to e / A^2 - except ValueError: - metadata["total_exposed_dose"] = 1 - try: - num_fractions = int( - data["MicroscopeImage"]["microscopeData"]["acquisition"]["camera"][ - "CameraSpecificInput" - ]["a:KeyValueOfstringanyType"][2]["a:Value"]["b:NumberOffractions"] - ) - except (KeyError, IndexError): - num_fractions = 1 - metadata["c2aperture"] = data["MicroscopeImage"]["CustomData"][ - "a:KeyValueOfstringanyType" - ][3]["a:Value"]["#text"] - metadata["exposure_time"] = data["MicroscopeImage"]["microscopeData"][ - "acquisition" - ]["camera"]["ExposureTime"] - try: - metadata["slit_width"] = data["MicroscopeImage"]["microscopeData"]["optics"][ - "EnergyFilter" - 
]["EnergySelectionSlitWidth"] - except KeyError: - metadata["slit_width"] = None - metadata["phase_plate"] = ( - 1 - if data["MicroscopeImage"]["CustomData"]["a:KeyValueOfstringanyType"][11][ - "a:Value" - ]["#text"] - == "true" - else 0 - ) - binning_factor_xml = int( - data["MicroscopeImage"]["microscopeData"]["acquisition"]["camera"]["Binning"][ - "a:x" - ] - ) - binning_factor = 1 - server_config = requests.get( - f"{args.url}{url_path_for('session_control.router', 'machine_info_by_instrument', instrument_name=args.microscope)}" - ).json() - if server_config.get("superres"): - # If camera is capable of superres and collection is in superres - binning_factor = 2 - elif not server_config.get("superres"): - binning_factor_xml = 2 - if magnification: - ps_from_mag = ( - server_config.get("calibrations", {}) - .get("magnification", {}) - .get(magnification) - ) - if ps_from_mag: - metadata["pixel_size_on_image"] = float(ps_from_mag) * 1e-10 - else: - metadata["pixel_size_on_image"] /= binning_factor - metadata["image_size_x"] = str(int(metadata["image_size_x"]) * binning_factor) - metadata["image_size_y"] = str(int(metadata["image_size_y"]) * binning_factor) - metadata["motion_corr_binning"] = 1 if binning_factor_xml == 2 else 2 - metadata["gain_ref"] = ( - f"{datetime.now().year}/{args.visit}/processing/gain.mrc" - if args.gain_ref is None - else args.gain_ref - ) - metadata["gain_ref_superres"] = ( - f"{datetime.now().year}/{args.visit}/processing/gain_superres.mrc" - if args.gain_ref_superres is None - else args.gain_ref_superres - ) - if args.dose_per_frame: - metadata["dose_per_frame"] = args.dose_per_frame - else: - metadata["dose_per_frame"] = round( - metadata["total_exposed_dose"] / num_fractions, 3 - ) - - metadata["use_cryolo"] = True - metadata["symmetry"] = "C1" - metadata["mask_diameter"] = None - metadata["boxsize"] = None - metadata["downscale"] = True - metadata["small_boxsize"] = None - metadata["eer_fractionation"] = args.eer_fractionation_file - 
metadata["source"] = args.tag - metadata["particle_diameter"] = 0 - metadata["estimate_particle_diameter"] = True - - ispyb_session_id = ( - get_session_id( - microscope=args.microscope, - proposal_code=args.visit[:2], - proposal_number=args.visit[2:].split("-")[0], - visit_number=args.visit.split("-")[1], - db=ISPyBSession(), - ), - ) - - record = DataCollectionGroup( - sessionId=ispyb_session_id, - experimentType="SPA", - experimentTypeId=37, - ) - dcgid = _register(record, {}) - murfey_dcg = db.DataCollectionGroup( - id=dcgid, - session_id=args.session_id, - tag=args.tag, - ) - murfey_db.add(murfey_dcg) - murfey_db.commit() - murfey_db.close() - - record = DataCollection( - SESSIONID=ispyb_session_id, - experimenttype="SPA", - imageDirectory=args.image_directory, - imageSuffix=args.suffix, - voltage=metadata["voltage"], - dataCollectionGroupId=dcgid, - pixelSizeOnImage=str(float(metadata["pixel_size_on_image"]) * 1e9), - imageSizeX=metadata["image_size_x"], - imageSizeY=metadata["image_size_y"], - slitGapHorizontal=metadata.get("slit_width"), - magnification=metadata.get("magnification"), - exposureTime=metadata.get("exposure_time"), - totalExposedDose=metadata.get("total_exposed_dose"), - c2aperture=metadata.get("c2aperture"), - phasePlate=int(metadata.get("phase_plate", 0)), - ) - dcid = _register( - record, - {}, - tag="", - ) - murfey_dc = db.DataCollection( - id=dcid, - tag=args.tag, - dcg_id=dcgid, - ) - murfey_db.add(murfey_dc) - murfey_db.commit() - murfey_db.close() - - for recipe in ( - "em-spa-preprocess", - "em-spa-extract", - "em-spa-class2d", - "em-spa-class3d", - ): - record = ProcessingJob(dataCollectionId=dcid, recipe=recipe) - pid = _register(record, {}) - murfey_pj = db.ProcessingJob(id=pid, recipe=recipe, dc_id=dcid) - murfey_db.add(murfey_pj) - murfey_db.commit() - record = AutoProcProgram( - processingJobId=pid, processingStartTime=datetime.now() - ) - appid = _register(record, {}) - murfey_app = db.AutoProcProgram(id=appid, pj_id=pid) - 
murfey_db.add(murfey_app) - murfey_db.commit() - murfey_db.close() - - collected_ids = murfey_db.exec( - select( - db.DataCollectionGroup, - db.DataCollection, - db.ProcessingJob, - db.AutoProcProgram, - ) - .where(db.DataCollectionGroup.session_id == args.session_id) - .where(db.DataCollectionGroup.tag == args.tag) - .where(db.DataCollection.dcg_id == db.DataCollectionGroup.id) - .where(db.ProcessingJob.dc_id == db.DataCollection.id) - .where(db.AutoProcProgram.pj_id == db.ProcessingJob.id) - .where(db.ProcessingJob.recipe == "em-spa-preprocess") - ).one() - machine_config = get_machine_config() - security_config = get_security_config() - params = db.SPARelionParameters( - pj_id=collected_ids[2].id, - angpix=float(metadata["pixel_size_on_image"]) * 1e10, - dose_per_frame=metadata["dose_per_frame"], - gain_ref=( - str(machine_config.rsync_basepath / metadata["gain_ref"]) - if metadata["gain_ref"] - else metadata["gain_ref"] - ), - voltage=metadata["voltage"], - motion_corr_binning=metadata["motion_corr_binning"], - eer_grouping=metadata["eer_fractionation"], - symmetry=metadata["symmetry"], - particle_diameter=metadata["particle_diameter"], - downscale=metadata["downscale"], - boxsize=metadata["boxsize"], - small_boxsize=metadata["small_boxsize"], - mask_diameter=metadata["mask_diameter"], - ) - feedback_params = db.ClassificationFeedbackParameters( - pj_id=collected_ids[2].id, - estimate_particle_diameter=not bool(metadata["particle_diameter"]), - hold_class2d=False, - hold_class3d=False, - class_selection_score=0, - star_combination_job=0, - initial_model="", - next_job=0, - ) - murfey_db.add(params) - murfey_db.add(feedback_params) - murfey_db.commit() - murfey_db.close() - - if args.flush_preprocess: - _transport_object = TransportManager(args.transport) - _transport_object.feedback_queue = security_config.feedback_queue - stashed_files = murfey_db.exec( - select(db.PreprocessStash) - .where(db.PreprocessStash.session_id == args.session_id) - 
.where(db.PreprocessStash.tag == args.tag) - ).all() - murfey_ids = _murfey_id( - collected_ids[3].id, - murfey_db, - number=2 * len(stashed_files), - close=False, - ) - if feedback_params.picker_murfey_id is None: - feedback_params.picker_murfey_id = murfey_ids[1] - murfey_db.add(feedback_params) - - for i, f in enumerate(stashed_files): - mrcp = Path(f.mrc_out) - if not mrcp.parent.exists(): - mrcp.parent.mkdir(parents=True) - movie = db.Movie( - murfey_id=murfey_ids[2 * i], - path=f.file_path, - image_number=f.image_number, - tag=f.tag, - ) - murfey_db.add(movie) - zocalo_message = { - "recipes": ["em-spa-preprocess"], - "parameters": { - "feedback_queue": _transport_object.feedback_queue, - "dcid": collected_ids[1].id, - "kv": metadata["voltage"], - "autoproc_program_id": collected_ids[3].id, - "movie": f.file_path, - "mrc_out": f.mrc_out, - "pixel_size": float(metadata["pixel_size_on_image"]) * 1e10, - "image_number": f.image_number, - "microscope": get_microscope(), - "mc_uuid": murfey_ids[2 * i], - "ft_bin": metadata["motion_corr_binning"], - "fm_dose": metadata["dose_per_frame"], - "gain_ref": ( - str(machine_config.rsync_basepath / metadata["gain_ref"]) - if metadata["gain_ref"] - else metadata["gain_ref"] - ), - "downscale": metadata["downscale"], - "picker_uuid": murfey_ids[2 * i + 1], - "session_id": args.session_id, - "particle_diameter": metadata["particle_diameter"] or 0, - "fm_int_file": f.eer_fractionation_file, - }, - } - _transport_object.send( - "processing_recipe", zocalo_message, new_connection=True - ) - murfey_db.delete(f) - time.sleep(args.injection_delay) - murfey_db.commit() - murfey_db.close() From d19a40f697b64d5c36f500b00aa83bbdfbb3d73b Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 8 Dec 2025 13:56:04 +0000 Subject: [PATCH 02/14] Make all existing MachineConfig keys fully optional --- src/murfey/util/config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/murfey/util/config.py 
b/src/murfey/util/config.py index a4aa6220..6f227c6b 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -36,14 +36,14 @@ class MachineConfig(BaseModel): # type: ignore # Hardware and software ----------------------------------------------------------- camera: str = "FALCON" superres: bool = False - calibrations: dict[str, Any] - acquisition_software: list[str] + calibrations: dict[str, Any] = {} + acquisition_software: list[str] = [] software_versions: dict[str, str] = {} software_settings_output_directories: dict[str, list[str]] = {} data_required_substrings: dict[str, dict[str, list[str]]] = {} # Client side directory setup ----------------------------------------------------- - data_directories: list[Path] + data_directories: list[Path] = [] create_directories: list[str] = ["atlas"] analyse_created_directories: list[str] = [] gain_reference_directory: Optional[Path] = None @@ -58,7 +58,7 @@ class MachineConfig(BaseModel): # type: ignore data_transfer_enabled: bool = True rsync_url: str = "" rsync_module: str = "" - rsync_basepath: Path + rsync_basepath: Optional[Path] = None allow_removal: bool = False # Upstream data download setup @@ -86,7 +86,7 @@ class MachineConfig(BaseModel): # type: ignore } # Particle picking setup - default_model: Path + default_model: Optional[Path] = None picking_model_search_directory: str = "processing" initial_model_search_directory: str = "processing/initial_model" From 6b07927e733d95a54dd2d4e4b3fa8ce5187348ca Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 8 Dec 2025 14:24:42 +0000 Subject: [PATCH 03/14] If MachineConfig.rsync_basepath or MachineConfig.default_model is None, default to 'Path().resolve()' --- src/murfey/server/api/clem.py | 2 +- src/murfey/server/api/file_io_frontend.py | 5 +++-- src/murfey/server/api/file_io_instrument.py | 13 ++++++++----- src/murfey/server/api/file_io_shared.py | 13 ++++++------- src/murfey/server/api/workflow.py | 14 ++++++++------ src/murfey/server/feedback.py | 
14 +++++++++++--- src/murfey/util/processing_params.py | 6 ++++-- src/murfey/workflows/clem/__init__.py | 2 +- 8 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/murfey/server/api/clem.py b/src/murfey/server/api/clem.py index 128bb280..df484e7f 100644 --- a/src/murfey/server/api/clem.py +++ b/src/murfey/server/api/clem.py @@ -83,7 +83,7 @@ def validate_and_sanitise( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - rsync_basepath = machine_config.rsync_basepath.resolve() + rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() # Check that full file path doesn't contain unallowed characters # Currently allows only: diff --git a/src/murfey/server/api/file_io_frontend.py b/src/murfey/server/api/file_io_frontend.py index 9bf2b43a..7bd6d44a 100644 --- a/src/murfey/server/api/file_io_frontend.py +++ b/src/murfey/server/api/file_io_frontend.py @@ -50,10 +50,11 @@ async def create_symlink( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - symlink_full_path = machine_config.rsync_basepath / symlink_params.symlink + rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() + symlink_full_path = rsync_basepath / symlink_params.symlink if symlink_full_path.is_symlink() and symlink_params.override: symlink_full_path.unlink() if symlink_full_path.exists(): return "" - symlink_full_path.symlink_to(machine_config.rsync_basepath / symlink_params.target) + symlink_full_path.symlink_to(rsync_basepath / symlink_params.target) return str(symlink_params.symlink) diff --git a/src/murfey/server/api/file_io_instrument.py b/src/murfey/server/api/file_io_instrument.py index 2644b7f8..bbe12f1b 100644 --- a/src/murfey/server/api/file_io_instrument.py +++ b/src/murfey/server/api/file_io_instrument.py @@ -57,7 +57,8 @@ def suggest_path( ) # Construct the full path to where the dataset is to be saved - check_path = machine_config.rsync_basepath / base_path + 
rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() + check_path = rsync_basepath / base_path # Check previous year to account for the year rolling over during data collection if not check_path.parent.exists(): @@ -69,7 +70,7 @@ def suggest_path( base_path_parts[year_idx] = str(int(part) - 1) base_path = "/".join(base_path_parts) check_path_prev = check_path - check_path = machine_config.rsync_basepath / base_path + check_path = rsync_basepath / base_path # If it's not in the previous year either, it's a genuine error if not check_path.parent.exists(): @@ -88,7 +89,7 @@ def suggest_path( check_path.mkdir(mode=0o750) if params.extra_directory: (check_path / secure_filename(params.extra_directory)).mkdir(mode=0o750) - return {"suggested_path": check_path.relative_to(machine_config.rsync_basepath)} + return {"suggested_path": check_path.relative_to(rsync_basepath)} class Dest(BaseModel): @@ -107,7 +108,9 @@ def make_rsyncer_destination(session_id: int, destination: Dest, db=murfey_db): ] if not machine_config: raise ValueError("No machine configuration set when making rsyncer destination") - full_destination_path = machine_config.rsync_basepath / destination_path + full_destination_path = ( + machine_config.rsync_basepath or Path("") + ).resolve() / destination_path for parent_path in full_destination_path.parents: parent_path.mkdir(mode=0o750, exist_ok=True) return destination @@ -151,7 +154,7 @@ async def write_eer_fractionation_file( ) / secure_filename(fractionation_params.fractionation_file_name) else: file_path = ( - Path(machine_config.rsync_basepath) + (machine_config.rsync_basepath or Path("")).resolve() / str(datetime.now().year) / secure_filename(visit_name) / machine_config.gain_directory_name diff --git a/src/murfey/server/api/file_io_shared.py b/src/murfey/server/api/file_io_shared.py index 310c8e4b..d510d732 100644 --- a/src/murfey/server/api/file_io_shared.py +++ b/src/murfey/server/api/file_io_shared.py @@ -37,8 +37,9 @@ async def 
process_gain( executables = machine_config.external_executables env = machine_config.external_environment safe_path_name = secure_filename(gain_reference_params.gain_ref.name) + rsync_basepath = machine_config.rsync_basepath or Path("") filepath = ( - Path(machine_config.rsync_basepath) + rsync_basepath / str(datetime.now().year) / secure_filename(visit_name) / machine_config.gain_directory_name @@ -48,7 +49,7 @@ async def process_gain( if not filepath.exists(): filepath_prev = filepath filepath = ( - Path(machine_config.rsync_basepath) + rsync_basepath / str(datetime.now().year - 1) / secure_filename(visit_name) / machine_config.gain_directory_name @@ -80,14 +81,12 @@ async def process_gain( ) if new_gain_ref and new_gain_ref_superres: return { - "gain_ref": new_gain_ref.relative_to(Path(machine_config.rsync_basepath)), - "gain_ref_superres": new_gain_ref_superres.relative_to( - Path(machine_config.rsync_basepath) - ), + "gain_ref": new_gain_ref.relative_to(rsync_basepath), + "gain_ref_superres": new_gain_ref_superres.relative_to(rsync_basepath), } elif new_gain_ref: return { - "gain_ref": new_gain_ref.relative_to(Path(machine_config.rsync_basepath)), + "gain_ref": new_gain_ref.relative_to(rsync_basepath), "gain_ref_superres": None, } else: diff --git a/src/murfey/server/api/workflow.py b/src/murfey/server/api/workflow.py index fcc793de..d34f78bf 100644 --- a/src/murfey/server/api/workflow.py +++ b/src/murfey/server/api/workflow.py @@ -214,15 +214,14 @@ def start_dc( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] + rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() logger.info( f"Starting data collection on microscope {instrument_name!r} " - f"with basepath {sanitise(str(machine_config.rsync_basepath))} and directory {sanitise(dc_params.image_directory)}" + f"with basepath {sanitise(str(rsync_basepath))} and directory {sanitise(dc_params.image_directory)}" ) dc_parameters = { "visit": visit_name, - 
"image_directory": str( - machine_config.rsync_basepath / dc_params.image_directory - ), + "image_directory": str(rsync_basepath / dc_params.image_directory), "start_time": str(datetime.now()), "voltage": dc_params.voltage, "pixel_size": str(float(dc_params.pixel_size_on_image) * 1e9), @@ -713,7 +712,10 @@ async def request_tomography_preprocessing( "fm_dose": proc_file.dose_per_frame, "frame_count": proc_file.frame_count, "gain_ref": ( - str(machine_config.rsync_basepath / proc_file.gain_ref) + str( + (machine_config.rsync_basepath or Path("")).resolve() + / proc_file.gain_ref + ) if proc_file.gain_ref and machine_config.data_transfer_enabled else proc_file.gain_ref ), @@ -1029,7 +1031,7 @@ async def make_gif( instrument_name ] output_dir = ( - Path(machine_config.rsync_basepath) + (machine_config.rsync_basepath or Path("")).resolve() / secure_filename(year) / secure_filename(visit_name) / "processed" diff --git a/src/murfey/server/feedback.py b/src/murfey/server/feedback.py index 480db7fa..2efad95c 100644 --- a/src/murfey/server/feedback.py +++ b/src/murfey/server/feedback.py @@ -1100,7 +1100,9 @@ def _register_class_selection(message: dict, _db, demo: bool = False): def _find_initial_model(visit: str, machine_config: MachineConfig) -> Path | None: if machine_config.initial_model_search_directory: visit_directory = ( - machine_config.rsync_basepath / str(datetime.now().year) / visit + (machine_config.rsync_basepath or Path("")).resolve() + / str(datetime.now().year) + / visit ) possible_models = [ p @@ -1512,7 +1514,10 @@ def _flush_tomography_preprocessing(message: dict, _db): "fm_dose": proc_params.dose_per_frame, "frame_count": proc_params.frame_count, "gain_ref": ( - str(machine_config.rsync_basepath / proc_params.gain_ref) + str( + (machine_config.rsync_basepath or Path("")).resolve() + / proc_params.gain_ref + ) if proc_params.gain_ref else proc_params.gain_ref ), @@ -2042,7 +2047,10 @@ def feedback_callback(header: dict, message: dict, _db=murfey_db) -> 
None: angpix=float(message["pixel_size_on_image"]) * 1e10, dose_per_frame=message["dose_per_frame"], gain_ref=( - str(machine_config.rsync_basepath / message["gain_ref"]) + str( + (machine_config.rsync_basepath or Path("")).resolve() + / message["gain_ref"] + ) if message["gain_ref"] and machine_config.data_transfer_enabled else message["gain_ref"] ), diff --git a/src/murfey/util/processing_params.py b/src/murfey/util/processing_params.py index a40ba987..760b69ff 100644 --- a/src/murfey/util/processing_params.py +++ b/src/murfey/util/processing_params.py @@ -42,7 +42,9 @@ def cryolo_model_path(visit: str, instrument_name: str) -> Path: ] if machine_config.picking_model_search_directory: visit_directory = ( - machine_config.rsync_basepath / str(datetime.now().year) / visit + (machine_config.rsync_basepath or Path("")).resolve() + / str(datetime.now().year) + / visit ) possible_models = list( (visit_directory / machine_config.picking_model_search_directory).glob( @@ -51,7 +53,7 @@ def cryolo_model_path(visit: str, instrument_name: str) -> Path: ) if possible_models: return sorted(possible_models, key=lambda x: x.stat().st_ctime)[-1] - return machine_config.default_model + return (machine_config.default_model or Path("")).resolve() class CLEMProcessingParameters(BaseModel): diff --git a/src/murfey/workflows/clem/__init__.py b/src/murfey/workflows/clem/__init__.py index b40d48cb..4aedd01c 100644 --- a/src/murfey/workflows/clem/__init__.py +++ b/src/murfey/workflows/clem/__init__.py @@ -64,7 +64,7 @@ def _validate_and_sanitise( machine_config = get_machine_config(instrument_name=instrument_name)[ instrument_name ] - rsync_basepath = machine_config.rsync_basepath.resolve() + rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() # Check that full file path doesn't contain unallowed characters # Currently allows only: From c92b9e117e76ae82dc8245aa1ef685f5fa6e8480 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 8 Dec 2025 14:32:48 +0000 Subject: 
[PATCH 04/14] Renamed 'from_file' to 'machine_config_from_file' for clarity --- src/murfey/cli/inject_spa_processing.py | 2 +- src/murfey/server/api/session_shared.py | 13 +++++++++---- src/murfey/server/demo_api.py | 18 ++++++++++-------- src/murfey/util/config.py | 6 ++++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/murfey/cli/inject_spa_processing.py b/src/murfey/cli/inject_spa_processing.py index 69f350c6..ba8d4d32 100644 --- a/src/murfey/cli/inject_spa_processing.py +++ b/src/murfey/cli/inject_spa_processing.py @@ -87,7 +87,7 @@ def run(): help="Path to EER fractionation file if relevant", ) - zc = zocalo.configuration.from_file() + zc = zocalo.configuration.from_file() zc.activate() zc.add_command_line_options(parser) workflows.transport.add_command_line_options(parser, transport_argument=True) diff --git a/src/murfey/server/api/session_shared.py b/src/murfey/server/api/session_shared.py index 29607d98..eea5e25d 100644 --- a/src/murfey/server/api/session_shared.py +++ b/src/murfey/server/api/session_shared.py @@ -9,7 +9,12 @@ import murfey.server.prometheus as prom from murfey.util import safe_run, sanitise, secure_path -from murfey.util.config import MachineConfig, from_file, get_machine_config, settings +from murfey.util.config import ( + MachineConfig, + get_machine_config, + machine_config_from_file, + settings, +) from murfey.util.db import ( DataCollection, DataCollectionGroup, @@ -26,9 +31,9 @@ @lru_cache(maxsize=5) def get_machine_config_for_instrument(instrument_name: str) -> Optional[MachineConfig]: if settings.murfey_machine_configuration: - return from_file(Path(settings.murfey_machine_configuration), instrument_name)[ - instrument_name - ] + return machine_config_from_file( + Path(settings.murfey_machine_configuration), instrument_name + )[instrument_name] return None diff --git a/src/murfey/server/demo_api.py b/src/murfey/server/demo_api.py index 79fab4c0..039ca905 100644 ---
a/src/murfey/server/demo_api.py +++ b/src/murfey/server/demo_api.py @@ -43,8 +43,8 @@ from murfey.util import sanitise_path from murfey.util.config import ( MachineConfig, - from_file, get_hostname, + machine_config_from_file, security_from_file, ) from murfey.util.db import ( @@ -93,7 +93,9 @@ class Settings(BaseSettings): machine_config: dict[str, MachineConfig] = {} if settings.murfey_machine_configuration: microscope = get_microscope() - machine_config = from_file(Path(settings.murfey_machine_configuration), microscope) + machine_config = machine_config_from_file( + Path(settings.murfey_machine_configuration), microscope + ) # This will be the homepage for a given microscope. @@ -114,9 +116,9 @@ async def root(request: Request): def machine_info() -> Optional[MachineConfig]: instrument_name = os.getenv("BEAMLINE") if settings.murfey_machine_configuration and instrument_name: - return from_file(Path(settings.murfey_machine_configuration), instrument_name)[ - instrument_name - ] + return machine_config_from_file( + Path(settings.murfey_machine_configuration), instrument_name + )[instrument_name] return None @@ -124,9 +126,9 @@ def machine_info() -> Optional[MachineConfig]: @router.get("/instruments/{instrument_name}/machine") def machine_info_by_name(instrument_name: str) -> Optional[MachineConfig]: if settings.murfey_machine_configuration: - return from_file(Path(settings.murfey_machine_configuration), instrument_name)[ - instrument_name - ] + return machine_config_from_file( + Path(settings.murfey_machine_configuration), instrument_name + )[instrument_name] return None diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 6f227c6b..dbe79266 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -147,7 +147,9 @@ def validate_software_versions(cls, v: dict[str, Any]) -> dict[str, str]: return v -def from_file(config_file_path: Path, instrument: str = "") -> dict[str, MachineConfig]: +def machine_config_from_file( + 
config_file_path: Path, instrument: str = "" +) -> dict[str, MachineConfig]: with open(config_file_path, "r") as config_stream: config = yaml.safe_load(config_stream) return { @@ -260,7 +262,7 @@ def get_machine_config(instrument_name: str = "") -> dict[str, MachineConfig]: } if settings.murfey_machine_configuration: microscope = instrument_name - machine_config = from_file( + machine_config = machine_config_from_file( Path(settings.murfey_machine_configuration), microscope ) return machine_config From 14f42bbea27cafa14c3a85b85d9515418513144d Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 11:19:34 +0000 Subject: [PATCH 05/14] Removed 'get_machine_config_for_instrument' from 'session_shared' and replaced it with 'get_machine_config' from 'murfey.util.config' instead --- src/murfey/server/api/session_control.py | 7 +++---- src/murfey/server/api/session_info.py | 7 +++---- src/murfey/server/api/session_shared.py | 19 ++----------------- 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/src/murfey/server/api/session_control.py b/src/murfey/server/api/session_control.py index 9d977002..c384fb62 100644 --- a/src/murfey/server/api/session_control.py +++ b/src/murfey/server/api/session_control.py @@ -24,7 +24,6 @@ get_foil_holes_from_grid_square as _get_foil_holes_from_grid_square, get_grid_squares as _get_grid_squares, get_grid_squares_from_dcg as _get_grid_squares_from_dcg, - get_machine_config_for_instrument, get_tiff_file as _get_tiff_file, get_upstream_file as _get_upstream_file, remove_session_by_id, @@ -32,7 +31,7 @@ from murfey.server.ispyb import DB as ispyb_db, get_all_ongoing_visits from murfey.server.murfey_db import murfey_db from murfey.util import sanitise -from murfey.util.config import MachineConfig +from murfey.util.config import get_machine_config from murfey.util.db import ( AutoProcProgram, ClientEnvironment, @@ -80,8 +79,8 @@ async def get_current_timestamp(): @router.get("/instruments/{instrument_name}/machine") -def 
machine_info_by_instrument(instrument_name: str) -> Optional[MachineConfig]: - return get_machine_config_for_instrument(instrument_name) +def machine_info_by_instrument(instrument_name: str): + return get_machine_config(instrument_name)[instrument_name] @router.get("/new_client_id/") diff --git a/src/murfey/server/api/session_info.py b/src/murfey/server/api/session_info.py index 8da98536..d43dd1d8 100644 --- a/src/murfey/server/api/session_info.py +++ b/src/murfey/server/api/session_info.py @@ -24,7 +24,6 @@ get_foil_holes_from_grid_square as _get_foil_holes_from_grid_square, get_grid_squares as _get_grid_squares, get_grid_squares_from_dcg as _get_grid_squares_from_dcg, - get_machine_config_for_instrument, get_tiff_file as _get_tiff_file, get_upstream_file as _get_upstream_file, remove_session_by_id, @@ -32,7 +31,7 @@ from murfey.server.ispyb import DB as ispyb_db, get_all_ongoing_visits from murfey.server.murfey_db import murfey_db from murfey.util import sanitise -from murfey.util.config import MachineConfig +from murfey.util.config import get_machine_config from murfey.util.db import ( ClassificationFeedbackParameters, ClientEnvironment, @@ -78,8 +77,8 @@ def connections_check(): @router.get("/instruments/{instrument_name}/machine") def machine_info_by_instrument( instrument_name: MurfeyInstrumentName, -) -> Optional[MachineConfig]: - return get_machine_config_for_instrument(instrument_name) +): + return get_machine_config(instrument_name)[instrument_name] @router.get("/instruments/{instrument_name}/visits_raw", response_model=List[Visit]) diff --git a/src/murfey/server/api/session_shared.py b/src/murfey/server/api/session_shared.py index eea5e25d..644b9b35 100644 --- a/src/murfey/server/api/session_shared.py +++ b/src/murfey/server/api/session_shared.py @@ -1,7 +1,6 @@ import logging -from functools import lru_cache from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List from sqlmodel import select from 
sqlmodel.orm.session import Session as SQLModelSession @@ -9,12 +8,7 @@ import murfey.server.prometheus as prom from murfey.util import safe_run, sanitise, secure_path -from murfey.util.config import ( - MachineConfig, - get_machine_config, - machine_config_from_file, - settings, -) +from murfey.util.config import get_machine_config from murfey.util.db import ( DataCollection, DataCollectionGroup, @@ -28,15 +22,6 @@ logger = logging.getLogger("murfey.server.api.shared") -@lru_cache(maxsize=5) -def get_machine_config_for_instrument(instrument_name: str) -> Optional[MachineConfig]: - if settings.murfey_machine_configuration: - return machine_config_from_file( - Path(settings.murfey_machine_configuration), instrument_name - )[instrument_name] - return None - - def remove_session_by_id(session_id: int, db): session = db.exec(select(MurfeySession).where(MurfeySession.id == session_id)).one() sessions_for_visit = db.exec( From 404b9890ca41db309478d36eb01d90192a79645c Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 11:24:29 +0000 Subject: [PATCH 06/14] Updated 'machine_config_from_file' function to parse and construct instrument machine configs hierarchically; it can load all the machine configs or just the specified one --- src/murfey/util/config.py | 89 ++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 21 deletions(-) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index dbe79266..54840a26 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -147,16 +147,72 @@ def validate_software_versions(cls, v: dict[str, Any]) -> dict[str, str]: return v +@lru_cache(maxsize=1) def machine_config_from_file( - config_file_path: Path, instrument: str = "" + config_file_path: Path, + instrument_name: str, ) -> dict[str, MachineConfig]: + """ + Loads the machine config YAML file and constructs instrument-specific configs from + a hierarchical set of dictionary key-value pairs. 
It will populate the keys listed + in the general dictionary, then update the keys specified in the shared instrument + dictionary, before finally updating the keys for that specific instrument. + """ + + def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): + """ + Helper function to recursively update nested dictioanry values. + If the old and new values are both dicts, it will add the new keys and values + to the existing dictionary recursively without overwriting entries. + If the old and new values are both lists, it will extend the existing list. + For all other values, it will overwrite the existing value with the new one. + """ + for key, value in new.items(): + # If new values are dicts and dict values already exist, do recursive update + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + base[key] = _update_nested_values(base[key], value) + # If new values are lists and a list already exists, extend the list + if key in base and isinstance(base[key], list) and isinstance(value, list): + base[key].extend(value) + # Otherwise, overwrite values as normal + else: + base[key] = value + return base + + # Load the dict from the file with open(config_file_path, "r") as config_stream: - config = yaml.safe_load(config_stream) - return { - i: MachineConfig(**config[i]) - for i in config.keys() - if not instrument or i == instrument - } + master_config: dict[str, Any] = yaml.safe_load(config_stream) + + # Construct requested machine configs from the YAML file + all_machine_configs: dict[str, MachineConfig] = {} + for i in sorted(master_config.keys()): + # Skip reserved top-level keys + if i in ("general", "clem", "fib", "tem"): + continue + # If instrument name is set, skip irrelevant configs + if instrument_name and i != instrument_name: + continue + print(f"Parsing key {i}") + # Construct instrument config hierarchically + config: dict[str, Any] = {} + + # Populate with general values + general_config: dict[str, Any] = 
master_config.get("general", {}) + config = _update_nested_values(config, general_config) + + # Populate with shared instrument values + instrument_config: dict[str, Any] = master_config.get(i, {}) + instrument_shared_config: dict[str, Any] = master_config.get( + str(instrument_config.get("instrument_type", "")).lower(), {} + ) + config = _update_nested_values(config, instrument_shared_config) + + # Insert instrument-specific values + config = _update_nested_values(config, instrument_config) + + all_machine_configs[i] = MachineConfig(**config) + + return all_machine_configs class Security(BaseModel): @@ -250,22 +306,13 @@ def get_security_config() -> Security: @lru_cache(maxsize=1) def get_machine_config(instrument_name: str = "") -> dict[str, MachineConfig]: - machine_config = { - "": MachineConfig( - acquisition_software=[], - calibrations={}, - data_directories=[], - rsync_basepath=Path("dls/tmp"), - murfey_db_credentials="", - default_model="/tmp/weights.h5", - ) - } + # Create an empty machine config as a placeholder + machine_configs = {instrument_name: MachineConfig()} if settings.murfey_machine_configuration: - microscope = instrument_name - machine_config = machine_config_from_file( - Path(settings.murfey_machine_configuration), microscope + machine_configs = machine_config_from_file( + Path(settings.murfey_machine_configuration), instrument_name ) - return machine_config + return machine_configs def get_extended_machine_config( From ffc67233fb944c6c5d184c869d7e6c57a8836aa6 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 11:58:47 +0000 Subject: [PATCH 07/14] The 'from_file' in 'murfey.cli.inject_spa_processing' is not the same as the 'from_file' formerly in 'murfey.util.config'; revert the change --- src/murfey/cli/inject_spa_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/cli/inject_spa_processing.py b/src/murfey/cli/inject_spa_processing.py index ba8d4d32..69f350c6 100644 --- 
a/src/murfey/cli/inject_spa_processing.py +++ b/src/murfey/cli/inject_spa_processing.py @@ -87,7 +87,7 @@ def run(): help="Path to EER fractionation file if relevant", ) - zc = zocalo.configuration.machine_config_from_file() + zc = zocalo.configuration.from_file() zc.activate() zc.add_command_line_options(parser) workflows.transport.add_command_line_options(parser, transport_argument=True) From 09d324164d2122dfab3b26b5e0d1f3b5f7da4379 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 12:04:27 +0000 Subject: [PATCH 08/14] Fixed typo and missed print statements --- src/murfey/util/config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 54840a26..95601fa2 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -161,7 +161,7 @@ def machine_config_from_file( def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): """ - Helper function to recursively update nested dictioanry values. + Helper function to recursively update nested dictionary values. If the old and new values are both dicts, it will add the new keys and values to the existing dictionary recursively without overwriting entries. If the old and new values are both lists, it will extend the existing list. 
@@ -192,7 +192,6 @@ def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): # If instrument name is set, skip irrelevant configs if instrument_name and i != instrument_name: continue - print(f"Parsing key {i}") # Construct instrument config hierarchically config: dict[str, Any] = {} From d5120a79a1055b53eaf88c84eeb1ec3a5eb090a2 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 14:22:46 +0000 Subject: [PATCH 09/14] Sanitise symlink path and verify that it's relative to 'rsync_basepath' --- src/murfey/server/api/file_io_frontend.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/murfey/server/api/file_io_frontend.py b/src/murfey/server/api/file_io_frontend.py index 7bd6d44a..b9837f55 100644 --- a/src/murfey/server/api/file_io_frontend.py +++ b/src/murfey/server/api/file_io_frontend.py @@ -14,6 +14,7 @@ process_gain as _process_gain, ) from murfey.server.murfey_db import murfey_db +from murfey.util import secure_path from murfey.util.config import get_machine_config from murfey.util.db import Session @@ -51,10 +52,22 @@ async def create_symlink( instrument_name ] rsync_basepath = (machine_config.rsync_basepath or Path("")).resolve() - symlink_full_path = rsync_basepath / symlink_params.symlink + symlink_full_path = secure_path( + rsync_basepath / symlink_params.symlink, keep_spaces=True + ) + # Verify that the symlink provided does not lead elsewhere + if not symlink_full_path.resolve().is_relative_to(rsync_basepath): + logger.warning( + "Symlink rejected because it will be created in a forbidden location" + ) + return "" + # Remove and replace symlink if it exists and 'override' is set if symlink_full_path.is_symlink() and symlink_params.override: symlink_full_path.unlink() + # If a file/folder already exists using the desired symlink name, return empty string if symlink_full_path.exists(): return "" - symlink_full_path.symlink_to(rsync_basepath / symlink_params.target) + 
symlink_full_path.symlink_to( + secure_path(rsync_basepath / symlink_params.target, keep_spaces=True) + ) return str(symlink_params.symlink) From 64636c5fec19b3537342bae2e31da420619ac693 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 16:28:50 +0000 Subject: [PATCH 10/14] Minor formatting and typing fixes --- src/murfey/util/config.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 95601fa2..343af72f 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -48,14 +48,16 @@ class MachineConfig(BaseModel): # type: ignore analyse_created_directories: list[str] = [] gain_reference_directory: Optional[Path] = None eer_fractionation_file_template: str = "" - substrings_blacklist: dict[str, list] = { + + # Data transfer setup ------------------------------------------------------------- + # General setup + data_transfer_enabled: bool = True + substrings_blacklist: dict[str, list[str]] = { "directories": [], "files": [], } - # Data transfer setup ------------------------------------------------------------- # Rsync setup - data_transfer_enabled: bool = True rsync_url: str = "" rsync_module: str = "" rsync_basepath: Optional[Path] = None @@ -192,6 +194,7 @@ def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): # If instrument name is set, skip irrelevant configs if instrument_name and i != instrument_name: continue + # Construct instrument config hierarchically config: dict[str, Any] = {} @@ -209,6 +212,7 @@ def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): # Insert instrument-specific values config = _update_nested_values(config, instrument_config) + # Add to master dictionary all_machine_configs[i] = MachineConfig(**config) return all_machine_configs From 716fa93665c21986a6a4614cbab52c42d4652b51 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Tue, 9 Dec 2025 16:49:06 +0000 Subject: [PATCH 11/14] Added unit test 
for'get_machine_config' which tests 'machine_config_from_file' by proxy as well --- tests/util/test_config.py | 348 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 tests/util/test_config.py diff --git a/tests/util/test_config.py b/tests/util/test_config.py new file mode 100644 index 00000000..847029b7 --- /dev/null +++ b/tests/util/test_config.py @@ -0,0 +1,348 @@ +from pathlib import Path +from typing import Any + +import pytest +import yaml +from pytest_mock import MockerFixture + +from murfey.util.config import Settings, get_machine_config + + +@pytest.fixture +def mock_general_config(): + # Most extra keys go in this category + return { + "pkg_2": { + "url": "https://some-url.some.org", + "token": "pneumonoultrasmicroscopicsilicovolcanoconiosis", + } + } + + +@pytest.fixture +def mock_tem_shared_config(): + return { + # Hardware and software + "acquisition_software": ["epu", "tomo", "serialem"], + "software_versions": {"tomo": "5.12"}, + "data_required_substrings": { + "epu": { + ".mrc": ["fractions", "Fractions"], + ".tiff": ["fractions", "Fractions"], + ".eer": ["EER"], + }, + "tomo": { + ".mrc": ["fractions", "Fractions"], + ".tiff": ["fractions", "Fractions"], + ".eer": ["EER"], + }, + }, + # Client directory setup + "analyse_created_directories": ["atlas"], + "gain_reference_directory": "C:/ProgramData/Gatan/Reference Images/", + # Data transfer keys + "data_transfer_enabled": True, + "substrings_blacklist": { + "directories": ["some_str"], + "files": ["some_str"], + }, + "rsync_module": "rsync", + "allow_removal": True, + "upstream_data_directories": { + "upstream_instrument": "/path/to/upstream_instrument", + }, + "upstream_data_download_directory": "/path/to/download/directory", + "upstream_data_search_strings": { + "upstream_instrument": ["some_string"], + }, + # Data processing keys + "processing_enabled": True, + "gain_directory_name": "some_directory", + "processed_directory_name": "some_directory", + 
"processed_extra_directory": "some_directory", + "recipes": { + "recipe_1": "recipe_1", + "recipe_2": "recipe_2", + }, + "default_model": "some_file", + "external_executables": { + "app_1": "/path/to/app_1", + "app_2": "/path/to/app_2", + "app_3": "/path/to/app_3", + }, + "external_executables_eer": { + "app_1": "/path/to/app_1", + "app_2": "/path/to/app_2", + "app_3": "/path/to/app_3", + }, + "external_environment": { + "ENV_1": "/path/to/env_1", + "ENV_2": "/path/to/env_2", + }, + "plugin_packages": { + "pkg_1": "/path/to/pkg_1", + "pkg_2": "/path/to/pkg_2", + }, + # Extra keys + "pkg_1": { + "file_path": "", + "command": [ + "/path/to/executable", + "--some_arg", + "-a", + "./path/to/file", + ], + "step_size": 100, + }, + } + + +@pytest.fixture +def mock_instrument_config(): + return { + # Extra key to point to hierarchical dictionary to use + "instrument_type": "tem", + # General information + "display_name": "Some TEM", + "image_path": "/path/to/tem.jpg", + # Hardware and software + "camera": "Some camera", + "superres": True, + "calibrations": { + "magnification": { + 100: 0.1, + 200: 0.05, + 400: 0.025, + }, + }, + # Client directory setup + "data_directories": ["C:"], + # Data transfer keys + "rsync_basepath": "/path/to/data", + "rsync_url": "http://123.45.678.90:8000", + # Server and network keys + "security_configuration_path": "/path/to/security-config.yaml", + "murfey_url": "https://www.murfey.com", + "instrument_server_url": "http://10.123.4.5:8000", + "node_creator_queue": "node_creator", + # Extra keys + "pkg_1": { + "file_path": "/path/to/pkg_1/file.txt", + }, + } + + +@pytest.fixture +def mock_hierarchical_machine_config_yaml( + mock_general_config: dict[str, Any], + mock_tem_shared_config: dict[str, Any], + mock_instrument_config: dict[str, Any], + tmp_path: Path, +): + # Create machine config (with all currently supported keys) for the instrument + hierarchical_config = { + "general": mock_general_config, + "tem": mock_tem_shared_config, + "m01": 
mock_instrument_config, + "m02": mock_instrument_config, + } + config_file = tmp_path / "config" / "murfey-machine-config-hierarchical.yaml" + config_file.parent.mkdir(parents=True, exist_ok=True) + with open(config_file, "w") as file: + yaml.safe_dump(hierarchical_config, file, indent=2) + return config_file + + +@pytest.fixture +def mock_standard_machine_config_yaml( + mock_general_config: dict[str, Any], + mock_tem_shared_config: dict[str, Any], + mock_instrument_config: dict[str, Any], + tmp_path: Path, +): + # Compile the different dictionaries into one dictionary for the instrument + machine_config = { + key: value + for config in ( + mock_general_config, + mock_tem_shared_config, + mock_instrument_config, + ) + for key, value in config.items() + } + + # Correct for nested dicts that would have been partially overwritten + machine_config["pkg_1"] = ( + { + "file_path": "/path/to/pkg_1/file.txt", + "command": [ + "/path/to/executable", + "--some_arg", + "-a", + "./path/to/file", + ], + "step_size": 100, + }, + ) + + master_config = { + "m01": machine_config, + "m02": machine_config, + } + config_file = tmp_path / "config" / "murfey-machine-config-standard.yaml" + config_file.parent.mkdir(parents=True, exist_ok=True) + with open(config_file, "w") as file: + yaml.safe_dump(master_config, file, indent=2) + return config_file + + +get_machine_config_test_matrix: tuple[tuple[str, list[str]], ...] 
= ( + # Config to test | Instrument names to pass to function + ("hierarchical", ["", "m01", "m02"]), + ("standard", ["", "m01", "m02"]), +) + + +@pytest.mark.parametrize("test_params", get_machine_config_test_matrix) +def test_get_machine_config( + mocker: MockerFixture, + mock_general_config: dict[str, Any], + mock_tem_shared_config: dict[str, Any], + mock_instrument_config: dict[str, Any], + mock_hierarchical_machine_config_yaml: Path, + mock_standard_machine_config_yaml: Path, + test_params: tuple[str, list[str]], +): + # Unpack test params + config_to_test, instrument_names = test_params + + # Set up mocks + mock_settings = mocker.patch("murfey.util.config.settings", spec=Settings) + + # Run 'get_machine_config' using different instrument name parameters + for i in instrument_names: + # Patch the 'settings' environment variable with the YAML file to test + mock_settings.murfey_machine_configuration = ( + str(mock_hierarchical_machine_config_yaml) + if config_to_test == "hierarchical" + else str(mock_standard_machine_config_yaml) + ) + # Run the function + config = get_machine_config(i) + + # Validate that the config was loaded correctly + assert config + + # Multiple configs should be returned if instrument name was "" + assert len(config) == 2 if i == "" else len(config) == 1 + + # When getting the config for individual microscopes, validate key-by-key + if i != "": + # General info + assert config[i].display_name == mock_instrument_config["display_name"] + assert config[i].image_path == Path(mock_instrument_config["image_path"]) + # Hardware & software + assert config[i].camera == mock_instrument_config["camera"] + assert config[i].superres == mock_instrument_config["superres"] + assert config[i].calibrations == mock_instrument_config["calibrations"] + assert ( + config[i].acquisition_software + == mock_tem_shared_config["acquisition_software"] + ) + assert ( + config[i].software_versions + == mock_tem_shared_config["software_versions"] + ) + assert ( + 
config[i].data_required_substrings + == mock_tem_shared_config["data_required_substrings"] + ) + # Client directory setup + assert config[i].data_directories == [ + Path(p) for p in mock_instrument_config["data_directories"] + ] + assert ( + config[i].analyse_created_directories + == mock_tem_shared_config["analyse_created_directories"] + ) + assert config[i].gain_reference_directory == Path( + mock_tem_shared_config["gain_reference_directory"] + ) + # Data transfer setup + assert ( + config[i].data_transfer_enabled + == mock_tem_shared_config["data_transfer_enabled"] + ) + assert ( + config[i].substrings_blacklist + == mock_tem_shared_config["substrings_blacklist"] + ) + assert config[i].rsync_url == mock_instrument_config["rsync_url"] + assert config[i].rsync_basepath == Path( + mock_instrument_config["rsync_basepath"] + ) + assert config[i].rsync_module == mock_tem_shared_config["rsync_module"] + assert config[i].allow_removal == mock_tem_shared_config["allow_removal"] + assert config[i].upstream_data_directories == { + key: Path(value) + for key, value in mock_tem_shared_config[ + "upstream_data_directories" + ].items() + } + assert config[i].upstream_data_download_directory == Path( + mock_tem_shared_config["upstream_data_download_directory"] + ) + assert ( + config[i].upstream_data_search_strings + == mock_tem_shared_config["upstream_data_search_strings"] + ) + # Data processing setup + assert ( + config[i].processing_enabled + == mock_tem_shared_config["processing_enabled"] + ) + assert ( + config[i].gain_directory_name + == mock_tem_shared_config["gain_directory_name"] + ) + assert ( + config[i].processed_directory_name + == mock_tem_shared_config["processed_directory_name"] + ) + assert ( + config[i].processed_extra_directory + == mock_tem_shared_config["processed_extra_directory"] + ) + assert config[i].recipes == mock_tem_shared_config["recipes"] + assert config[i].default_model == Path( + mock_tem_shared_config["default_model"] + ) + assert ( + 
config[i].external_executables + == mock_tem_shared_config["external_executables"] + ) + assert ( + config[i].external_executables_eer + == mock_tem_shared_config["external_executables_eer"] + ) + assert ( + config[i].external_environment + == mock_tem_shared_config["external_environment"] + ) + assert config[i].plugin_packages == { + key: Path(value) + for key, value in mock_tem_shared_config["plugin_packages"].items() + } + # Server and network setup + assert config[i].security_configuration_path == Path( + mock_instrument_config["security_configuration_path"] + ) + assert config[i].murfey_url == mock_instrument_config["murfey_url"] + assert ( + config[i].instrument_server_url + == mock_instrument_config["instrument_server_url"] + ) + assert ( + config[i].node_creator_queue + == mock_instrument_config["node_creator_queue"] + ) From e187d9b4a45c2217f48c535379aada99fab0b051 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 10 Dec 2025 10:03:16 +0000 Subject: [PATCH 12/14] Fixed bug in logic when stitching nested dictionaries together --- src/murfey/util/config.py | 4 +++- tests/util/test_config.py | 36 +++++++++++++++++++++++------------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 343af72f..d4b0bab6 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -174,7 +174,9 @@ def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): if key in base and isinstance(base[key], dict) and isinstance(value, dict): base[key] = _update_nested_values(base[key], value) # If new values are lists and a list already exists, extend the list - if key in base and isinstance(base[key], list) and isinstance(value, list): + elif ( + key in base and isinstance(base[key], list) and isinstance(value, list) + ): base[key].extend(value) # Otherwise, overwrite values as normal else: diff --git a/tests/util/test_config.py b/tests/util/test_config.py index 847029b7..6ba97ee4 100644 
--- a/tests/util/test_config.py +++ b/tests/util/test_config.py @@ -171,19 +171,17 @@ def mock_standard_machine_config_yaml( for key, value in config.items() } - # Correct for nested dicts that would have been partially overwritten - machine_config["pkg_1"] = ( - { - "file_path": "/path/to/pkg_1/file.txt", - "command": [ - "/path/to/executable", - "--some_arg", - "-a", - "./path/to/file", - ], - "step_size": 100, - }, - ) + # Correct nested dicts that would have been partially overwritten + machine_config["pkg_1"] = { + "file_path": "/path/to/pkg_1/file.txt", + "command": [ + "/path/to/executable", + "--some_arg", + "-a", + "./path/to/file", + ], + "step_size": 100, + } master_config = { "m01": machine_config, @@ -346,3 +344,15 @@ def test_get_machine_config( config[i].node_creator_queue == mock_instrument_config["node_creator_queue"] ) + # Extra keys + assert config[i].pkg_1 == { + "file_path": "/path/to/pkg_1/file.txt", + "command": [ + "/path/to/executable", + "--some_arg", + "-a", + "./path/to/file", + ], + "step_size": 100, + } + assert config[i].pkg_2 == mock_general_config["pkg_2"] From 28482ce7b5f187428d1ac471179c00f83aa52c95 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 10 Dec 2025 10:10:23 +0000 Subject: [PATCH 13/14] Renamed '_update_nested_values' to '_recursive_update', and updated comments --- src/murfey/util/config.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index d4b0bab6..39fca78a 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -161,24 +161,26 @@ def machine_config_from_file( dictionary, before finally updating the keys for that specific instrument. """ - def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): + def _recursive_update(base: dict[str, Any], new: dict[str, Any]): """ - Helper function to recursively update nested dictionary values. + Helper function to recursively update nested dictionaries. 
+ If the old and new values are both dicts, it will add the new keys and values to the existing dictionary recursively without overwriting entries. + If the old and new values are both lists, it will extend the existing list. For all other values, it will overwrite the existing value with the new one. """ for key, value in new.items(): # If new values are dicts and dict values already exist, do recursive update if key in base and isinstance(base[key], dict) and isinstance(value, dict): - base[key] = _update_nested_values(base[key], value) + base[key] = _recursive_update(base[key], value) # If new values are lists and a list already exists, extend the list elif ( key in base and isinstance(base[key], list) and isinstance(value, list) ): base[key].extend(value) - # Otherwise, overwrite values as normal + # Otherwise, overwrite/add values as normal else: base[key] = value return base @@ -202,17 +204,17 @@ def _update_nested_values(base: dict[str, Any], new: dict[str, Any]): # Populate with general values general_config: dict[str, Any] = master_config.get("general", {}) - config = _update_nested_values(config, general_config) + config = _recursive_update(config, general_config) # Populate with shared instrument values instrument_config: dict[str, Any] = master_config.get(i, {}) instrument_shared_config: dict[str, Any] = master_config.get( str(instrument_config.get("instrument_type", "")).lower(), {} ) - config = _update_nested_values(config, instrument_shared_config) + config = _recursive_update(config, instrument_shared_config) # Insert instrument-specific values - config = _update_nested_values(config, instrument_config) + config = _recursive_update(config, instrument_config) # Add to master dictionary all_machine_configs[i] = MachineConfig(**config) From 86a4ad40ce621f927c62b9b8aa125f4b3cbb7c45 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Wed, 10 Dec 2025 10:16:03 +0000 Subject: [PATCH 14/14] Added 'instrument_type' as a MachineConfig key, for use in 
hierarchical config files --- src/murfey/util/config.py | 1 + tests/util/test_config.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/murfey/util/config.py b/src/murfey/util/config.py index 39fca78a..f989d5b3 100644 --- a/src/murfey/util/config.py +++ b/src/murfey/util/config.py @@ -30,6 +30,7 @@ class MachineConfig(BaseModel): # type: ignore # General info -------------------------------------------------------------------- display_name: str = "" instrument_name: str = "" + instrument_type: str = "" # For use with hierarchical config files image_path: Optional[Path] = None machine_override: str = "" diff --git a/tests/util/test_config.py b/tests/util/test_config.py index 6ba97ee4..82068a8c 100644 --- a/tests/util/test_config.py +++ b/tests/util/test_config.py @@ -183,6 +183,9 @@ def mock_standard_machine_config_yaml( "step_size": 100, } + # Remove 'instrument_type' value (not needed in standard config) + machine_config["instrument_type"] = "" + master_config = { "m01": machine_config, "m02": machine_config, @@ -239,6 +242,11 @@ def test_get_machine_config( # General info assert config[i].display_name == mock_instrument_config["display_name"] assert config[i].image_path == Path(mock_instrument_config["image_path"]) + assert ( + config[i].instrument_type == mock_instrument_config["instrument_type"] + if config_to_test == "hierarchical" + else not config[i].instrument_type + ) # Hardware & software assert config[i].camera == mock_instrument_config["camera"] assert config[i].superres == mock_instrument_config["superres"]