diff --git a/.gitignore b/.gitignore index cdffe91..584d441 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,6 @@ __pycache__/ # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -37,6 +36,7 @@ parts/ sdist/ var/ wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg @@ -55,13 +55,17 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml -*,cover +*.cover +*.py,cover .hypothesis/ +.pytest_cache/ +cover/ # Translations *.mo @@ -70,6 +74,8 @@ coverage.xml # Django stuff: *.log local_settings.py +db.sqlite3 +db.sqlite3-journal # Flask stuff: instance/ @@ -82,32 +88,101 @@ instance/ docs/_build/ # PyBuilder +.pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints +# IPython +profile_default/ +ipython_config.py + # pyenv .python-version -# celery beat schedule file +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff celerybeat-schedule +celerybeat.pid -# dotenv -.env +# SageMath parsed files +*.sage.py -# virtualenv -.venv/ +# Environments +.env +.venv +env/ venv/ ENV/ +env.bak/ +venv.bak/ # Spyder project settings .spyderproject +.spyproject # Rope project settings .ropeproject -# vscode -.vscode/ -.vs/ -© 2019 GitHub, Inc. +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. 
+#.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc diff --git a/deeplc/__init__.py b/deeplc/__init__.py index 953aa36..e4fb659 100644 --- a/deeplc/__init__.py +++ b/deeplc/__init__.py @@ -1,14 +1,8 @@ -import sys +__all__ = ["DeepLC"] +from importlib.metadata import version -if sys.version_info >= (3,8): - from importlib.metadata import version - __version__ = version('deeplc') -else: - import pkg_resources - __version__ = pkg_resources.require("deeplc")[0].version +__version__ = version("deeplc") from deeplc.deeplc import DeepLC -from deeplc.feat_extractor import FeatExtractor - diff --git a/deeplc/__main__.py b/deeplc/__main__.py index eb6079f..e76278b 100644 --- a/deeplc/__main__.py +++ b/deeplc/__main__.py @@ -1,7 +1,12 @@ """Main command line interface to DeepLC.""" __author__ = ["Robbin Bouwmeester", "Ralf Gabriels"] -__credits__ = ["Robbin Bouwmeester", "Ralf Gabriels", "Prof. Lennart Martens", "Sven Degroeve"] +__credits__ = [ + "Robbin Bouwmeester", + "Ralf Gabriels", + "Prof. Lennart Martens", + "Sven Degroeve", +] __license__ = "Apache License, Version 2.0" __maintainer__ = ["Robbin Bouwmeester", "Ralf Gabriels"] __email__ = ["Robbin.Bouwmeester@ugent.be", "Ralf.Gabriels@ugent.be"] @@ -12,12 +17,12 @@ import warnings import pandas as pd +from psm_utils.io import read_file from psm_utils.io.peptide_record import peprec_to_proforma from psm_utils.psm import PSM from psm_utils.psm_list import PSMList -from psm_utils.io import read_file -from deeplc import __version__, DeepLC, FeatExtractor +from deeplc import DeepLC, __version__ from deeplc._argument_parser import parse_arguments from deeplc._exceptions import DeepLCError @@ -26,27 +31,28 @@ def setup_logging(passed_level): log_mapping = { - 'critical': logging.CRITICAL, - 'error': logging.ERROR, - 'warning': logging.WARNING, - 'info': logging.INFO, - 'debug': logging.DEBUG, + "critical": logging.CRITICAL, + "error": logging.ERROR, + "warning": logging.WARNING, + "info": logging.INFO, + "debug": logging.DEBUG, } if passed_level.lower() not in log_mapping: print( "Invalid log level. 
Should be one of the following: ",
-        ', '.join(log_mapping.keys())
+        ", ".join(log_mapping.keys()),
     )
     exit(1)
 logging.basicConfig(
     stream=sys.stdout,
-        format='%(asctime)s - %(levelname)s - %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S',
-        level=log_mapping[passed_level.lower()]
+        format="%(asctime)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+        level=log_mapping[passed_level.lower()],
     )
+
 def main(gui=False):
     """Main function for the CLI."""
     argu = parse_arguments(gui=gui)
@@ -55,13 +61,13 @@ def main(gui=False):
     # Reset logging levels if DEBUG (see deeplc.py)
     if argu.log_level.lower() == "debug":
-        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
-        logging.getLogger('tensorflow').setLevel(logging.DEBUG)
-        warnings.filterwarnings('default', category=DeprecationWarning)
-        warnings.filterwarnings('default', category=FutureWarning)
-        warnings.filterwarnings('default', category=UserWarning)
+        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
+        logging.getLogger("tensorflow").setLevel(logging.DEBUG)
+        warnings.filterwarnings("default", category=DeprecationWarning)
+        warnings.filterwarnings("default", category=FutureWarning)
+        warnings.filterwarnings("default", category=UserWarning)
     else:
-        os.environ['KMP_WARNINGS'] = '0'
+        os.environ["KMP_WARNINGS"] = "0"
 
     try:
         run(**vars(argu))
@@ -101,13 +107,13 @@ def run(
         for fm in file_model:
             if len(sel_group) == 0:
                 sel_group = "_".join(fm.split("_")[:-1])
-                fm_dict[sel_group]= fm
+                fm_dict[sel_group] = fm
                 continue
             m_group = "_".join(fm.split("_")[:-1])
             if m_group == sel_group:
                 fm_dict[m_group] = fm
         file_model = fm_dict
-    
+
     with open(file_pred) as f:
         first_line_pred = f.readline().strip()
     if file_cal:
@@ -118,53 +124,68 @@ def run(
         # Read input files
         df_pred = pd.read_csv(file_pred)
         if len(df_pred.columns) < 2:
-            df_pred = pd.read_csv(file_pred,sep=" ")
+            df_pred = pd.read_csv(file_pred, sep=" ")
         df_pred = df_pred.fillna("")
         file_pred = ""
         list_of_psms = []
-        for seq,mod,ident in zip(df_pred["seq"],df_pred["modifications"],df_pred.index):
-            list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident))
+        for seq, mod, ident in zip(df_pred["seq"], df_pred["modifications"], df_pred.index):
+            list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident))
         psm_list_pred = PSMList(psm_list=list_of_psms)
         df_pred = None
     else:
         psm_list_pred = read_file(file_pred)
         if "msms" in file_pred and ".txt" in file_pred:
-            mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
+            mapper = pd.read_csv(
+                os.path.join(
+                    os.path.dirname(os.path.realpath(__file__)),
+                    "unimod/map_mq_file.csv",
+                ),
+                index_col=0,
+            )["value"].to_dict()
             psm_list_pred.rename_modifications(mapper)
 
     # Allow for calibration file to be empty (undefined), fill in if/elif if present
     psm_list_cal = []
-    if "modifications" in first_line_cal.split(",") and "seq" in first_line_cal.split(",") and file_cal:
+    if (
+        "modifications" in first_line_cal.split(",")
+        and "seq" in first_line_cal.split(",")
+        and file_cal
+    ):
         df_cal = pd.read_csv(file_cal)
         if len(df_cal.columns) < 2:
-            df_cal = pd.read_csv(df_cal,sep=" ")
+            df_cal = pd.read_csv(file_cal, sep=" ")
         df_cal = df_cal.fillna("")
         file_cal = ""
         list_of_psms = []
-        for seq,mod,ident,tr in zip(df_cal["seq"],df_cal["modifications"],df_cal.index,df_cal["tr"]):
-            list_of_psms.append(PSM(peptidoform=peprec_to_proforma(seq,mod),spectrum_id=ident,retention_time=tr))
+        for seq, mod, ident, tr in zip(
+            df_cal["seq"], df_cal["modifications"], df_cal.index, df_cal["tr"]
+        ):
+            list_of_psms.append(
+                PSM(
+                    peptidoform=peprec_to_proforma(seq, mod),
+                    spectrum_id=ident,
+                    retention_time=tr,
+                )
+            )
         psm_list_cal = PSMList(psm_list=list_of_psms)
         df_cal = None
     elif file_cal:
         psm_list_cal = read_file(file_cal)
         if "msms" in file_cal and ".txt" in file_cal:
-            mapper = pd.read_csv(os.path.join(os.path.dirname(os.path.realpath(__file__)), "unimod/map_mq_file.csv"),index_col=0)["value"].to_dict()
+            mapper = pd.read_csv(
+                os.path.join(
+                    os.path.dirname(os.path.realpath(__file__)),
+                    "unimod/map_mq_file.csv",
+                ),
+                index_col=0,
+            )["value"].to_dict()
             psm_list_cal.rename_modifications(mapper)
 
-    # Make a feature extraction object; you can skip this if you do not want to
-    # use the default settings for DeepLC. Here we want to use a model that does
-    # not use RDKit features so we skip the chemical descriptor making
-    # procedure.
-    f_extractor = FeatExtractor(
-        cnn_feats=True,
-        verbose=verbose
-    )
-
+    # Make the DeepLC object that will handle making predictions and calibration
     dlc = DeepLC(
         path_model=file_model,
-        f_extractor=f_extractor,
         cnn_model=True,
         split_cal=split_cal,
         dict_cal_divider=dict_divider,
@@ -173,9 +194,9 @@ def run(
         batch_num=batch_num,
         n_jobs=n_threads,
         verbose=verbose,
-        deeplc_retrain=transfer_learning
+        deeplc_retrain=transfer_learning,
     )
-    
+
     # Calibrate the original model based on the new retention times
     if len(psm_list_cal) > 0:
         logger.info("Selecting best model and calibrating predictions...")
@@ -185,16 +206,21 @@ def run(
     # Make predictions; calibrated or uncalibrated
     logger.info("Making predictions using model: %s", dlc.model)
     if len(psm_list_cal) > 0:
         preds = dlc.make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred)
     else:
-        preds = dlc.make_preds(seq_df=df_pred, infile=file_pred, psm_list=psm_list_pred, calibrate=False)
-
-    #df_pred["predicted_tr"] = preds
+        preds = dlc.make_preds(
+            seq_df=df_pred,
+            infile=file_pred,
+            psm_list=psm_list_pred,
+            calibrate=False,
+        )
+
+    # df_pred["predicted_tr"] = preds
     logger.info("Writing predictions to file: %s", file_pred_out)
-    
-    file_pred_out = open(file_pred_out,"w")
+
+    file_pred_out = open(file_pred_out, "w")
     file_pred_out.write("Sequence proforma,predicted retention time\n")
-    for psm,tr in zip(psm_list_pred,preds):
+    for psm, tr in zip(psm_list_pred, preds):
         file_pred_out.write(f"{psm.peptidoform.proforma},{tr}\n")
     file_pred_out.close()
diff --git a/deeplc/_argument_parser.py b/deeplc/_argument_parser.py
index 09875cf..e12f526 100644
--- a/deeplc/_argument_parser.py
+++ b/deeplc/_argument_parser.py
@@ -74,17 +74,13 @@ def parse_arguments(gui=False):
 
     parser = ArgumentParser(
         prog="DeepLC",
-        description=(
-            "Retention time prediction for (modified) peptides using deep " "learning."
- ), + description=("Retention time prediction for (modified) peptides using deep learning."), usage="deeplc [OPTIONS] --file_pred ", formatter_class=lambda prog: HelpFormatter(prog, max_help_position=42), add_help=False, ) - io_args = parser.add_argument_group( - "Input and output files", **gooey_args["io_args"] - ) + io_args = parser.add_argument_group("Input and output files", **gooey_args["io_args"]) io_args.add_argument( "--file_pred", required=True, @@ -97,9 +93,7 @@ def parse_arguments(gui=False): type=str, default=None, metavar="Input peptides for calibration" if gui else "", - help=( - "path to peptide CSV file with retention times to use for " "calibration" - ), + help=("path to peptide CSV file with retention times to use for calibration"), **gooey_args["file_cal"], ) io_args.add_argument( @@ -166,10 +160,7 @@ def parse_arguments(gui=False): dest="split_cal", default=50, metavar="split cal" if gui else "", - help=( - "number of splits in the chromatogram for piecewise linear " - "calibration fit" - ), + help=("number of splits in the chromatogram for piecewise linear calibration fit"), **gooey_args["split_cal"], ) model_cal_args.add_argument( @@ -265,8 +256,6 @@ def parse_arguments(gui=False): results = parser.parse_args() if not results.file_pred_out: - results.file_pred_out = ( - os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv" - ) + results.file_pred_out = os.path.splitext(results.file_pred)[0] + "_deeplc_predictions.csv" return results diff --git a/deeplc/_exceptions.py b/deeplc/_exceptions.py index 1be41ae..c3cde3a 100644 --- a/deeplc/_exceptions.py +++ b/deeplc/_exceptions.py @@ -1,5 +1,6 @@ """DeepLC exceptions.""" + class DeepLCError(Exception): pass diff --git a/deeplc/deeplc.py b/deeplc/deeplc.py index 589c040..22f1b92 100644 --- a/deeplc/deeplc.py +++ b/deeplc/deeplc.py @@ -4,6 +4,8 @@ This provides the main interface. For the library versions see the .yml file """ +from __future__ import annotations + __author__ = ["Robbin Bouwmeester", "Ralf Gabriels"] __license__ = "Apache License, Version 2.0" __maintainer__ = ["Robbin Bouwmeester", "Ralf Gabriels"] @@ -12,192 +14,351 @@ "Robbin Bouwmeester", "Ralf Gabriels", "Arthur Declercq", + "Alireza Nameni", "Lennart Martens", "Sven Degroeve", ] - # Default models, will be used if no other is specified. If no best model is # selected during calibration, the first model in the list will be used. 
-import os - -deeplc_dir = os.path.dirname(os.path.realpath(__file__)) -DEFAULT_MODELS = [ - "mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras", - "mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.keras", - "mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.keras", -] -DEFAULT_MODELS = [os.path.join(deeplc_dir, dm) for dm in DEFAULT_MODELS] - -LIBRARY = {} - import copy import gc import logging -import math import multiprocessing import multiprocessing.dummy +import os import random import sys import warnings from configparser import ConfigParser -from itertools import chain +from pathlib import Path from tempfile import TemporaryDirectory -from sklearn.preprocessing import SplineTransformer +import numpy as np +import pandas as pd +import torch +from dask import compute, delayed +from psm_utils import PSM, Peptidoform, PSMList +from psm_utils.io import read_file +from psm_utils.io.peptide_record import peprec_to_proforma +from sklearn.base import BaseEstimator from sklearn.linear_model import LinearRegression from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import SplineTransformer +from torch.utils.data import DataLoader, Dataset +from deeplc._exceptions import CalibrationError +from deeplc.feat_extractor import aggregate_encodings, encode_peptidoform +from deeplc.trainl3 import train_elastic_net -# If CLI/GUI/frozen: disable Tensorflow info and warnings before importing +# If CLI/GUI/frozen: disable warnings before importing IS_CLI_GUI = os.path.basename(sys.argv[0]) in ["deeplc", "deeplc-gui"] IS_FROZEN = getattr(sys, "frozen", False) if IS_CLI_GUI or IS_FROZEN: - os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - logging.getLogger("tensorflow").setLevel(logging.CRITICAL) - warnings.filterwarnings("ignore", category=DeprecationWarning) - warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=UserWarning) -# Supress warnings (or at least try...) -logging.getLogger("tensorflow").setLevel(logging.ERROR) -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" - -import numpy as np -import pandas as pd -import tensorflow as tf -from deeplcretrainer import deeplcretrainer -from psm_utils.io import read_file -from psm_utils.io.peptide_record import peprec_to_proforma -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList +DEEPLC_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_MODELS = [ + "mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.pt", + "mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.pt", + "mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.pt", +] +DEFAULT_MODELS = [os.path.join(DEEPLC_DIR, dm) for dm in DEFAULT_MODELS] -try: - from tensorflow.keras.models import load_model -except: - from tensorflow.python.keras.models import load_model -from tensorflow.python.eager import context +LIBRARY = {} -from deeplc._exceptions import CalibrationError -from deeplc.trainl3 import train_en +# TODO: Keep for torch? 
# Set to force CPU calculations os.environ["CUDA_VISIBLE_DEVICES"] = "-1" -# Feature extraction -from deeplc.feat_extractor import FeatExtractor +logger = logging.getLogger(__name__) -def warn(*args, **kwargs): - pass +def split_list(a, n): + k, m = divmod(len(a), n) + return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)) -import warnings -warnings.warn = warn +def divide_chunks(list_, n_chunks): + """Yield successive n-sized chunks from list_.""" + for i in range(0, len(list_), n_chunks): + yield list_[i : i + n_chunks] -warnings.filterwarnings("ignore", category=DeprecationWarning) -warnings.filterwarnings("ignore", category=FutureWarning) -logger = logging.getLogger(__name__) +class DeepLCDataset(Dataset): + """ + Custom Dataset class for DeepLC used for loading features from peptide sequences. + Parameters + ---------- + X : ndarray + Feature matrix for input data. + X_sum : ndarray + Feature matrix for sum of input data. + X_global : ndarray + Feature matrix for global input data. + X_hc : ndarray + Feature matrix for high-order context features. + target : ndarray, optional + The target retention times. Default is None. + """ -def split_list(a, n): - k, m = divmod(len(a), n) - return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)) + def __init__(self, X, X_sum, X_global, X_hc, target=None): + self.X = torch.from_numpy(X).float() + self.X_sum = torch.from_numpy(X_sum).float() + self.X_global = torch.from_numpy(X_global).float() + self.X_hc = torch.from_numpy(X_hc).float() + + if target is not None: + self.target = torch.from_numpy( + target + ).float() # Add target values if provided + else: + self.target = None # If no target is provided, set it to None + + def __len__(self): + return self.X.shape[0] + + def __getitem__(self, idx): + if self.target is not None: + # Return both features and target during training + return ( + self.X[idx], + self.X_sum[idx], + self.X_global[idx], + self.X_hc[idx], + self.target[idx], + ) + else: + # Return only features during prediction + return (self.X[idx], self.X_sum[idx], self.X_global[idx], self.X_hc[idx]) -def divide_chunks(l, n): - for i in range(0, len(l), n): - yield l[i : i + n] +class DeepLCFineTuner: + """ + Class for fine-tuning a DeepLC model. + + Parameters + ---------- + model : torch.nn.Module + The model to fine-tune. + train_data : torch.utils.data.Dataset + Dataset containing the training data. + device : str, optional, default='cpu' + The device on which to run the model ('cpu' or 'cuda'). + learning_rate : float, optional, default=0.001 + The learning rate for the optimizer. + epochs : int, optional, default=10 + Number of training epochs. + batch_size : int, optional, default=256 + Batch size for training. + validation_data : torch.utils.data.Dataset or None, optional + If provided, used directly for validation. Otherwise, a fraction of + `train_data` will be held out. + validation_split : float, optional, default=0.1 + Fraction of `train_data` to reserve for validation when + `validation_data` is None. + patience : int, optional, default=5 + Number of epochs with no improvement on validation loss before stopping. 
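+
+    Examples
+    --------
+    A minimal sketch of the intended fine-tuning flow; ``model`` is assumed to
+    be an already loaded DeepLC torch module and ``dataset`` a
+    :class:`DeepLCDataset` constructed with target retention times:
+
+    >>> tuner = DeepLCFineTuner(model, dataset, epochs=10, patience=5)
+    >>> fine_tuned = tuner.fine_tune()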
+ """ + def __init__( + self, + model, + train_data, + device="cpu", + learning_rate=0.001, + epochs=10, + batch_size=256, + validation_data=None, + validation_split=0.1, + patience=5, + ): + self.model = model.to(device) + self.train_data = train_data + self.device = device + self.learning_rate = learning_rate + self.epochs = epochs + self.batch_size = batch_size + self.validation_data = validation_data + self.validation_split = validation_split + self.patience = patience + + def _freeze_layers(self, unfreeze_keywords="33_1"): + """ + Freezes all layers except those that contain the unfreeze_keyword + in their name. + """ + + for name, param in self.model.named_parameters(): + param.requires_grad = unfreeze_keywords in name + + def prepare_data(self, data, shuffle=True): + return DataLoader(data, batch_size=self.batch_size, shuffle=shuffle) + + def fine_tune(self): + logger.debug("Starting fine-tuning...") + if self.validation_data is None: + # Split the training data into training and validation sets + val_size = int(len(self.train_data) * self.validation_split) + train_size = len(self.train_data) - val_size + train_dataset, val_dataset = torch.utils.data.random_split( + self.train_data, [train_size, val_size] + ) + else: + train_dataset = self.train_data + val_dataset = self.validation_data + train_loader = self.prepare_data(train_dataset) + val_loader = self.prepare_data(val_dataset, shuffle=False) + + optimizer = torch.optim.Adam( + filter(lambda p: p.requires_grad, self.model.parameters()), + lr=self.learning_rate, + ) + loss_fn = torch.nn.L1Loss() + best_model_wts = copy.deepcopy(self.model.state_dict()) + best_val_loss = float("inf") + epochs_no_improve = 0 + + for epoch in range(self.epochs): + running_loss = 0.0 + self.model.train() + for batch in train_loader: + batch_X, batch_X_sum, batch_X_global, batch_X_hc, target = batch + + target = target.view(-1, 1) + + optimizer.zero_grad() + outputs = self.model(batch_X, batch_X_sum, batch_X_global, batch_X_hc) + loss = loss_fn(outputs, target) + loss.backward() + optimizer.step() + running_loss += loss.item() + avg_loss = running_loss / len(train_loader) + + self.model.eval() + val_loss = 0.0 + with torch.no_grad(): + for batch in val_loader: + batch_X, batch_X_sum, batch_X_global, batch_X_hc, target = batch + target = target.view(-1, 1) + outputs = self.model( + batch_X, batch_X_sum, batch_X_global, batch_X_hc + ) + val_loss += loss_fn(outputs, target).item() + avg_val_loss = val_loss / len(val_loader) -def reset_keras(): - """Reset Keras session.""" - # sess = get_session() - # clear_session() - # sess.close() - # gc.collect() - # Set to force CPU calculations - os.environ["CUDA_VISIBLE_DEVICES"] = "-1" + logger.debug( + f"Epoch {epoch + 1}/{self.epochs}, " + f"Loss: {avg_loss:.4f}, " + f"Validation Loss: {avg_val_loss:.4f}" + ) + if avg_val_loss < best_val_loss: + best_val_loss = avg_val_loss + best_model_wts = copy.deepcopy(self.model.state_dict()) + epochs_no_improve = 0 + else: + epochs_no_improve += 1 + if epochs_no_improve >= self.patience: + logger.debug(f"Early stopping triggered {epoch + 1}") + break + self.model.load_state_dict(best_model_wts) + return self.model class DeepLC: """ DeepLC predictor. 
-    Parameters
-    ----------
-    main_path : str
-        main path of module
-    path_model : str, optional
-        path to prediction model(s); leave empty to select the best of the
-        default models based on the calibration peptides
-    verbose : bool, default=True
-        turn logging on/off
-    bin_dist : float, default=2
-        TODO
-    dict_cal_divider : int, default=50
-        sets precision for fast-lookup of retention times for calibration; e.g.
-        10 means a precision of 0.1 between the calibration anchor points
-    split_cal : int, default=50
-        number of splits in the chromatogram for piecewise linear calibration
-        fit
-    n_jobs : int, optional
-        number of CPU threads to use
-    config_file : str, optional
-        path to configuration file
-    f_extractor : object :: deeplc.FeatExtractor, optional
-        deeplc.FeatExtractor object to use
-    cnn_model : bool, default=True
-        use CNN model or not
-    batch_num : int, default=250000
-        prediction batch size (in peptides); lower to decrease memory footprint
-    write_library : bool, default=False
-        append new predictions to library for faster future results; requires
-        `use_library` option
-    use_library : str, optional
-        library file with previous predictions for faster results to read from,
-        or to write to
-    reload_library : bool, default=False
-        reload prediction library
 
     Methods
     -------
-    calibrate_preds(seqs=[], mods=[], identifiers=[], measured_tr=[], correction_factor=1.0, seq_df=None, use_median=True)
+    calibrate_preds
         Find best model and calibrate
-    make_preds(seqs=[], mods=[], identifiers=[], calibrate=True, seq_df=None, correction_factor=1.0, mod_name=None)
+    make_preds
         Make predictions
     """
 
     library = {}
 
-    # TODO have a CCS flag here
     def __init__(
         self,
-        main_path=os.path.dirname(os.path.realpath(__file__)),
-        path_model=None,
-        verbose=True,
-        bin_dist=2,
-        dict_cal_divider=50,
-        split_cal=50,
-        n_jobs=None,
-        config_file=None,
-        f_extractor=None,
-        cnn_model=True,
-        batch_num=250000,
-        batch_num_tf=1024,
-        write_library=False,
-        use_library=None,
-        reload_library=False,
-        pygam_calibration=True,
-        deepcallc_mod=False,
-        deeplc_retrain=False,
-        predict_ccs=False,
-        n_epochs=20,
-        single_model_mode=True,
+        main_path: str | None = None,
+        path_model: str | None = None,
+        verbose: bool = True,
+        bin_distance: int = 2,
+        dict_cal_divider: int = 50,
+        split_cal: int = 50,
+        n_jobs: int | None = None,
+        config_file: str | None = None,
+        f_extractor: None = None,
+        cnn_model: bool = True,
+        batch_num: int = int(1e6),
+        batch_num_tf: int = 1024,
+        write_library: bool = False,
+        use_library: str | None = None,
+        reload_library: bool = False,
+        pygam_calibration: bool = True,
+        deepcallc_mod: bool = False,
+        deeplc_retrain: bool = False,
+        predict_ccs: bool = False,
+        n_epochs: int = 20,
+        single_model_mode: bool = True,
     ):
+        """
+        Initialize the DeepLC predictor.
+
+        Parameters
+        ----------
+        main_path
+            Main path of the module.
+        path_model
+            Path to prediction model(s); if not provided, the best default model is selected based
+            on calibration peptides.
+        verbose
+            Turn logging on/off.
+        bin_distance
+            Number of decimals used when rounding retention times for the fast calibration
+            lookup.
+        dict_cal_divider
+            Divider that sets the precision for fast lookup of retention times in calibration;
+            e.g. 10 means a precision of 0.1 between the calibration anchor points.
+        split_cal
+            Number of splits in the chromatogram for piecewise linear calibration.
+        n_jobs
+            Number of CPU threads to use.
+        config_file
+            Path to a configuration file.
+        f_extractor
+            Deprecated.
+        cnn_model
+            Flag indicating whether to use the CNN model.
+        batch_num
+            Prediction batch size (in peptides); lower values reduce memory footprint.
+        batch_num_tf
+            Batch size for model inference.
+        write_library
+            Whether to append new predictions to a library for faster future access.
+        use_library
+            Library file to read from or write to for prediction caching.
+        reload_library
+            Whether to reload the prediction library.
+        pygam_calibration
+            Flag to enable calibration using PyGAM.
+        deepcallc_mod
+            Flag specific to deepcallc mode.
+        deeplc_retrain
+            Flag indicating whether to perform retraining (transfer learning) of prediction
+            models.
+        predict_ccs
+            Flag to control prediction of CCS values.
+        n_epochs
+            Number of epochs used in retraining if deeplc_retrain is enabled.
+        single_model_mode
+            Flag to use a single model instead of multiple default models.
+        """
         # if a config file is defined overwrite standard parameters
         if config_file:
             cparser = ConfigParser()
@@ -206,70 +367,40 @@ def __init__(
             split_cal = cparser.getint("DeepLC", "split_cal")
             n_jobs = cparser.getint("DeepLC", "n_jobs")
 
-        self.main_path = main_path
+        self.main_path = main_path or os.path.dirname(os.path.realpath(__file__))
+        self.path_model = self._get_model_paths(path_model, single_model_mode)
         self.verbose = verbose
-        self.bin_dist = bin_dist
-        self.calibrate_dict = {}
-        self.calibrate_min = float("inf")
-        self.calibrate_max = 0
-        self.n_epochs = n_epochs
+        self.bin_distance = bin_distance
+        self.dict_cal_divider = dict_cal_divider
+        self.split_cal = split_cal
+        self.n_jobs = multiprocessing.cpu_count() if n_jobs is None else n_jobs
+        self.config_file = config_file
         self.cnn_model = cnn_model
-
+        self.model_cache = {}
         self.batch_num = batch_num
         self.batch_num_tf = batch_num_tf
-        self.dict_cal_divider = dict_cal_divider
-        self.split_cal = split_cal
-        self.n_jobs = n_jobs
-
-        ################################################
-        #                                              #
-        #                !!!WARNING!!!                 #
-        #                                              #
-        #! DUE TO ISSUES WITH MULTITHREADING SET TO 1 !#
-        #                                              #
-        #                                              #
-        ################################################
-
-        self.n_jobs = 1
-
-        if self.n_jobs == None:
-            max_threads = multiprocessing.cpu_count()
-            self.n_jobs = max_threads
-
-        self.use_library = use_library
         self.write_library = write_library
-
+        self.use_library = use_library
         self.reload_library = reload_library
+        self.pygam_calibration = pygam_calibration
+        self.deepcallc_mod = deepcallc_mod
+        self.deeplc_retrain = deeplc_retrain
+        self.predict_ccs = predict_ccs
+        self.n_epochs = n_epochs
 
-        try:
-            tf.config.threading.set_intra_op_parallelism_threads(n_jobs)
-        except RuntimeError:
-            logger.warning(
-                "DeepLC tried to set intra op threads, but was unable to do so."
-            )
-
-        if "NUMEXPR_MAX_THREADS" not in os.environ:
-            os.environ["NUMEXPR_MAX_THREADS"] = str(n_jobs)
-
-        if path_model:
-            self.model = path_model
-        else:
-            if single_model_mode:
-                self.model = [DEFAULT_MODELS[0]]
-            else:
-                self.model = DEFAULT_MODELS
+        # Keep `model` as an alias of `path_model` for backwards compatibility
+        self.model = self.path_model
 
         if f_extractor:
-            self.f_extractor = f_extractor
-        else:
-            self.f_extractor = FeatExtractor()
-
-        self.pygam_calibration = pygam_calibration
-        self.deeplc_retrain = deeplc_retrain
+            warnings.warn(
+                "The f_extractor argument is deprecated and is ignored.",
+                DeprecationWarning,
+            )
 
-        self.deepcallc_mod = deepcallc_mod
-        self.predict_ccs = predict_ccs
+        # Calibration variables
+        self.calibrate_dict = {}
+        self.calibrate_min = float("inf")
+        self.calibrate_max = 0
 
         if self.deepcallc_mod:
             self.write_library = False
@@ -288,214 +419,108 @@ def __str__(self):
         |_|
         """
 
-    def do_f_extraction(self, seqs, mods, identifiers, charges=[]):
-        """
-        Extract all features we can extract; without parallelization; use if you
-        want to run feature extraction with a single core
+    @staticmethod
+    def _get_model_paths(
+        passed_model_path: str | None, single_model_mode: bool
+    ) -> list[str]:
+        """Get the model paths based on the passed model path and the single model mode."""
+        if passed_model_path:
+            return [passed_model_path]
 
-        Parameters
-        ----------
-        seqs : list
-            peptide sequence list; should correspond to mods and identifiers
-        mods : list
-            naming of the mods; should correspond to seqs and identifiers
-        identifiers : list
-            identifiers of the peptides; should correspond to seqs and mods
+        if single_model_mode:
+            return [DEFAULT_MODELS[0]]
 
-        Returns
-        -------
-        pd.DataFrame
-            feature matrix
-        """
-        list_of_psms = []
+        return DEFAULT_MODELS
 
-        if not self.predict_ccs:
-            for seq, mod, ident in zip(seqs, mods, identifiers):
-                list_of_psms.append(
-                    PSM(peptide=peprec_to_proforma(seq, mod), spectrum_id=ident)
-                )
-        else:
-            for seq, mod, ident, z in zip(seqs, mods, identifiers, charges):
-                list_of_psms.append(
-                    PSM(peptide=peprec_to_proforma(seq, mod, z), spectrum_id=ident)
-                )
-
-        psm_list = PSMList(psm_list=list_of_psms)
-
-        return self.f_extractor.full_feat_extract(
-            psm_list, predict_ccs=self.predict_ccs
-        )
-
-    def do_f_extraction_pd(self, df_instances, charges=[]):
+    def _prepare_feature_matrices(self, psm_list):
         """
-        Extract all features we can extract; without parallelization; use if
-        you want to run feature extraction with a single thread; and use a
-        defined dataframe
+        Extract features in parallel and assemble the four input matrices.
 
         Parameters
         ----------
-        df_instances : object :: pd.DataFrame
-            dataframe containing the sequences (column:seq), modifications
-            (column:modifications) and naming (column:index)
+        psm_list : list of PSM
+            List of peptide‐spectrum matches for which to extract features.
Returns ------- - pd.DataFrame - feature matrix + X : ndarray, shape (n_peptides, n_features) + X_sum : ndarray, shape (n_peptides, n_sum_features) + X_global : ndarray, shape (n_peptides, n_global_features * 2) + X_hc : ndarray, shape (n_peptides, n_hc_features) """ - - list_of_psms = [] - if len(charges) == 0: - for seq, mod, ident in zip( - df_instances["seq"], df_instances["modifications"], df_instances.index - ): - list_of_psms.append( - PSM(peptide=peprec_to_proforma(seq, mod), spectrum_id=ident) - ) - else: - for seq, mod, ident, z in zip( - df_instances["seq"], - df_instances["modifications"], - df_instances.index, - charges=df_instances["charges"], - ): - list_of_psms.append( - PSM( - peptide=peprec_to_proforma(seq, mod, charge=z), - spectrum_id=ident, - ) - ) - - psm_list = PSMList(psm_list=list_of_psms) - - return self.f_extractor.full_feat_extract( - psm_list, predict_ccs=self.predict_ccs + feats = self.do_f_extraction_psm_list_parallel(psm_list) + X = np.stack(list(feats["matrix"].values())) + X_sum = np.stack(list(feats["matrix_sum"].values())) + X_global = np.concatenate( + ( + np.stack(list(feats["matrix_all"].values())), + np.stack(list(feats["pos_matrix"].values())), + ), + axis=1, ) + X_hc = np.stack(list(feats["matrix_hc"].values())) + return X, X_sum, X_global, X_hc - def do_f_extraction_pd_parallel(self, df_instances): - """ - Extract all features we can extract; with parallelization; use if you - want to run feature extraction with multiple threads; and use a defined - dataframe - - Parameters - ---------- - df_instances : object :: pd.DataFrame - dataframe containing the sequences (column:seq), modifications - (column:modifications) and naming (column:index) - - Returns - ------- - pd.DataFrame - feature matrix - """ - # self.n_jobs = 1 - - df_instances_split = np.array_split(df_instances, math.ceil(self.n_jobs / 4.0)) - if multiprocessing.current_process().daemon: - logger.warning( - "DeepLC is running in a daemon process. Disabling multiprocessing as daemonic processes can't have children." 
- ) - pool = multiprocessing.dummy.Pool(1) - else: - pool = multiprocessing.Pool(math.ceil(self.n_jobs / 4.0)) + def _extract_features( + self, + peptidoforms: list[str | Peptidoform] | PSMList, + chunk_size: int = 10000, + ) -> dict[str, dict[int, np.ndarray]]: + """Extract features for all peptidoforms.""" + if isinstance(peptidoforms, PSMList): + peptidoforms = [psm.peptidoform for psm in peptidoforms] + + logger.debug("Running feature extraction in single-threaded mode...") + if self.n_jobs <= 1: + encodings = [ + encode_peptidoform(pf, predict_ccs=self.predict_ccs) + for pf in peptidoforms + ] - if self.n_jobs == 1: - df = self.do_f_extraction_pd(df_instances) else: - df = pd.concat(pool.map(self.do_f_extraction_pd, df_instances_split)) - pool.close() - pool.join() - return df - - def do_f_extraction_psm_list(self, psm_list): - """ - Extract all features we can extract; without parallelization; use if - you want to run feature extraction with a single thread; and use a - defined dataframe - - Parameters - ---------- - df_instances : object :: pd.DataFrame - dataframe containing the sequences (column:seq), modifications - (column:modifications) and naming (column:index) - - Returns - ------- - pd.DataFrame - feature matrix - """ - return self.f_extractor.full_feat_extract( - psm_list, predict_ccs=self.predict_ccs - ) - - def do_f_extraction_psm_list_parallel(self, psm_list): - """ - Extract all features we can extract; without parallelization; use if - you want to run feature extraction with a single thread; and use a - defined dataframe - - Parameters - ---------- - df_instances : object :: pd.DataFrame - dataframe containing the sequences (column:seq), modifications - (column:modifications) and naming (column:index) + logger.debug("Preparing feature extraction with Dask") + # Process peptidoforms in larger chunks to reduce task overhead. + peptidoform_strings = [ + str(pep) for pep in peptidoforms + ] # Faster pickling of strings + + def chunked_encode(chunk): + return [ + encode_peptidoform(pf, predict_ccs=self.predict_ccs) for pf in chunk + ] + + tasks = [ + delayed(chunked_encode)(peptidoform_strings[i : i + chunk_size]) + for i in range(0, len(peptidoform_strings), chunk_size) + ] - Returns - ------- - pd.DataFrame - feature matrix - """ - # TODO for multiproc I am still expecting a pd dataframe, this is not the case anymore, they are dicts - # self.n_jobs = 1 - logger.debug("prepare feature extraction") - if multiprocessing.current_process().daemon: - logger.warning( - "DeepLC is running in a daemon process. Disabling multiprocessing as daemonic processes can't have children." 
-            )
-            psm_list_split = split_list(psm_list, self.n_jobs)
-            pool = multiprocessing.dummy.Pool(1)
-        elif self.n_jobs > 1:
-            psm_list_split = split_list(psm_list, self.n_jobs)
-            pool = multiprocessing.Pool(self.n_jobs)
-
-        if self.n_jobs == 1:
-            logger.debug("start feature extraction")
-            all_feats = self.do_f_extraction_psm_list(psm_list)
-            logger.debug("got feature extraction results")
-        else:
-            logger.debug("start feature extraction")
-            all_feats_async = pool.map_async(
-                self.do_f_extraction_psm_list, psm_list_split
+            logger.debug("Starting feature extraction with Dask")
+            chunks_encodings = compute(
+                *tasks, scheduler="processes", num_workers=self.n_jobs
             )
-            logger.debug("wait for feature extraction")
-            all_feats_async.wait()
-            logger.debug("get feature extraction results")
-            res = all_feats_async.get()
-            matrix_names = res[0].keys()
-            all_feats = {
-                matrix_name: dict(
-                    enumerate(
-                        chain.from_iterable((v[matrix_name].values() for v in res))
-                    )
-                )
-                for matrix_name in matrix_names
-            }
+            # Flatten the list of lists.
+            encodings = [enc for chunk in chunks_encodings for enc in chunk]
 
-        # all_feats = pd.concat(all_feats_async.get())
+        # Aggregate the encodings into a single dictionary.
+        aggregated_encodings = aggregate_encodings(encodings)
 
-        logger.debug("got feature extraction results")
+        logger.debug("Finished feature extraction")
 
-        pool.close()
-        pool.join()
+        return aggregated_encodings
 
-        return all_feats
+    def _apply_calibration_core(
+        self,
+        uncal_preds: np.ndarray,
+        cal_dict: dict | list[BaseEstimator],
+        cal_min: float,
+        cal_max: float,
+    ) -> np.ndarray:
+        """Apply calibration to the predictions."""
+        if len(uncal_preds) == 0:
+            return np.array([])
 
-    def calibration_core(self, uncal_preds, cal_dict, cal_min, cal_max):
         cal_preds = []
-        if len(uncal_preds) == 0:
-            return np.array(cal_preds)
         if self.pygam_calibration:
             linear_model_left, spline_model, linear_model_right = cal_dict
             y_pred_spline = spline_model.predict(uncal_preds.reshape(-1, 1))
@@ -521,334 +546,247 @@ def calibrate_preds_func(
         else:
             for uncal_pred in uncal_preds:
                 try:
-                    slope, intercept = cal_dict[str(round(uncal_pred, self.bin_dist))]
+                    slope, intercept = cal_dict[
+                        str(round(uncal_pred, self.bin_distance))
+                    ]
                     cal_preds.append(slope * (uncal_pred) + intercept)
                 except KeyError:
                     # outside of the prediction range ...
use the last # calibration curve if uncal_pred <= cal_min: - slope, intercept = cal_dict[str(round(cal_min, self.bin_dist))] + slope, intercept = cal_dict[ + str(round(cal_min, self.bin_distance)) + ] cal_preds.append(slope * (uncal_pred) + intercept) elif uncal_pred >= cal_max: - slope, intercept = cal_dict[str(round(cal_max, self.bin_dist))] + slope, intercept = cal_dict[ + str(round(cal_max, self.bin_distance)) + ] cal_preds.append(slope * (uncal_pred) + intercept) else: - slope, intercept = cal_dict[str(round(cal_max, self.bin_dist))] + slope, intercept = cal_dict[ + str(round(cal_max, self.bin_distance)) + ] cal_preds.append(slope * (uncal_pred) + intercept) + return np.array(cal_preds) - def make_preds_core_library(self, psm_list=[], calibrate=True, mod_name=None): + def _make_preds_core_library(self, psm_list=None, calibrate=True, mod_name=None): + """Get predictions stored in library and calibrate them if needed.""" + psm_list = [] if psm_list is None else psm_list ret_preds = [] for psm in psm_list: ret_preds.append(LIBRARY[psm.peptidoform.proforma + "|" + mod_name]) if calibrate: - try: - ret_preds = self.calibration_core( - ret_preds, + if isinstance(self.calibrate_min, dict): + # if multiple models are used, use the model name to get the + # calibration values (DeepCallC mode) + calibrate_dict, calibrate_min, calibrate_max = ( self.calibrate_dict[mod_name], self.calibrate_min[mod_name], self.calibrate_max[mod_name], ) - except: - ret_preds = self.calibration_core( - ret_preds, + else: + # if only one model is used, use the same calibration values + calibrate_dict, calibrate_min, calibrate_max = ( self.calibrate_dict, self.calibrate_min, self.calibrate_max, ) + ret_preds = self._apply_calibration_core( + ret_preds, calibrate_dict, calibrate_min, calibrate_max + ) + return ret_preds - def make_preds_core( + def _make_preds_core( self, - X=[], - X_sum=[], - X_global=[], - X_hc=[], - psm_list=[], + X: np.ndarray | None = None, + X_sum: np.ndarray | None = None, + X_global: np.ndarray | None = None, + X_hc: np.ndarray | None = None, calibrate=True, mod_name=None, - ): - """ - Make predictions for sequences - Parameters - ---------- - seq_df : object :: pd.DataFrame - dataframe containing the sequences (column:seq), modifications - (column:modifications) and naming (column:index); will use parallel - by default! 
- seqs : list - peptide sequence list; should correspond to mods and identifiers - mods : list - naming of the mods; should correspond to seqs and identifiers - identifiers : list - identifiers of the peptides; should correspond to seqs and mods - calibrate : boolean - calibrate predictions or just return the predictions - correction_factor : float - correction factor to apply to predictions - mod_name : str or None - specify a model to use instead of the model assigned originally to - this instance of the object - Returns - ------- - np.array - predictions - """ + ) -> np.ndarray: + """Make predictions.""" + # Check calibration state if calibrate: - assert ( - self.calibrate_dict - ), "DeepLC instance is not yet calibrated.\ - Calibrate before making predictions, or use calibrate=False" - - if len(X) == 0 and len(psm_list) > 0: - if self.verbose: - logger.debug("Extracting features for the CNN model ...") - X = self.do_f_extraction_psm_list_parallel(psm_list) - - X_sum = np.stack(list(X["matrix_sum"].values())) - X_global = np.concatenate( - ( - np.stack(list(X["matrix_all"].values())), - np.stack(list(X["pos_matrix"].values())), - ), - axis=1, + assert self.calibrate_dict, ( + "DeepLC instance is not yet calibrated. Calibrate before making predictions, or " + "use `calibrate=False`" ) - X_hc = np.stack(list(X["matrix_hc"].values())) - X = np.stack(list(X["matrix"].values())) - elif len(X) == 0 and len(psm_list) == 0: - return [] + + if len(X) == 0: + return np.array([]) + + dataset = DeepLCDataset(X, X_sum, X_global, X_hc) + loader = DataLoader(dataset, batch_size=self.batch_num_tf, shuffle=False) ret_preds = [] - mod = load_model(mod_name) + mod = torch.load(mod_name, weights_only=False, map_location=torch.device("cpu")) + mod.eval() try: - X - ret_preds = mod.predict( - [X, X_sum, X_global, X_hc], - batch_size=self.batch_num_tf, - verbose=int(self.verbose), - ).flatten() + with torch.no_grad(): + for batch in loader: + batch_X, batch_X_sum, batch_X_global, batch_X_hc = batch + batch_preds = mod(batch_X, batch_X_sum, batch_X_global, batch_X_hc) + ret_preds.append(batch_preds.detach().cpu().numpy()) + + ret_preds = np.concatenate(ret_preds, axis=0) except UnboundLocalError: logger.debug("X is empty, skipping...") ret_preds = [] if calibrate: - try: - ret_preds = self.calibration_core( - ret_preds, + if isinstance(self.calibrate_min, dict): + # if multiple models are used, use the model name to get the + # calibration values (DeepCallC mode) + calibrate_dict, calibrate_min, calibrate_max = ( self.calibrate_dict[mod_name], self.calibrate_min[mod_name], self.calibrate_max[mod_name], ) - except: - ret_preds = self.calibration_core( - ret_preds, + else: + # if only one model is used, use the same calibration values + calibrate_dict, calibrate_min, calibrate_max = ( self.calibrate_dict, self.calibrate_min, self.calibrate_max, ) - # clear_session() + ret_preds = self._apply_calibration_core( + ret_preds, calibrate_dict, calibrate_min, calibrate_max + ) + gc.collect() return ret_preds def make_preds( - self, psm_list=None, infile="", calibrate=True, seq_df=None, mod_name=None + self, + psm_list: PSMList | None = None, + infile: str | Path | None = None, + seq_df: pd.DataFrame | None = None, + calibrate: bool = True, + mod_name: str | None = None, ): """ Make predictions for sequences, in batches if required. 
Parameters ---------- - seq_df : object :: pd.DataFrame - dataframe containing the sequences (column:seq), modifications - (column:modifications) and naming (column:index); will use parallel - by default! - seqs : list - peptide sequence list; should correspond to mods and identifiers - mods : list - naming of the mods; should correspond to seqs and identifiers - identifiers : list - identifiers of the peptides; should correspond to seqs and mods - calibrate : boolean - calibrate predictions or just return the predictions - correction_factor : float - correction factor to apply to predictions - mod_name : str or None - specify a model to use instead of the model assigned originally to - this instance of the object + psm_list + PSMList object containing the peptidoforms to predict for. + infile + Path to a file containing the peptidoforms to predict for. + seq_df + DataFrame containing the sequences (column:seq), modifications + (column:modifications) and naming (column:index). + calibrate + calibrate predictions or just return the predictions. + mod_name + specify a model to use instead of the model assigned originally to this instance of the + object. Returns ------- np.array predictions """ - if type(seq_df) == pd.core.frame.DataFrame: - list_of_psms = [] - if self.predict_ccs: - for seq, mod, ident, z in zip( - seq_df["seq"], - seq_df["modifications"], - seq_df.index, - seq_df["charge"], - ): - list_of_psms.append( - PSM( - peptidoform=peprec_to_proforma(seq, mod, charge=z), - spectrum_id=ident, - ) - ) + if psm_list is None: + if seq_df is not None: + psm_list = _dataframe_to_psm_list(seq_df) + elif infile is not None: + psm_list = _file_to_psm_list(infile) else: - for seq, mod, ident in zip( - seq_df["seq"], seq_df["modifications"], seq_df.index - ): - list_of_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=ident) - ) - psm_list = PSMList(psm_list=list_of_psms) - - if len(infile) > 0: - psm_list = read_file(infile) - if "msms" in infile and ".txt" in infile: - mapper = pd.read_csv( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "unimod/map_mq_file.csv", - ), - index_col=0, - )["value"].to_dict() - psm_list.rename_modifications(mapper) + raise ValueError( + "Either `psm_list` or `seq_df` or `infile` must be provided." 
+                )
+
+        if len(psm_list) == 0:
+            logger.warning("No PSMs to predict for.")
+            return []
 
         ret_preds_batches = []
         for psm_list_t in divide_chunks(psm_list, self.batch_num):
             ret_preds = []
-            if len(psm_list_t) > 0:
-                if self.verbose:
-                    logger.debug("Extracting features for the CNN model ...")
-
-                X = self.do_f_extraction_psm_list_parallel(psm_list_t)
-                X_sum = np.stack(list(X["matrix_sum"].values()))
-                X_global = np.concatenate(
-                    (
-                        np.stack(list(X["matrix_all"].values())),
-                        np.stack(list(X["pos_matrix"].values())),
-                    ),
-                    axis=1,
-                )
-                X_hc = np.stack(list(X["matrix_hc"].values()))
-                X = np.stack(list(X["matrix"].values()))
-            else:
-                return []
-            if isinstance(self.model, dict):
-                for m_group_name, m_name in self.model.items():
-                    ret_preds.append(
-                        self.make_preds_core(
-                            X=X,
-                            X_sum=X_sum,
-                            X_global=X_global,
-                            X_hc=X_hc,
-                            calibrate=calibrate,
-                            mod_name=m_name,
-                        )
-                    )
-                ret_preds = np.array([sum(a) / len(a) for a in zip(*ret_preds)])
-            elif mod_name is not None:
-                ret_preds = self.make_preds_core(
-                    X=X,
-                    X_sum=X_sum,
-                    X_global=X_global,
-                    X_hc=X_hc,
-                    calibrate=calibrate,
-                    mod_name=mod_name,
-                )
+            # Extract features for the CNN model
+            X, X_sum, X_global, X_hc = self._prepare_feature_matrices(psm_list_t)
+
+            # Check if model was provided, and if not, whether multiple models are selected in
+            # the DeepLC object or not.
+            if mod_name:
+                model_names = [mod_name]
+            elif isinstance(self.model, dict):
+                model_names = [m_name for m_group_name, m_name in self.model.items()]
             elif isinstance(self.model, list):
-                for m_name in self.model:
-                    ret_preds.append(
-                        self.make_preds_core(
-                            X=X,
-                            X_sum=X_sum,
-                            X_global=X_global,
-                            X_hc=X_hc,
-                            calibrate=calibrate,
-                            mod_name=m_name,
-                        )
-                    )
-                ret_preds = np.array([sum(a) / len(a) for a in zip(*ret_preds)])
+                model_names = self.model
+            elif isinstance(self.model, str):
+                model_names = [self.model]
+            else:
+                raise ValueError("Invalid model name provided.")
+
+            # Get predictions
+            if len(model_names) > 1:
+                # Iterate over models if multiple were selected
+                model_predictions = []
+                for model_name in model_names:
+                    model_predictions.append(
+                        self._make_preds_core(
+                            X=X,
+                            X_sum=X_sum,
+                            X_global=X_global,
+                            X_hc=X_hc,
+                            calibrate=calibrate,
+                            mod_name=model_name,
+                        )
+                    )
+                # Average the predictions from all models
+                ret_preds = np.mean(model_predictions, axis=0)
             else:
-                ret_preds = self.make_preds_core(
+                # Use the single model
+                ret_preds = self._make_preds_core(
                     X=X,
                     X_sum=X_sum,
                     X_global=X_global,
                     X_hc=X_hc,
                     calibrate=calibrate,
-                    mod_name=self.model,
+                    mod_name=model_names[0],
                 )
-            ret_preds_batches.extend(ret_preds)
-        return ret_preds_batches
-        # TODO make this multithreaded
-        # should be possible with the batched list
+            ret_preds_batches.append(ret_preds)
 
-    def calibrate_preds_func_pygam(
-        self,
-        psm_list=None,
-        correction_factor=1.0,
-        seq_df=None,
-        measured_tr=None,
-        use_median=True,
-        mod_name=None,
-    ):
-        if type(seq_df) == pd.core.frame.DataFrame:
-            list_of_psms = []
-            # TODO include charge here
-            if self.predict_ccs:
-                for seq, mod, ident, tr, z in zip(
-                    seq_df["seq"],
-                    seq_df["modifications"],
-                    seq_df.index,
-                    seq_df["tr"],
-                    seq_df["charge"],
-                ):
-                    list_of_psms.append(
-                        PSM(
-                            peptidoform=peprec_to_proforma(seq, mod, charge=z),
-                            spectrum_id=ident,
-                            retention_time=tr,
-                        )
-                    )
-            else:
-                for seq, mod, ident, tr in zip(
-                    seq_df["seq"], seq_df["modifications"], seq_df.index, seq_df["tr"]
-                ):
-                    list_of_psms.append(
-                        PSM(
peptidoform=peprec_to_proforma(seq, mod), - spectrum_id=ident, - retention_time=tr, - ) - ) - psm_list = PSMList(psm_list=list_of_psms) + all_ret_preds = np.concatenate(ret_preds_batches, axis=0) - measured_tr = [psm.retention_time for psm in psm_list] + return all_ret_preds - predicted_tr = self.make_preds(psm_list, calibrate=False, mod_name=mod_name) + def _calibrate_preds_pygam( + self, + measured_tr: np.ndarray, + predicted_tr: np.ndarray, + ) -> tuple[float, float, list[BaseEstimator]]: + """Make calibration curve for predictions using PyGAM.""" + logger.debug("Getting predictions for calibration...") # sort two lists, predicted and observed based on measured tr tr_sort = [ (mtr, ptr) for mtr, ptr in sorted( - zip(measured_tr, predicted_tr), key=lambda pair: pair[1] + zip(measured_tr, predicted_tr, strict=True), key=lambda pair: pair[1] ) ] measured_tr = np.array([mtr for mtr, ptr in tr_sort], dtype=np.float32) predicted_tr = np.array([ptr for mtr, ptr in tr_sort], dtype=np.float32) - # predicted_tr = list(predicted_tr) - # measured_tr = list(measured_tr) - # Fit a SplineTransformer model if self.deeplc_retrain: spline = SplineTransformer(degree=2, n_knots=10) @@ -889,94 +827,18 @@ def calibrate_preds_func_pygam( [linear_model_left, spline_model, linear_model_right], ) - def calibrate_preds_func( + def _calibrate_preds_piecewise_linear( self, - psm_list=None, - correction_factor=1.0, - seq_df=None, - use_median=True, - mod_name=None, - ): - """ - Make calibration curve for predictions - - Parameters - ---------- - seqs : list - peptide sequence list; should correspond to mods and identifiers - mods : list - naming of the mods; should correspond to seqs and identifiers - identifiers : list - identifiers of the peptides; should correspond to seqs and mods - measured_tr : list - measured tr of the peptides; should correspond to seqs, identifiers, - and mods - correction_factor : float - correction factor that needs to be applied to the supplied measured - trs - seq_df : object :: pd.DataFrame - a pd.DataFrame that contains the sequences, modifications and - observed retention times to fit a calibration curve - use_median : boolean - flag to indicate we need to use the median valuein a window to - perform calibration - mod_name - specify a model to use instead of the model assigned originally to - this instance of the object - - Returns - ------- - float - the minimum value where a calibration curve was fitted, lower values - will be extrapolated from the minimum fit of the calibration curve - float - the maximum value where a calibration curve was fitted, higher values - will be extrapolated from the maximum fit of the calibration curve - dict - dictionary with keys for rounded tr, and the values concern a linear - model that should be applied to do calibration (!!! what is the - shape of this?) 
- """ - if type(seq_df) == pd.core.frame.DataFrame: - list_of_psms = [] - # TODO include charge here - if self.predict_ccs: - for seq, mod, tr, ident, z in zip( - seq_df["seq"], - seq_df["modifications"], - seq_df["tr"], - seq_df.index, - seq_df["charge"], - ): - list_of_psms.append( - PSM( - peptidoform=peprec_to_proforma(seq, mod, charge=z), - spectrum_id=ident, - retention_time=tr, - ) - ) - else: - for seq, mod, tr, ident in zip( - seq_df["seq"], seq_df["modifications"], seq_df["tr"], seq_df.index - ): - list_of_psms.append( - PSM( - peptidoform=peprec_to_proforma(seq, mod), - spectrum_id=ident, - retention_time=tr, - ) - ) - psm_list = PSMList(psm_list=list_of_psms) - - measured_tr = [psm.retention_time for psm in psm_list] - - predicted_tr = self.make_preds(psm_list, calibrate=False, mod_name=mod_name) - + measured_tr: np.ndarray, + predicted_tr: np.ndarray, + use_median: bool = True, + ) -> tuple[float, float, dict[str, tuple[float]]]: + """Make calibration curve for predictions.""" # sort two lists, predicted and observed based on measured tr tr_sort = [ (mtr, ptr) for mtr, ptr in sorted( - zip(measured_tr, predicted_tr), key=lambda pair: pair[1] + zip(measured_tr, predicted_tr, strict=False), key=lambda pair: pair[1] ) ] measured_tr = np.array([mtr for mtr, ptr in tr_sort]) @@ -989,10 +851,9 @@ def calibrate_preds_func( calibrate_min = float("inf") calibrate_max = 0 - if self.verbose: - logger.debug( - "Selecting the data points for calibration (used to fit the linear models between)" - ) + logger.debug( + "Selecting the data points for calibration (used to fit the linear models between)" + ) # smooth between observed and predicted split_val = predicted_tr[-1] / self.split_cal @@ -1021,16 +882,13 @@ def calibrate_preds_func( mtr_mean.append(sum(mtr) / len(mtr)) ptr_mean.append(sum(ptr) / len(ptr)) - if self.verbose: - logger.debug("Fitting the linear models between the points") + logger.debug("Fitting the linear models between the points") if self.split_cal >= len(measured_tr): raise CalibrationError( - "Not enough measured tr ({}) for the chosen number of splits ({}). " - "Choose a smaller split_cal parameter or provide more peptides for " - "fitting the calibration curve.".format( - len(measured_tr), self.split_cal - ) + f"Not enough measured tr ({len(measured_tr)}) for the chosen number of splits " + f"({self.split_cal}). Choose a smaller split_cal parameter or provide more " + "peptides for fitting the calibration curve." 
                )
 
         if len(mtr_mean) == 0:
             raise CalibrationError(
@@ -1054,112 +912,86 @@ def calibrate_preds_func(
             # optimized predictions using a dict to find calibration curve very
             # fast
             for v in np.arange(
-                round(ptr_mean[i], self.bin_dist),
-                round(ptr_mean[i + 1], self.bin_dist),
-                1 / ((self.bin_dist) * self.dict_cal_divider),
+                round(ptr_mean[i], self.bin_distance),
+                round(ptr_mean[i + 1], self.bin_distance),
+                1 / (self.bin_distance * self.dict_cal_divider),
             ):
                 if v < calibrate_min:
                     calibrate_min = v
                 if v > calibrate_max:
                     calibrate_max = v
-                calibrate_dict[str(round(v, self.bin_dist))] = [slope, intercept]
+                calibrate_dict[str(round(v, self.bin_distance))] = (slope, intercept)
 
         return calibrate_min, calibrate_max, calibrate_dict
 
     def calibrate_preds(
         self,
-        psm_list=None,
-        infile="",
-        measured_tr=[],
-        correction_factor=1.0,
-        location_retraining_models="",
-        psm_utils_obj=None,
-        sample_for_calibration_curve=None,
-        seq_df=None,
-        use_median=True,
+        psm_list: PSMList | None = None,
+        infile: str | Path | None = None,
+        seq_df: pd.DataFrame | None = None,
+        measured_tr: np.ndarray | None = None,
+        location_retraining_models: str = "",
+        sample_for_calibration_curve: int | None = None,
+        use_median: bool = True,
         return_plotly_report=False,
-    ):
+    ) -> dict | None:
         """
         Find best model and calibrate.
 
         Parameters
         ----------
-        seqs : list
-            peptide sequence list; should correspond to mods and identifiers
-        mods : list
-            naming of the mods; should correspond to seqs and identifiers
-        identifiers : list
-            identifiers of the peptides; should correspond to seqs and mods
-        measured_tr : list
-            measured tr of the peptides; should correspond to seqs, identifiers,
-            and mods
-        correction_factor : float
-            correction factor that needs to be applied to the supplied measured
-            trs
-        seq_df : object :: pd.DataFrame
-            a pd.DataFrame that contains the sequences, modifications and
-            observed retention times to fit a calibration curve
-        use_median : boolean
-            flag to indicate we need to use the median valuein a window to
-            perform calibration
+        psm_list
+            PSMList object containing the peptidoforms to predict for.
+        infile
+            Path to a file containing the peptidoforms to predict for.
+        seq_df
+            DataFrame containing the sequences (column:seq), modifications (column:modifications),
+            naming (column:index), and optionally charge (column:charge) and measured retention
+            time (column:tr).
+        measured_tr
+            Measured retention times used for calibration. Should correspond to the provided
+            PSMs. If None, the measured retention times are taken from the PSMs.
+        location_retraining_models
+            Location to save the retraining models; if empty, a temporary directory is used.
+        sample_for_calibration_curve
+            Number of PSMs to sample for the calibration curve; if None, all provided PSMs are
+            used.
+        use_median
+            Whether to use the median value in a window to perform calibration; only applies to
+            piecewise linear calibration, not to PyGAM calibration.
+        return_plotly_report
+            Whether to return a plotly report with the calibration results.
 
         Returns
         -------
+        dict | None
+            Dictionary with plotly report information or None.
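+
+        Examples
+        --------
+        A minimal sketch, assuming ``psm_list_cal`` is a PSMList whose PSMs
+        carry observed retention times:
+
+        >>> dlc = DeepLC()
+        >>> dlc.calibrate_preds(psm_list=psm_list_cal)
+        >>> preds = dlc.make_preds(psm_list=psm_list_cal)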
""" - if type(seq_df) == pd.core.frame.DataFrame: - list_of_psms = [] - if self.predict_ccs: - for seq, mod, ident, tr, z in zip( - seq_df["seq"], - seq_df["modifications"], - seq_df.index, - seq_df["tr"], - seq_df["charge"], - ): - list_of_psms.append( - PSM( - peptidoform=peprec_to_proforma(seq, mod, charge=z), - spectrum_id=ident, - retention_time=tr, - ) - ) + # Getting PSMs either from psm_list, seq_df, or infile + if psm_list is None: + if seq_df is not None: + psm_list = _dataframe_to_psm_list(seq_df) + elif infile is not None: + psm_list = _file_to_psm_list(infile) else: - for seq, mod, ident, tr in zip( - seq_df["seq"], seq_df["modifications"], seq_df.index, seq_df["tr"] - ): - list_of_psms.append( - PSM( - peptidoform=peprec_to_proforma(seq, mod), - spectrum_id=ident, - retention_time=tr, - ) - ) - psm_list = PSMList(psm_list=list_of_psms) - elif psm_utils_obj: - psm_list = psm_utils_obj + raise ValueError( + "Either `psm_list` or `seq_df` or `infile` must be provided." + ) + + # Getting measured retention time either from measured_tr or provided PSMs + if not measured_tr: + measured_tr = [psm.retention_time for psm in psm_list] + if None in measured_tr: + raise ValueError("Not all PSMs have an observed retention time.") + # Ensuring self.model is list of strings if isinstance(self.model, str): self.model = [self.model] - if len(infile) > 0: - psm_list = read_file(infile) - if "msms" in infile and ".txt" in infile: - mapper = pd.read_csv( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - "unimod/map_mq_file.csv", - ), - index_col=0, - )["value"].to_dict() - psm_list.rename_modifications(mapper) - - measured_tr = [psm.retention_time for psm in psm_list] - - if self.verbose: - logger.debug("Start to calibrate predictions ...") - if self.verbose: - logger.debug("Ready to find the best model out of: %s" % (self.model)) + logger.debug("Start to calibrate predictions ...") + logger.debug(f"Ready to find the best model out of: {self.model}") best_perf = float("inf") best_calibrate_min = 0.0 @@ -1174,81 +1006,86 @@ def calibrate_preds( temp_pred = [] if self.deeplc_retrain: - # The following code is not required in most cases, but here it is used to clear variables that might cause problems - _ = tf.Variable([1]) + logger.debug("Preparing for model fine-tuning...") - context._context = None - context._create_context() + X, X_sum, X_global, X_hc = self._prepare_feature_matrices(psm_list) + dataset = DeepLCDataset(X, X_sum, X_global, X_hc, np.array(measured_tr)) - tf.config.threading.set_inter_op_parallelism_threads(1) + base_model_path = ( + self.model[0] if isinstance(self.model, list) else self.model + ) + base_model = torch.load( + base_model_path, weights_only=False, map_location=torch.device("cpu") + ) + base_model.eval() - if len(location_retraining_models) > 0: - t_dir_models = TemporaryDirectory().name - os.mkdir(t_dir_models) + fine_tuner = DeepLCFineTuner( + model=base_model, + train_data=dataset, + batch_size=self.batch_num_tf, + epochs=self.n_epochs, + ) + # fine_tuner._freeze_layers() + fine_tuned_model = fine_tuner.fine_tune() + + if location_retraining_models: + os.makedirs(location_retraining_models, exist_ok=True) + temp_dir_obj = TemporaryDirectory() + t_dir_models = temp_dir_obj.name + self._temp_dir_obj = temp_dir_obj else: t_dir_models = location_retraining_models - try: - os.mkdir(t_dir_models) - except: - pass - - # Here we will apply transfer learning we specify previously trained models in the 'mods_transfer_learning' - models = 
deeplcretrainer.retrain( - {"deeplc_transferlearn": psm_list}, - outpath=t_dir_models, - mods_transfer_learning=self.model, - freeze_layers=True, - n_epochs=self.n_epochs, - freeze_after_concat=1, - verbose=self.verbose, - ) + os.makedirs(t_dir_models, exist_ok=True) - self.model = models + # Define path to save fine-tuned model + fine_tuned_model_path = os.path.join(t_dir_models, "fine_tuned_model.pth") + torch.save(fine_tuned_model, fine_tuned_model_path) + self.model = [fine_tuned_model_path] - if isinstance(sample_for_calibration_curve, int): + # Limit calibration to a subset of PSMs if specified + if sample_for_calibration_curve: psm_list = random.sample(list(psm_list), sample_for_calibration_curve) measured_tr = [psm.retention_time for psm in psm_list] - for m in self.model: - if self.verbose: - logger.debug("Trying out the following model: %s" % (m)) + for model_name in self.model: + logger.debug(f"Trying out the following model: {model_name}") + predicted_tr = self.make_preds( + psm_list, calibrate=False, mod_name=model_name + ) + if self.pygam_calibration: - calibrate_output = self.calibrate_preds_func_pygam( - psm_list, - measured_tr=measured_tr, - correction_factor=correction_factor, - seq_df=seq_df, - use_median=use_median, - mod_name=m, + calibrate_output = self._calibrate_preds_pygam( + measured_tr, predicted_tr ) else: - calibrate_output = self.calibrate_preds_func( - psm_list, - correction_factor=correction_factor, - seq_df=seq_df, - use_median=use_median, - mod_name=m, + calibrate_output = self._calibrate_preds_piecewise_linear( + measured_tr, predicted_tr, use_median=use_median ) + self.calibrate_min, self.calibrate_max, self.calibrate_dict = ( + calibrate_output + ) + # TODO: Currently, calibration dict can be both a dict (linear) or a list of models + # (PyGAM)... This should be handled better in the future. + + # Skip this model if calibrate_dict is empty + # TODO: Should this do something when using PyGAM and calibrate_dict is a list? 
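+            # For the piecewise-linear case, calibrate_dict maps rounded predicted
+            # values to (slope, intercept) pairs; an empty dict indicates that no
+            # calibration points could be placed for this model.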
+ if ( + isinstance(self.calibrate_dict, dict) + and len(self.calibrate_dict.keys()) == 0 + ): + continue - ( - self.calibrate_min, - self.calibrate_max, - self.calibrate_dict, - ) = calibrate_output - - if type(self.calibrate_dict) == dict: - if len(self.calibrate_dict.keys()) == 0: - continue - - m_name = m.split("/")[-1] - - preds = self.make_preds(psm_list, calibrate=True, seq_df=seq_df, mod_name=m) + m_name = model_name.split("/")[-1] - if self.deepcallc_mod: - m_group_name = "deepcallc" - else: - m_group_name = "_".join(m_name.split("_")[:-1]) + # Get new predictions with calibration + preds = self.make_preds( + psm_list, calibrate=True, seq_df=seq_df, mod_name=model_name + ) + m_group_name = ( + "deepcallc" if self.deepcallc_mod else "_".join(m_name.split("_")[:-1]) + ) + m = model_name try: pred_dict[m_group_name][m] = preds mod_dict[m_group_name][m] = m @@ -1268,26 +1105,24 @@ def calibrate_preds( mod_calibrate_min_dict[m_group_name][m] = self.calibrate_min mod_calibrate_max_dict[m_group_name][m] = self.calibrate_max - for m_name in pred_dict.keys(): - preds = [sum(a) / len(a) for a in zip(*list(pred_dict[m_name].values()))] + for m_name in pred_dict: + preds = [ + sum(a) / len(a) + for a in zip(*list(pred_dict[m_name].values()), strict=True) + ] if len(measured_tr) == 0: perf = sum(abs(seq_df["tr"] - preds)) else: perf = sum(abs(np.array(measured_tr) - np.array(preds))) - if self.verbose: - logger.debug( - "For %s model got a performance of: %s" - % (m_name, perf / len(preds)) - ) + logger.debug( + f"For {m_name} model got a performance of: {perf / len(preds)}" + ) if perf < best_perf: - if self.deepcallc_mod: - m_group_name = "deepcallc" - else: - m_group_name = m_name - # TODO is deepcopy really required? + m_group_name = "deepcallc" if self.deepcallc_mod else m_name + # TODO is deepcopy really required? best_calibrate_dict = copy.deepcopy(mod_calibrate_dict[m_group_name]) best_calibrate_min = copy.deepcopy(mod_calibrate_min_dict[m_group_name]) best_calibrate_max = copy.deepcopy(mod_calibrate_max_dict[m_group_name]) @@ -1304,21 +1139,22 @@ def calibrate_preds( self.model = best_model if self.deepcallc_mod: - self.deepcallc_model = train_en( + self.deepcallc_model = train_elastic_net( pd.DataFrame(pred_dict["deepcallc"]), seq_df["tr"] ) - # self.n_jobs = 1 - - logger.debug("Model with the best performance got selected: %s" % (best_model)) + logger.debug(f"Model with the best performance got selected: {best_model}") if return_plotly_report: import deeplc.plot plotly_return_dict = {} plotly_df = pd.DataFrame( - list(zip(temp_obs, temp_pred)), - columns=["Observed retention time", "Predicted retention time"], + list(zip(temp_obs, temp_pred, strict=True)), + columns=[ + "Observed retention time", + "Predicted retention time", + ], ) plotly_return_dict["scatter"] = deeplc.plot.scatter(plotly_df) plotly_return_dict["baseline_dist"] = deeplc.plot.distribution_baseline( @@ -1326,26 +1162,97 @@ def calibrate_preds( ) return plotly_return_dict - return {} + return None - def split_seq(self, a, n): - """ - Split a list (a) into multiple chunks (n) - - Parameters - ---------- - a : list - list to split - n : list - number of chunks - Returns - ------- - list - chunked list - """ +def _get_pool(n_jobs: int) -> multiprocessing.Pool | multiprocessing.dummy.Pool: # type: ignore + """Get a Pool object for parallel processing.""" + if multiprocessing.current_process().daemon: + logger.warning( + "DeepLC is running in a daemon process. 
Disabling multiprocessing as daemonic "
+            "processes can't have children."
+        )
+        return multiprocessing.dummy.Pool(1)
+    elif n_jobs == 1:
+        return multiprocessing.dummy.Pool(1)
+    else:
+        max_n_jobs = multiprocessing.cpu_count()
+        if n_jobs > max_n_jobs:
+            logger.warning(
+                f"Number of jobs ({n_jobs}) exceeds the number of CPUs ({max_n_jobs}). "
+                f"Setting number of jobs to {max_n_jobs}."
+            )
+            return multiprocessing.Pool(max_n_jobs)
+        else:
+            return multiprocessing.Pool(n_jobs)
+
+
+def _lists_to_psm_list(
+    sequences: list[str],
+    modifications: list[str | None],
+    identifiers: list[str],
+    charges: list[int] | None,
+    retention_times: list[float] | None = None,
+    n_jobs: int = 1,
+) -> PSMList:
+    """Convert lists into a PSMList using Dask for parallel processing."""
+    if charges is None:
+        charges = [None] * len(sequences)
+
+    if retention_times is None:
+        retention_times = [None] * len(sequences)
+
+    def create_psm(args):
+        sequence, modifications, identifier, charge, retention_time = args
+        return PSM(
+            peptidoform=peprec_to_proforma(sequence, modifications, charge=charge),
+            spectrum_id=identifier,
+            retention_time=retention_time,
+        )

+    args_list = list(
+        zip(
+            sequences, modifications, identifiers, charges, retention_times, strict=True
+        )
+    )
+    tasks = [delayed(create_psm)(args) for args in args_list]
+    list_of_psms = list(compute(*tasks, scheduler="processes", num_workers=n_jobs))
+    return PSMList(psm_list=list_of_psms)
+
+
+# TODO: I'm not sure what the expected behavior was for charges; they were parsed
+# from the dataframe, while the passed list was used to check whether they should be
+# parsed. I'll allow both with a priority for the passed charges.
+def _dataframe_to_psm_list(
+    dataframe: pd.DataFrame,
+    charges: list[int] | None = None,
+    n_jobs: int = 1,
+) -> PSMList:
+    """Convert a DataFrame with sequences, modifications, and identifiers into a PSMList."""
+    sequences = dataframe["seq"]
+    modifications = dataframe["modifications"]
+    identifiers = dataframe.index
+    retention_times = dataframe["tr"].tolist() if "tr" in dataframe.columns else None
+
+    if not charges and "charge" in dataframe.columns:
+        charges = dataframe["charge"].tolist()
+
+    return _lists_to_psm_list(
+        sequences, modifications, identifiers, charges, retention_times, n_jobs=n_jobs
+    )
+
+
+def _file_to_psm_list(input_file: str | Path) -> PSMList:
+    """Read a file into a PSMList, optionally mapping MaxQuant modification labels."""
+    psm_list = read_file(input_file)
+    if "msms" in str(input_file) and ".txt" in str(input_file):
+        mapper = pd.read_csv(
+            os.path.join(
+                os.path.dirname(os.path.realpath(__file__)),
+                "unimod/map_mq_file.csv",
+            ),
+            index_col=0,
+        )["value"].to_dict()
+        psm_list.rename_modifications(mapper)
+
+    return psm_list
diff --git a/deeplc/feat_extractor.py b/deeplc/feat_extractor.py
index 1b12262..ecda640 100644
--- a/deeplc/feat_extractor.py
+++ b/deeplc/feat_extractor.py
@@ -1,659 +1,233 @@
-"""
-This code is used to extract features for peptides used in DeepLC (or seperately
-if required).
+"""Feature extraction for DeepLC."""
-For the library versions see the .yml file
-"""
+from __future__ import annotations
-__author__ = ["Robbin Bouwmeester", "Ralf Gabriels"]
-__credits__ = [
-    "Robbin Bouwmeester",
-    "Ralf Gabriels",
-    "Arthur Declercq",
-    "Prof. 
Lennart Martens", - "Sven Degroeve", -] -__license__ = "Apache License, Version 2.0" -__maintainer__ = ["Robbin Bouwmeester", "Ralf Gabriels"] -__email__ = ["Robbin.Bouwmeester@ugent.be", "Ralf.Gabriels@ugent.be"] - -# Native imports -import os -import math -import time -from configparser import ConfigParser -import ast -from re import sub import logging +import warnings +from re import sub import numpy as np -import pandas as pd -from psm_utils.io.peptide_record import peprec_to_proforma -from psm_utils.psm import PSM -from psm_utils.psm_list import PSMList +from psm_utils.psm import Peptidoform from pyteomics import mass logger = logging.getLogger(__name__) -class FeatExtractor: - """ - Place holder, fill later - - Parameters - ---------- - - Returns - ------- - - """ - - def __init__( - self, - main_path=os.path.dirname(os.path.realpath(__file__)), - lib_path_mod=os.path.join( - os.path.dirname(os.path.realpath(__file__)), "unimod/" - ), - lib_aa_composition=os.path.join( - os.path.dirname(os.path.realpath(__file__)), "aa_comp_rel.csv" - ), - split_size=7, - verbose=True, - include_specific_posses=[0, 1, 2, 3, 4, 5, 6, -1, -2, -3, -4, -5, -6, -7], - add_sum_feat=False, - ptm_add_feat=False, - ptm_subtract_feat=False, - add_rolling_feat=False, - include_unnormalized=True, - add_comp_feat=False, - cnn_feats=True, - ignore_mods=False, - config_file=None, - ): - # if a config file is defined overwrite standard parameters - if config_file: - cparser = ConfigParser() - cparser.read(config_file) - lib_path_mod = cparser.get("featExtractor", "lib_path_mod").strip('"') - split_size = cparser.getint("featExtractor", "split_size") - verbose = cparser.getboolean("featExtractor", "verbose") - add_sum_feat = cparser.getboolean("featExtractor", "add_sum_feat") - ptm_add_feat = cparser.getboolean("featExtractor", "ptm_add_feat") - ptm_subtract_feat = cparser.getboolean("featExtractor", "ptm_subtract_feat") - add_rolling_feat = cparser.getboolean("featExtractor", "add_rolling_feat") - include_unnormalized = cparser.getboolean( - "featExtractor", "include_unnormalized" - ) - include_specific_posses = ast.literal_eval( - cparser.get("featExtractor", "include_specific_posses") - ) - - self.main_path = main_path - - # Get the atomic composition of AAs - self.lib_aa_composition = self.get_aa_composition(lib_aa_composition) - - self.split_size = split_size - self.verbose = verbose - - self.add_sum_feat = add_sum_feat - self.ptm_add_feat = ptm_add_feat - self.ptm_subtract_feat = ptm_subtract_feat - self.add_rolling_feat = add_rolling_feat - self.cnn_feats = cnn_feats - self.include_unnormalized = include_unnormalized - self.include_specific_posses = include_specific_posses - self.add_comp_feat = add_comp_feat - self.ignore_mods = ignore_mods - - def __str__(self): - return """ - _____ _ _____ __ _ _ _ - | __ \ | | / ____| / _| | | | | | | - | | | | ___ ___ _ __ | | | | ______| |_ ___ __ _| |_ _____ _| |_ _ __ __ _ ___| |_ ___ _ __ - | | | |/ _ \/ _ \ '_ \| | | | |______| _/ _ \/ _` | __| / _ \ \/ / __| '__/ _` |/ __| __/ _ \| '__| - | |__| | __/ __/ |_) | |___| |____ | || __/ (_| | |_ | __/> <| |_| | | (_| | (__| || (_) | | - |_____/ \___|\___| .__/|______\_____| |_| \___|\__,_|\__| \___/_/\_\\__|_| \__,_|\___|\__\___/|_| - | | - |_| - """ - - def get_aa_composition(self, file_loc): - """ - Read amino acid atomic composition and return a dictionary - - Parameters - ---------- - file_loc : str - location of the (csv) file that contains the atomic compositions of AAs. 
The first column must contain - the one-letter AA code. The remaining columns contain counts for each atom (each atom in seperate - column). An example is: - - aa,C,H,N,O,S - A,1,2,0,0,0 - R,4,9,3,0,0 - N,2,3,1,1,0 - - Returns - ------- - dict - dictionary that goes from one-letter AA to atomic composition - """ - return pd.read_csv(file_loc, index_col=0).T.to_dict() - - def split_seq(self, a, n): - """ - Split a list (a) into multiple chunks (n) - - Parameters - ---------- - a : list - list to split - n : list - number of chunks - - Returns - ------- - list - chunked list - """ - # since chunking is not alway possible do the modulo of residues - k, m = divmod(len(a), n) - return (a[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)) - - def calc_feats_mods(self, formula): - """ - Chemical formula to atom addition/subtraction - - Parameters - ---------- - formula : str - chemical formula - - Returns - ------- - list - atom naming - list - number of atom added/subtracted - """ - if not formula: - return [], [] - if len(str(formula)) == 0: - return [], [] - if not isinstance(formula, str): - if math.isnan(formula): - return [], [] - - new_atoms = [] - new_num_atoms = [] - for atom in formula.split(" "): - if "(" not in atom: - atom_symbol = atom - num_atom = 1 - else: - atom_symbol = atom.split("(")[0] - num_atom = atom.split("(")[1].rstrip(")") - new_atoms.append(atom_symbol) - new_num_atoms.append(int(num_atom)) - return new_atoms, new_num_atoms - - def get_feats_mods( - self, - seqs, - mods, - identifiers, - split_size=False, - atoms_order=set(["H", "C", "N", "O", "P", "S"]), - add_str="_sum", - subtract_mods=False, - ): - """ - Chemical formula to atom addition/subtraction - - Parameters - ---------- - seqs : list - peptide sequence list; should correspond to mods and identifiers - mods : list - naming of the mods; should correspond to seqs and identifiers - identifiers : str - identifiers of the peptides; should correspond to seqs and mods - split_size : int - overwrite the set split size if needed - atoms_order : set - atoms to include and the order - add_str : str - add this substring to feature naming - subtract_mods : boolean - calculate the atom that are substracted in the PTM - - Returns - ------- - object :: pd.DataFrame - feature matrix for peptide PTMs - """ - if not split_size: - split_size = self.split_size - if self.verbose: - t0 = time.time() - mod_dict = {} - look_up_mod_subtract = {} - look_up_mod_add = {} - - len_init = len( - [ao + str(spl_s) for spl_s in range(split_size) for ao in atoms_order] - ) - for index_name, mod, seq in zip(identifiers, mods, seqs): - mod_dict[index_name] = dict( - zip( - [ - ao + str(spl_s) + add_str - for spl_s in range(split_size) - for ao in atoms_order - ], - [0] * len_init, - ) - ) - - if not mod: - continue - if len(str(mod)) == 0: - continue - if not isinstance(mod, str): - if math.isnan(mod): - continue - - if self.verbose: - logger.debug( - "Time to calculate mod features: %s seconds" % (time.time() - t0) - ) - df_ret = pd.DataFrame(mod_dict, dtype=int).T - del mod_dict - return df_ret - - def encode_atoms( - self, - psm_list, - indexes, - charges=[], - predict_ccs=False, - padding_length=60, - positions=set([0, 1, 2, 3, -1, -2, -3, -4]), - positions_pos=set([0, 1, 2, 3]), - positions_neg=set([-1, -2, -3, -4]), - sum_mods=2, - dict_aa={ - "K": 0, - "R": 1, - "P": 2, - "T": 3, - "N": 4, - "A": 5, - "Q": 6, - "V": 7, - "S": 8, - "G": 9, - "I": 10, - "L": 11, - "C": 12, - "M": 13, - "H": 14, - "F": 15, - "Y": 16, - "W": 
17, - "E": 18, - "D": 19, - }, - dict_index_pos={"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5}, - dict_index_all={"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5}, - dict_index={"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5}, - ): - """ - Extract all features we can extract... Probably the function you want to call by default - - Parameters - ---------- - seqs : list - peptide sequence list; should correspond to mods and identifiers - mods_all : list - naming of the mods; should correspond to seqs and identifiers - indexes : list - identifiers of the peptides; should correspond to seqs and mods - padding_length : int - indicates padding length with 'X'-characters. Shorter sequences are padded. Longer sequences - are sliced shorter (C-terminal > than padding length will be missing) - positions : list - list of positions to include separately, for the C-terminus - provide negative indices - sum_mods : int - value that is used to feed the second head of cerberus with summed information, for example, - a value of 2 would sum the composition of each 2 AAs (i.e. 1+2, 3+4, 5+6 ...) - dict_index_pos : dict - index position of atom for positional features - dict_index_all : dict - index position of atom for overall compositional features - dict_index : dict - index position of atom for compositional features for the whole peptide (each position) - charges : list - optional list with charges, keep empty if these will not effect the predicted value - - Returns - ------- - object :: pd.DataFrame - feature matrix (np.matrix) of all positions (up till padding length) - object :: pd.DataFrame - feature matrix (np.matrix) of summed positions (up till padding length / sum_mods) - object :: pd.DataFrame - feature matrix (np.matrix) of specific positions (from positions argument) - object :: pd.DataFrame - feature matrix (np.matrix) of summed composition - """ - - # Local helper to ensure each unique warning is logged only once. 
- logged_warnings = set() - - def warn_once(message): - if message not in logged_warnings: - logged_warnings.add(message) - logger.warning(message) - - # TODO param flag for CCS prediction - def rolling_sum(a, n=2): - ret = np.cumsum(a, axis=1, dtype=np.float32) - ret[:, n:] = ret[:, n:] - ret[:, :-n] - return ret[:, n - 1 :] - - ret_list = {} - ret_list["matrix"] = {} - ret_list["matrix_sum"] = {} - ret_list["matrix_all"] = {} - ret_list["pos_matrix"] = {} - ret_list["matrix_hc"] = {} - - # Iterate over all instances - for psm, row_index in zip(psm_list, indexes): - peptidoform = psm.peptidoform - seq = peptidoform.sequence - seq_len = len(seq) - charge = psm.get_precursor_charge() - - # For now anything longer than padding length is cut away - # (C-terminal cutting) - if seq_len > padding_length: - seq = seq[0:padding_length] - seq_len = len(seq) - warn_once("Truncating peptide (too long): %s" % (seq)) - - peptide_composition = [mass.std_aa_comp[aa] for aa in seq] - - # Initialize all feature matrices - matrix = np.zeros( - (padding_length, len(dict_index.keys())), dtype=np.float16 - ) - matrix_hc = np.zeros( - (padding_length, len(dict_aa.keys())), dtype=np.float16 - ) - matrix_pos = np.zeros( - (len(positions), len(dict_index.keys())), dtype=np.float16 - ) - - for i, position_composition in enumerate(peptide_composition): - for k, v in position_composition.items(): - try: - matrix[i, dict_index[k]] = v - except IndexError: - warn_once( - f"Could not add the following value: pos {i} for atom {k} with value {v}" - ) - except KeyError: - warn_once( - f"Could not add the following value: pos {i} for atom {k} with value {v}" - ) - - for p in positions_pos: - try: - aa = seq[p] - except: - warn_once(f"Unable to get the following position: {p}") - continue - for atom, val in mass.std_aa_comp[aa].items(): - try: - matrix_pos[p, dict_index_pos[atom]] = val - except KeyError: - warn_once(f"Could not add the following atom: {atom}") - except IndexError: - warn_once(f"Could not add the following atom: {p} {atom} {val}") - - for pn in positions_neg: - try: - aa = seq[seq_len + pn] - except: - warn_once(f"Unable to get the following position: {p}") - continue - for atom, val in mass.std_aa_comp[aa].items(): - try: - matrix_pos[pn, dict_index_pos[atom]] = val - except KeyError: - warn_once(f"Could not add the following atom: {atom}") - except IndexError: - warn_once( - f"Could not add the following atom: {pn} {atom} {val}" - ) - - for i, peptide_position in enumerate(peptidoform.parsed_sequence): - try: - matrix_hc[i, dict_aa[peptide_position[0]]] = 1.0 - except KeyError: - warn_once( - f"Skipping the following (not in library): {i} {peptide_position}" - ) - except IndexError: - warn_once( - f"Could not add the following atom: {i} {peptide_position}" - ) - - if peptide_position[1] is not None: - try: - modification_composition = peptide_position[1][0].composition - except KeyError: - warn_once( - f"Skipping the following (not in library): {peptide_position[1]}" - ) - continue - except: - warn_once( - f"Skipping the following (not in library): {peptide_position[1]}" - ) - continue - - for ( - atom_position_composition, - atom_change, - ) in modification_composition.items(): - try: - matrix[ - i, dict_index[atom_position_composition] - ] += atom_change - if i in positions: - matrix_pos[ - i, dict_index_pos[atom_position_composition] - ] += atom_change - elif i - seq_len in positions: - matrix_pos[ - i - seq_len, - dict_index_pos[atom_position_composition], - ] += atom_change - except KeyError: - try: - 
warn_once( - f"Could not add the following atom: {atom_position_composition}, attempting to replace the [] part" - ) - atom_position_composition = sub( - "\[.*?\]", "", atom_position_composition - ) - matrix[ - i, dict_index[atom_position_composition] - ] += atom_change - if i in positions: - matrix_pos[ - i, dict_index_pos[atom_position_composition] - ] += atom_change - elif i - seq_len in positions: - matrix_pos[ - i - seq_len, - dict_index_pos[atom_position_composition], - ] += atom_change - except KeyError: - warn_once( - f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored" - ) - continue - except: - warn_once( - f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored" - ) - continue - except IndexError: - warn_once( - f"Could not add the following atom: {i} {atom_position_composition} {atom_change}" - ) - except: - warn_once( - f"Could not add the following atom: {atom_position_composition}, second attempt, now ignored" - ) - continue - - matrix_all = np.sum(matrix, axis=0) - matrix_all = np.append(matrix_all, seq_len) - - if predict_ccs: - matrix_all = np.append(matrix_all, (seq.count("H")) / float(seq_len)) - matrix_all = np.append( - matrix_all, - (seq.count("F") + seq.count("W") + seq.count("Y")) / float(seq_len), - ) - matrix_all = np.append( - matrix_all, (seq.count("D") + seq.count("E")) / float(seq_len) - ) - matrix_all = np.append( - matrix_all, (seq.count("K") + seq.count("R")) / float(seq_len) - ) - matrix_all = np.append(matrix_all, charge) - - matrix_sum = rolling_sum(matrix.T, n=2)[:, ::2].T - - ret_list["matrix"][row_index] = matrix - ret_list["matrix_sum"][row_index] = matrix_sum - ret_list["matrix_all"][row_index] = matrix_all - ret_list["pos_matrix"][row_index] = matrix_pos.flatten() - ret_list["matrix_hc"][row_index] = matrix_hc - - return ret_list - - def full_feat_extract( - self, - psm_list=[], - seqs=[], - mods=[], - predict_ccs=False, - identifiers=[], - charges=[], - ): - """ - Extract all features we can extract... 
Probably the function your want to call by default - - Parameters - ---------- - seqs : list - peptide sequence list; should correspond to mods and identifiers - mods : list - naming of the mods; should correspond to seqs and identifiers - identifiers : str - identifiers of the peptides; should correspond to seqs and mods - charges : list - optional list with charges, keep emtpy if these will not effect the predicted value - - Returns - ------- - pd.DataFrame - feature matrix - """ - # TODO Reintroduce for CCS, check CCS flag - if len(seqs) > 0: - list_of_psms = [] - for seq, mod, id in zip(seqs, mods, identifiers): - list_of_psms.append( - PSM(peptidoform=peprec_to_proforma(seq, mod), spectrum_id=id) - ) - psm_list = PSMList(psm_list=list_of_psms) - - if self.verbose: - t0 = time.time() - - # TODO pass CCS flag - if self.add_sum_feat: - if self.verbose: - logger.debug("Extracting compositional sum features for modifications") - X_feats_sum = self.get_feats_mods(psm_list, split_size=1, add_str="_sum") - if self.ptm_add_feat: - if self.verbose: - logger.debug("Extracting compositional add features for modifications") - X_feats_add = self.get_feats_mods( - psm_list, split_size=self.split_size, add_str="_add" - ) - if self.ptm_subtract_feat: - if self.verbose: - logger.debug( - "Extracting compositional subtract features for modifications" - ) - X_feats_neg = self.get_feats_mods( - psm_list, - split_size=self.split_size, - add_str="_subtract", - subtract_mods=True, - ) - if self.cnn_feats: - if self.verbose: - logger.debug("Extracting CNN features") - X_cnn = self.encode_atoms( - psm_list, - list(range(len(psm_list))), - charges=charges, - predict_ccs=predict_ccs, - ) - - if self.cnn_feats: - X = X_cnn - if self.add_sum_feat: +# fmt: off +DEFAULT_POSITIONS: set[int] = {0, 1, 2, 3, -1, -2, -3, -4} +DEFAULT_POSITIONS_POS: set[int] = {0, 1, 2, 3} +DEFAULT_POSITIONS_NEG: set[int] = {-1, -2, -3, -4} +DEFAULT_DICT_AA: dict[str, int] = { + "K": 0, "R": 1, "P": 2, "T": 3, "N": 4, "A": 5, "Q": 6, "V": 7, "S": 8, "G": 9, "I": 10, + "L": 11, "C": 12, "M": 13, "H": 14, "F": 15, "Y": 16, "W": 17, "E": 18, "D": 19, +} +DEFAULT_DICT_INDEX_POS: dict[str, int] = {"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5} +DEFAULT_DICT_INDEX: dict[str, int] = {"C": 0, "H": 1, "N": 2, "O": 3, "S": 4, "P": 5} +# fmt: on + + +def _truncate_sequence(seq: str, max_length: int) -> tuple[str, int]: + """Truncate the sequence if it exceeds the max_length.""" + if len(seq) > max_length: + warnings.warn(f"Truncating peptide (too long): {seq}", stacklevel=2) + seq = seq[:max_length] + return seq, len(seq) + + +def _fill_standard_matrix(seq: str, padding_length: int, dict_index: dict[str, int]) -> np.ndarray: + """Fill the standard composition matrix using mass.std_aa_comp.""" + mat = np.zeros((padding_length, len(dict_index)), dtype=np.float16) + for i, aa in enumerate(seq): + for atom, value in mass.std_aa_comp[aa].items(): try: - X = pd.concat([X, X_feats_sum], axis=1) - except BaseException: - X = X_feats_sum - if self.ptm_add_feat: + mat[i, dict_index[atom]] = value + except (KeyError, IndexError): + warnings.warn(f"Skipping atom {atom} at pos {i}", stacklevel=2) + return mat + + +def _fill_onehot_matrix( + parsed_seq: list, padding_length: int, dict_aa: dict[str, int] +) -> np.ndarray: + """Fill a one-hot matrix based on the parsed sequence tokens.""" + onehot = np.zeros((padding_length, len(dict_aa)), dtype=np.float16) + for i, token in enumerate(parsed_seq): + try: + onehot[i, dict_aa[token[0]]] = 1.0 + except (KeyError, 
IndexError):
+            warnings.warn(f"One-hot skip: {i} {token}", stacklevel=2)
+    return onehot
+
+
+def _fill_pos_matrix(
+    seq: str,
+    seq_len: int,
+    positions_pos: set[int],
+    positions_neg: set[int],
+    dict_index: dict[str, int],
+    dict_index_pos: dict[str, int],
+) -> np.ndarray:
+    """Fill positional matrix for atoms at specific positions."""
+    n_positions = len(positions_pos) + len(positions_neg)
+    pos_mat = np.zeros((n_positions, len(dict_index)), dtype=np.float16)
+    # For positive positions
+    for pos in positions_pos:
+        try:
+            aa = seq[pos]
+        except Exception:
+            warnings.warn(f"Unable to get pos {pos}", stacklevel=2)
+            continue
+        for atom, value in mass.std_aa_comp[aa].items():
             try:
-                X = pd.concat([X, X_feats_sum], axis=1)
-            except BaseException:
-                X = X_feats_sum
-        if self.ptm_add_feat:
+                # Positive positions index rows from the front; negative
+                # positions wrap around to the back of the matrix, matching
+                # the indexing used in _apply_modifications below
+                pos_mat[pos, dict_index_pos[atom]] = value
+            except (KeyError, IndexError):
+                warnings.warn(f"Pos matrix skip: {atom} at pos {pos}", stacklevel=2)
+    # For negative positions
+    for pos in positions_neg:
+        try:
+            aa = seq[seq_len + pos]
+        except Exception:
+            warnings.warn(f"Unable to get pos {pos}", stacklevel=2)
+            continue
+        for atom, value in mass.std_aa_comp[aa].items():
             try:
-                X = pd.concat([X, X_feats_add], axis=1)
-            except BaseException:
-                X = X_feats_add
-        if self.ptm_subtract_feat:
+                pos_mat[pos, dict_index_pos[atom]] = value
+            except (KeyError, IndexError):
+                warnings.warn(f"Pos matrix skip: {atom} at neg pos {pos}", stacklevel=2)
+    return pos_mat
+
+
+def _apply_modifications(
+    mat: np.ndarray,
+    pos_mat: np.ndarray,
+    parsed_seq: list,
+    seq_len: int,
+    dict_index: dict[str, int],
+    dict_index_pos: dict[str, int],
+    positions: set[int],
+) -> None:
+    """Apply modification changes to the matrices."""
+    for i, token in enumerate(parsed_seq):
+        if token[1] is None:
+            continue
+        try:
+            mod_comp = token[1][0].composition
+        except Exception:
+            warnings.warn(f"Skipping mod at pos {i}: {token[1]}", stacklevel=2)
+            continue
+        for atom_comp, change in mod_comp.items():
+            try:
+                mat[i, dict_index[atom_comp]] += change
+                if i in positions:
+                    pos_mat[i, dict_index_pos[atom_comp]] += change
+                elif (i - seq_len) in positions:
+                    pos_mat[i - seq_len, dict_index_pos[atom_comp]] += change
+            except KeyError:
+                try:
+                    warnings.warn(f"Replacing pattern for atom: {atom_comp}", stacklevel=2)
+                    atom_comp_clean = sub(r"\[.*?\]", "", atom_comp)
+                    mat[i, dict_index[atom_comp_clean]] += change
+                    if i in positions:
+                        pos_mat[i, dict_index_pos[atom_comp_clean]] += change
+                    elif (i - seq_len) in positions:
+                        pos_mat[i - seq_len, dict_index_pos[atom_comp_clean]] += change
+                except KeyError:
+                    warnings.warn(f"Ignoring atom {atom_comp} at pos {i}", stacklevel=2)
+                    continue
+            except IndexError:
+                warnings.warn(f"Index error for atom {atom_comp} at pos {i}", stacklevel=2)
+
+
+def _compute_rolling_sum(matrix: np.ndarray, n: int = 2) -> np.ndarray:
+    """Compute a rolling sum over the matrix."""
+    ret = np.cumsum(matrix, axis=1, dtype=np.float32)
+    ret[:, n:] = ret[:, n:] - ret[:, :-n]
+    return ret[:, n - 1 :]
+
+
+def encode_peptidoform(
+    peptidoform: 
Peptidoform | str, + predict_ccs: bool = False, + padding_length: int = 60, + positions: set[int] | None = None, + positions_pos: set[int] | None = None, + positions_neg: set[int] | None = None, + dict_aa: dict[str, int] | None = None, + dict_index_pos: dict[str, int] | None = None, + dict_index: dict[str, int] | None = None, +) -> dict[str, np.ndarray]: + """Extract features from a single peptidoform.""" + positions = positions or DEFAULT_POSITIONS + positions_pos = positions_pos or DEFAULT_POSITIONS_POS + positions_neg = positions_neg or DEFAULT_POSITIONS_NEG + dict_aa = dict_aa or DEFAULT_DICT_AA + dict_index_pos = dict_index_pos or DEFAULT_DICT_INDEX_POS + dict_index = dict_index or DEFAULT_DICT_INDEX + + if isinstance(peptidoform, str): + peptidoform = Peptidoform(peptidoform) + seq = peptidoform.sequence + charge = peptidoform.precursor_charge + seq, seq_len = _truncate_sequence(seq, padding_length) + + std_matrix = _fill_standard_matrix(seq, padding_length, dict_index) + onehot_matrix = _fill_onehot_matrix(peptidoform.parsed_sequence, padding_length, dict_aa) + pos_matrix = _fill_pos_matrix( + seq, seq_len, positions_pos, positions_neg, dict_index, dict_index_pos + ) + _apply_modifications( + std_matrix, + pos_matrix, + peptidoform.parsed_sequence, + seq_len, + dict_index, + dict_index_pos, + positions, + ) + + matrix_all = np.sum(std_matrix, axis=0) + matrix_all = np.append(matrix_all, seq_len) + if predict_ccs: + matrix_all = np.append(matrix_all, (seq.count("H")) / seq_len) + matrix_all = np.append( + matrix_all, (seq.count("F") + seq.count("W") + seq.count("Y")) / seq_len + ) + matrix_all = np.append(matrix_all, (seq.count("D") + seq.count("E")) / seq_len) + matrix_all = np.append(matrix_all, (seq.count("K") + seq.count("R")) / seq_len) + matrix_all = np.append(matrix_all, charge) + + matrix_sum = _compute_rolling_sum(std_matrix.T, n=2)[:, ::2].T + + return { + "matrix": std_matrix, + "matrix_sum": matrix_sum, + "matrix_all": matrix_all, + "pos_matrix": pos_matrix.flatten(), + "matrix_hc": onehot_matrix, + } + + +def aggregate_encodings( + encodings: list[dict[str, np.ndarray]], +) -> dict[str, dict[int, np.ndarray]]: + """Aggregate list of encodings into single dictionary.""" + return {key: {i: enc[key] for i, enc in enumerate(encodings)} for key in encodings[0]} + + +def unpack_features( + features: dict[str, np.ndarray], +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Unpack dictionary with features to numpy arrays.""" + X_sum = np.stack(list(features["matrix_sum"].values())) + X_global = np.concatenate( + ( + np.stack(list(features["matrix_all"].values())), + np.stack(list(features["pos_matrix"].values())), + ), + axis=1, + ) + X_hc = np.stack(list(features["matrix_hc"].values())) + X_main = np.stack(list(features["matrix"].values())) -if __name__ == "__main__": - main() + return X_sum, X_global, X_hc, X_main diff --git a/deeplc/gui.py b/deeplc/gui.py index ae4cf27..47ee007 100644 --- a/deeplc/gui.py +++ b/deeplc/gui.py @@ -1,8 +1,10 @@ """Graphical user interface.""" + import sys from multiprocessing import freeze_support from pathlib import Path import importlib.resources + try: from gooey import Gooey, local_resource_path except ImportError: @@ -17,7 +19,7 @@ # Get path to package_data/images # Workaround with parent of specific file required for Python 3.9+ support -with importlib.resources.path(img_module, 'config_icon.png') as resource: +with importlib.resources.path(img_module, "config_icon.png") as resource: _IMG_DIR = Path(resource).parent @@ 
-27,12 +29,13 @@ tabbed_groups=True, default_size=(720, 480), monospace_display=True, - target=None if getattr(sys, 'frozen', False) else "deeplc-gui" + target=None if getattr(sys, "frozen", False) else "deeplc-gui", ) def start_gui(): """Run main with GUI enabled.""" freeze_support() # Required for multiprocessing with PyInstaller main(gui=True) + if __name__ == "__main__": start_gui() diff --git a/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras b/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras deleted file mode 100644 index 1a1c6af..0000000 Binary files a/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras and /dev/null differ diff --git a/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.pt b/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.pt new file mode 100644 index 0000000..82399a4 Binary files /dev/null and b/deeplc/mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.pt differ diff --git a/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.keras b/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.keras deleted file mode 100644 index 7016886..0000000 Binary files a/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.keras and /dev/null differ diff --git a/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.pt b/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.pt new file mode 100644 index 0000000..2d67b07 Binary files /dev/null and b/deeplc/mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.pt differ diff --git a/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.keras b/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.keras deleted file mode 100644 index e2d23ba..0000000 Binary files a/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.keras and /dev/null differ diff --git a/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.pt b/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.pt new file mode 100644 index 0000000..292a21a Binary files /dev/null and b/deeplc/mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.pt differ diff --git a/deeplc/onnx_model.py b/deeplc/onnx_model.py new file mode 100644 index 0000000..bef6e9b --- /dev/null +++ b/deeplc/onnx_model.py @@ -0,0 +1,31 @@ + +import os +from tensorflow.keras.models import load_model +import tensorflow as tf +import tf2onnx +from onnx2torch import convert +import torch + +deeplc_dir = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_MODELS = [ + "mods/full_hc_PXD005573_pub_1fd8363d9af9dcad3be7553c39396960.keras", + "mods/full_hc_PXD005573_pub_8c22d89667368f2f02ad996469ba157e.keras", + "mods/full_hc_PXD005573_pub_cb975cfdd4105f97efa0b3afffe075cc.keras", +] +DEFAULT_MODELS = [os.path.join(deeplc_dir, dm) for dm in DEFAULT_MODELS] +def _convert_to_onnx(): + for model_path in DEFAULT_MODELS: + if os.path.exists(model_path): + mod = load_model(model_path) + spec = [ + tf.TensorSpec([None, 60, 6], tf.float32, name="input_1"), + tf.TensorSpec([None, 30, 6], tf.float32, name="input_2"), + tf.TensorSpec([None, 55], tf.float32, name="input_3"), + tf.TensorSpec([None, 60, 20], tf.float32, name="input_4"), + ] + onnx_model, _ = tf2onnx.convert.from_keras(mod, input_signature=spec, opset=13) + torch_model = convert(onnx_model) + torch.save(torch_model, model_path.replace(".keras", ".pt")) + +if __name__ == "__main__": + _convert_to_onnx() \ No 
newline at end of file diff --git a/deeplc/plot.py b/deeplc/plot.py index fd337d0..a4b2a37 100644 --- a/deeplc/plot.py +++ b/deeplc/plot.py @@ -12,7 +12,7 @@ def scatter( observed_column: str = "Observed retention time", xaxis_label: str = "Observed retention time", yaxis_label: str = "Predicted retention time", - plot_title: str = "Predicted vs. observed retention times" + plot_title: str = "Predicted vs. observed retention times", ) -> go.Figure: """ Plot a scatter plot of the predicted vs. observed retention times. @@ -42,15 +42,15 @@ def scatter( y=predicted_column, opacity=0.3, ) - + # Draw diagonal line fig.add_scatter( x=[min(df[observed_column]), max(df[observed_column])], y=[min(df[observed_column]), max(df[observed_column])], - mode='lines', - line=dict(color='red', width=3, dash='dash'), + mode="lines", + line=dict(color="red", width=3, dash="dash"), ) - + # Hide legend fig.update_layout( title=plot_title, @@ -58,7 +58,7 @@ def scatter( xaxis_title=xaxis_label, yaxis_title=yaxis_label, ) - + return fig @@ -85,9 +85,7 @@ def distribution_baseline( """ # Get baseline data baseline_df = pd.read_csv( - Path(__file__) - .absolute() - .parent.joinpath("baseline_performance/baseline_predictions.csv") + Path(__file__).absolute().parent.joinpath("baseline_performance/baseline_predictions.csv") ) baseline_df["rel_mae_best"] = baseline_df[ ["rel_mae_transfer_learning", "rel_mae_new_model", "rel_mae_calibrate"] @@ -97,9 +95,7 @@ def distribution_baseline( # Calculate current RMAE and percentile compared to baseline mae = sum(abs(df[observed_column] - df[predicted_column])) / len(df.index) mae_rel = (mae / max(df[observed_column])) * 100 - percentile = round( - (baseline_df["rel_mae_transfer_learning"] < mae_rel).mean() * 100, 1 - ) + percentile = round((baseline_df["rel_mae_transfer_learning"] < mae_rel).mean() * 100, 1) # Calculate x-axis range with 5% padding all_values = np.append(baseline_df["rel_mae_transfer_learning"].values, mae_rel) @@ -140,10 +136,7 @@ def distribution_baseline( ) fig.update_xaxes(range=[x_min, x_max]) fig.update_layout( - title=( - f"Current DeepLC performance compared to {len(baseline_df.index)} " - "datasets" - ), + title=(f"Current DeepLC performance compared to {len(baseline_df.index)} datasets"), xaxis_title="Relative mean absolute error (%)", ) diff --git a/deeplc/trainl3.py b/deeplc/trainl3.py index a28e083..1682485 100644 --- a/deeplc/trainl3.py +++ b/deeplc/trainl3.py @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. -This project was made possible by MASSTRPLAN. MASSTRPLAN received funding -from the Marie Sklodowska-Curie EU Framework for Research and Innovation +This project was made possible by MASSTRPLAN. MASSTRPLAN received funding +from the Marie Sklodowska-Curie EU Framework for Research and Innovation Horizon 2020, under Grant Agreement No. 675132. 
""" @@ -28,7 +28,7 @@ _has_sklearn = True -def train_en(X, y, n_jobs=16, cv=None): +def train_elastic_net(X, y, n_jobs=16, cv=None): """ Function that trains Layer 3 of CALLC (elastic net) @@ -99,7 +99,7 @@ def train_en(X, y, n_jobs=16, cv=None): grid.fit(X, y) crossv_mod.set_params(**grid.best_params_) - + ret_mod.set_params(**grid.best_params_) ret_mod.fit(X, y) diff --git a/examples/deeplc_example.py b/examples/deeplc_example.py index 95a5c5d..2a44b2b 100644 --- a/examples/deeplc_example.py +++ b/examples/deeplc_example.py @@ -1,9 +1,10 @@ # Imports import pandas as pd from matplotlib import pyplot as plt + from deeplc import DeepLC, FeatExtractor -if __name__ == '__main__': +if __name__ == "__main__": # Input files peptide_file = "examples/datasets/dia.csv" @@ -14,14 +15,14 @@ df = df.fillna("") # Generate some identifiers, any kind of identifiers will do - df.index = ["Pep_"+str(dfi) for dfi in df.index] + df.index = ["Pep_" + str(dfi) for dfi in df.index] # Make a feature extraction object. # This step can be skipped if you want to use the default feature extraction # settings. In this example we will use a model that does not use RDKit features # so we skip the chemical descriptor making procedure. - #f_extractor = FeatExtractor(chem_descr_feat=False,verbose=False) + # f_extractor = FeatExtractor(chem_descr_feat=False,verbose=False) f_extractor = FeatExtractor( add_sum_feat=False, ptm_add_feat=False, @@ -30,26 +31,28 @@ chem_descr_feat=False, add_comp_feat=False, cnn_feats=True, - verbose=True + verbose=True, ) # Initiate a DeepLC instance that will perform the calibration and predictions dlc = DeepLC( - path_model=["deeplc/mods/full_hc_hela_hf_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5", - "deeplc/mods/full_hc_hela_hf_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5", - "deeplc/mods/full_hc_hela_hf_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5", - "deeplc/mods/full_hc_PXD005573_mcp_cb975cfdd4105f97efa0b3afffe075cc.hdf5"], + path_model=[ + "deeplc/mods/full_hc_hela_hf_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5", + "deeplc/mods/full_hc_hela_hf_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5", + "deeplc/mods/full_hc_hela_hf_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5", + "deeplc/mods/full_hc_PXD005573_mcp_cb975cfdd4105f97efa0b3afffe075cc.hdf5", + ], cnn_model=True, f_extractor=f_extractor, verbose=True, write_library=True, - use_library="deeplc/library/library.csv" + use_library="deeplc/library/library.csv", ) df["tr"] = list(range(len(df.index))) # To demonstrate DeepLC's callibration, we'll induce some an artificial # transformation into the retention times - df["tr"] = df["tr"]**0.85 + df["tr"] = df["tr"] ** 0.85 # Calibrate the original model based on the new retention times dlc.calibrate_preds(seq_df=df) @@ -57,19 +60,19 @@ print("a") # Make predictions; calibrated and uncalibrated - preds_cal = dlc.make_preds(seq_df=df) + preds_cal = dlc._make_preds(seq_df=df) print("b") - preds_uncal = dlc.make_preds(seq_df=df, calibrate=False) + preds_uncal = dlc._make_preds(seq_df=df, calibrate=False) print("c") # Compare calibrated and uncalibrated predictions - #print("Predictions (calibrated): ", preds_cal) - #print("Predictions (uncalibrated): ", preds_uncal) + # print("Predictions (calibrated): ", preds_cal) + # print("Predictions (uncalibrated): ", preds_uncal) - plt.scatter(df["tr"],preds_cal,label="Calibrated",s=1) - plt.scatter(df["tr"],preds_uncal,label="Uncalibrated",s=1) + plt.scatter(df["tr"], preds_cal, label="Calibrated", s=1) + 
plt.scatter(df["tr"], preds_uncal, label="Uncalibrated", s=1) plt.legend() - plt.savefig('deeplc_calibrated_vs_uncalibrated.png') + plt.savefig("deeplc_calibrated_vs_uncalibrated.png") diff --git a/pyproject.toml b/pyproject.toml index 31e8fe3..49df3f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deeplc" -version = "3.1.8" +version = "4.0.0-dev1" description = "DeepLC: Retention time prediction for (modified) peptides using Deep Learning." readme = "README.md" license = { file = "LICENSE" } @@ -9,6 +9,7 @@ authors = [ { name = "Niels Hulstaert" }, { name = "Arthur Declercq" }, { name = "Ralf Gabriels" }, + { name = "Alireza Nameni" }, { name = "Lennart Martens" }, { name = "Sven Degroeve" }, ] @@ -31,11 +32,12 @@ keywords = [ ] dependencies = [ - "tensorflow>=2.15.0,<3", + "dask>2023", + "torch>=2.6.0,<3", + "onnx2torch>=0.1.5", "numpy>=1.17", "pandas>=0.25", "scikit-learn>=1.2.0", - "deeplcretrainer>=1,<2", "psm_utils>=0.2.3" ] @@ -61,3 +63,13 @@ build-backend = "setuptools.build_meta" [tool.setuptools] packages = ["deeplc"] include-package-data = true + +[tool.ruff] +line-length = 99 +target-version = 'py311' + +[tool.ruff.lint] +select = ["E", "F", "UP", "B", "SIM", "I"] + +[tool.ruff.format] +docstring-code-format = true diff --git a/streamlit/deeplc_streamlit.py b/streamlit/deeplc_streamlit.py index 399dc5d..fcd574d 100644 --- a/streamlit/deeplc_streamlit.py +++ b/streamlit/deeplc_streamlit.py @@ -109,7 +109,8 @@ def _main_page(self): st.subheader("Prediction speed boost") self.user_input["use_library"] = st.checkbox( - "Use prediction library for speed-up", help=self.texts.Help.use_library + "Use prediction library for speed-up", + help=self.texts.Help.use_library, ) st.markdown(self.texts.Help.use_library_agreement) @@ -334,9 +335,9 @@ class Sidebar: --- Currently using the following package versions:
- [![DeepLC](https://img.shields.io/badge/deeplc-{version('deeplc')}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/compomics/deeplc) - [![Tensorflow](https://img.shields.io/badge/tensorflow-{version('tensorflow')}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/tensorflow/tensorflow) - [![Streamlit](https://img.shields.io/badge/streamlit-{version('streamlit')}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/streamlit/streamlit) + [![DeepLC](https://img.shields.io/badge/deeplc-{version("deeplc")}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/compomics/deeplc) + [![Tensorflow](https://img.shields.io/badge/tensorflow-{version("tensorflow")}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/tensorflow/tensorflow) + [![Streamlit](https://img.shields.io/badge/streamlit-{version("streamlit")}-blue?style=flat-square&logoColor=white&logo=pypi)](https://github.com/streamlit/streamlit) Latest DeepLC version:
![PyPI](https://img.shields.io/pypi/v/deeplc?style=flat-square&logoColor=white&logo=pypi)
diff --git a/streamlit/streamlit_utils.py b/streamlit/streamlit_utils.py
index 3e6bec5..c3a7df7 100644
--- a/streamlit/streamlit_utils.py
+++ b/streamlit/streamlit_utils.py
@@ -80,4 +80,4 @@ def hide_streamlit_menu():
 @st.cache
 def save_dataframe(df):
     """Save dataframe to file object, with streamlit cache."""
-    return df.to_csv().encode('utf-8')
+    return df.to_csv().encode("utf-8")
diff --git a/test.png b/test.png
deleted file mode 100644
index 4e3138e..0000000
Binary files a/test.png and /dev/null differ
diff --git a/tests/test_deepcallc.py b/tests/test_deepcallc.py
index 5505b51..798be6e 100644
--- a/tests/test_deepcallc.py
+++ b/tests/test_deepcallc.py
@@ -1,77 +1,84 @@
 import pandas as pd
-from deeplc import DeepLC
 from matplotlib import pyplot as plt
 
+from deeplc import DeepLC
+
+
 def main():
     peptide_file = "temp_data/PXD005573_mcp.csv"
     calibration_file = "temp_data/PXD005573_mcp.csv"
 
     pep_df = pd.read_csv(peptide_file, sep=",")
-    pep_df['modifications'] = pep_df['modifications'].fillna("")
+    pep_df["modifications"] = pep_df["modifications"].fillna("")
 
     cal_df = pd.read_csv(calibration_file, sep=",")
-    cal_df['modifications'] = cal_df['modifications'].fillna("")
-
+    cal_df["modifications"] = cal_df["modifications"].fillna("")
+
     pep_df = pep_df.sample(50)
     cal_df = cal_df.sample(50)
 
-    mods = ["C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            #"C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            #"C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_8c22d89667368f2f02ad996469ba157e.hdf5",
-            #"C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_5ee8aaa41d387bfffb8cda966348937c.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_8c488fed5e0d0b07cf217fe3c30e55c6.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_f3c75e74dd7b16180edf6f6f0d78a4a6.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_tmt_data_consensus_ticnum_filtered_5ee8aaa41d387bfffb8cda966348937c.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_tmt_data_consensus_ticnum_filtered_8c488fed5e0d0b07cf217fe3c30e55c6.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
-            "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5"]
-
-    dlc = DeepLC(write_library=False,
-                 use_library="",
-                 pygam_calibration=True,
-                 deepcallc_mod=True,
-                 path_model=mods,
-                 reload_library=False)
+    mods = [
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        # "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        # "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_8c22d89667368f2f02ad996469ba157e.hdf5",
+        # "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD005573_mcp_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_PXD008783_median_calibrate_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_dia_fixed_mods_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_5ee8aaa41d387bfffb8cda966348937c.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_8c488fed5e0d0b07cf217fe3c30e55c6.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_phospho_kai_li_f3c75e74dd7b16180edf6f6f0d78a4a6.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_prosit_ptm_2020_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_tmt_data_consensus_ticnum_filtered_5ee8aaa41d387bfffb8cda966348937c.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_tmt_data_consensus_ticnum_filtered_8c488fed5e0d0b07cf217fe3c30e55c6.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_120min_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_arabidopsis_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_hf_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_1h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_hela_lumos_2h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_pancreas_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_1h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_plasma_lumos_2h_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_1fd8363d9af9dcad3be7553c39396960.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_8c22d89667368f2f02ad996469ba157e.hdf5",
+        "C:/Users/robbin/Documents/Github/DeepLCModels/full_hc_yeast_60min_psms_aligned_cb975cfdd4105f97efa0b3afffe075cc.hdf5",
+    ]
+
+    dlc = DeepLC(
+        write_library=False,
+        use_library="",
+        pygam_calibration=True,
+        deepcallc_mod=True,
+        path_model=mods,
+        reload_library=False,
+    )
 
     dlc.calibrate_preds(seq_df=cal_df)
-    preds = dlc.make_preds(seq_df=cal_df)
+    preds = dlc._make_preds(seq_df=cal_df)
 
-    plt.scatter(cal_df["tr"],preds)
+    plt.scatter(cal_df["tr"], preds)
     plt.show()
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/tests/test_deeplc.py b/tests/test_deeplc.py
index 4a5af2f..ed60f92 100644
--- a/tests/test_deeplc.py
+++ b/tests/test_deeplc.py
@@ -22,9 +22,9 @@ def _r2_score(y_true, y_pred):
 
 def test_cli_basic():
     """Test command line interface help message."""
-    assert (
-        subprocess.getstatusoutput("deeplc -h")[0] == 0
-    ), "`deeplc -h` returned non-zero status code"
+    assert subprocess.getstatusoutput("deeplc -h")[0] == 0, (
+        "`deeplc -h` returned non-zero status code"
+    )
 
 
 def test_cli_full():
@@ -48,8 +48,10 @@ def test_cli_full():
     train_df = pd.read_csv(file_path_pred)
     model_r2 = _r2_score(train_df["tr"], preds_df["predicted retention time"])
     logging.info("DeepLC R2 score on %s: %f", file_path_pred, model_r2)
-    assert model_r2 > 0.90, f"DeepLC R2 score on {file_path_pred} below 0.9 \
-(was {model_r2})"
+    assert model_r2 > 0.90, (
+        f"DeepLC R2 score on {file_path_pred} below 0.9 \
+(was {model_r2})"
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/test_deeplc_lib.py b/tests/test_deeplc_lib.py
index a17748c..c984196 100644
--- a/tests/test_deeplc_lib.py
+++ b/tests/test_deeplc_lib.py
@@ -1,28 +1,29 @@
 import pandas as pd
-from deeplc import DeepLC
 from matplotlib import pyplot as plt
 
+from deeplc import DeepLC
+
+
 def main():
     peptide_file = "examples/datasets/test_pred.csv"
     calibration_file = "examples/datasets/test_train.csv"
 
     pep_df = pd.read_csv(peptide_file, sep=",")
-    pep_df['modifications'] = pep_df['modifications'].fillna("")
+    pep_df["modifications"] = pep_df["modifications"].fillna("")
 
     cal_df = pd.read_csv(calibration_file, sep=",")
-    cal_df['modifications'] = cal_df['modifications'].fillna("")
-
-    dlc = DeepLC(write_library=False,
-                 use_library="",
-                 reload_library=False)
-    #write_library=True,
-    #use_library="lib.csv",
-    #reload_library=True)
+    cal_df["modifications"] = cal_df["modifications"].fillna("")
+
+    dlc = DeepLC(write_library=False, use_library="", reload_library=False)
+    # write_library=True,
+    # use_library="lib.csv",
+    # reload_library=True)
 
     dlc.calibrate_preds(seq_df=cal_df)
-    preds = dlc.make_preds(seq_df=cal_df)
+    preds = dlc._make_preds(seq_df=pep_df)
 
-    plt.scatter(pep_df["tr"],preds)
+    plt.scatter(pep_df["tr"], preds)
     plt.show()
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/tests/test_deeplc_retrain.py b/tests/test_deeplc_retrain.py
index a67c7a4..365819b 100644
--- a/tests/test_deeplc_retrain.py
+++ b/tests/test_deeplc_retrain.py
@@ -1,28 +1,29 @@
 import pandas as pd
-from deeplc import DeepLC
 from matplotlib import pyplot as plt
 
+from deeplc import DeepLC
+
+
 def main():
     peptide_file = "examples/datasets/test_pred.csv"
     calibration_file = "examples/datasets/test_train.csv"
 
     pep_df = pd.read_csv(peptide_file, sep=",")
-    pep_df['modifications'] = pep_df['modifications'].fillna("")
+    pep_df["modifications"] = pep_df["modifications"].fillna("")
 
     cal_df = pd.read_csv(calibration_file, sep=",")
-    cal_df['modifications'] = cal_df['modifications'].fillna("")
-
-    dlc = DeepLC(write_library=False,
-                 deeplc_retrain=True,
-                 reload_library=False)
-    #write_library=True,
-    #use_library="lib.csv",
-    #reload_library=True)
+    cal_df["modifications"] = cal_df["modifications"].fillna("")
+
+    dlc = DeepLC(write_library=False, deeplc_retrain=True, reload_library=False)
+    # write_library=True,
+    # use_library="lib.csv",
+    # reload_library=True)
 
     dlc.calibrate_preds(seq_df=cal_df)
-    preds = dlc.make_preds(seq_df=cal_df)
+    preds = dlc._make_preds(seq_df=cal_df)
 
-    plt.scatter(cal_df["tr"],preds)
+    plt.scatter(cal_df["tr"], preds)
     plt.show()
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/tests/test_feat_extractor.py b/tests/test_feat_extractor.py
new file mode 100644
index 0000000..8ecc39e
--- /dev/null
+++ b/tests/test_feat_extractor.py
@@ -0,0 +1,93 @@
+import numpy as np
+from psm_utils.psm import Peptidoform
+
+from deeplc.feat_extractor import encode_peptidoform
+
+
+def _check_result_structure(result: dict[str, np.ndarray]) -> None:
+    expected_keys = {"matrix", "matrix_sum", "matrix_all", "pos_matrix", "matrix_hc"}
+    assert expected_keys.issubset(result.keys())
+
+
+def test_encode_peptidoform_without_modification():
+    # Test using a simple peptidoform string without modifications.
+    peptide = "ACDE/2"
+    result = encode_peptidoform(peptide)
+    _check_result_structure(result)
+
+    # Default padding_length is 60 and there are 6 atoms in DEFAULT_DICT_INDEX.
+    assert result["matrix"].shape == (60, 6)
+    # matrix_all is formed by summing matrix (length 6) and appending the sequence length.
+    # So without CCS prediction, matrix_all length should be 7.
+    assert result["matrix_all"].shape == (7,)
+
+    # The sequence is taken from the part before the '/'.
+    expected_seq = "ACDE"
+    # The last element of matrix_all should be the sequence length.
+    assert result["matrix_all"][-1] == len(expected_seq)
+
+
+def test_encode_peptidoform_with_modification():
+    # Test using a peptidoform string with a modification.
+    peptide = "AC[Carbamidomethyl]DE/2"
+    result = encode_peptidoform(peptide)
+    _check_result_structure(result)
+
+    # Check basic structure as before.
+    assert result["matrix"].shape == (60, 6)
+    expected_seq = "ACDE"  # Modification does not alter the base sequence.
+    assert result["matrix_all"][-1] == len(expected_seq)
+
+
+def test_encode_peptidoform_with_ccs_prediction():
+    # Test with predict_ccs=True.
+    peptide = "ACDE/2"
+    result = encode_peptidoform(peptide, predict_ccs=True)
+    _check_result_structure(result)
+
+    # Without predict_ccs, matrix_all has 7 elements (6 sums + sequence length).
+    # With predict_ccs, 5 additional values are appended (ratios and charge),
+    # resulting in a total length of 12.
+    assert result["matrix_all"].shape == (12,)
+
+    # Verify that the last element (charge) matches the precursor charge from Peptidoform.
+    pf = Peptidoform(peptide)
+    assert result["matrix_all"][-1] == pf.precursor_charge
+
+
+def test_encode_peptidoform_feature_values() -> None:
+    """
+    Test that the returned feature values match the expected results
+    for a known peptidoform.
+    """
+    peptide = "ACDE/2"
+    result = encode_peptidoform(peptide, predict_ccs=False, padding_length=60)
+
+    # For peptide "ACDE", using DEFAULT_DICT_INDEX = {"C":0,"H":1,"N":2,"O":3,"S":4,"P":5}
+    # Based on pyteomics.mass.std_aa_comp, the expected composition is:
+    # For 'A': { "C": 3, "H": 5, "N": 1, "O": 1 } -> row0: [3,5,1,1,0,0]
+    # For 'C': { "C": 3, "H": 5, "N": 1, "O": 1, "S": 1 } -> row1: [3,5,1,1,1,0]
+    # For 'D': { "C": 4, "H": 5, "N": 1, "O": 3 } -> row2: [4,5,1,3,0,0]
+    # For 'E': { "C": 5, "H": 7, "N": 1, "O": 3 } -> row3: [5,7,1,3,0,0]
+    #
+    # Sum each column over positions 0 to 3:
+    # C: 3+3+4+5 = 15
+    # H: 5+5+5+7 = 22
+    # N: 1+1+1+1 = 4
+    # O: 1+1+3+3 = 8
+    # S: 0+1+0+0 = 1
+    # P: 0+0+0+0 = 0
+    # Then sequence length appended: 4
+    expected_matrix_all = np.array([15, 22, 4, 8, 1, 0, 4], dtype=np.float32)
+
+    # Check matrix_all values using np.allclose.
+    assert np.allclose(result["matrix_all"], expected_matrix_all), (
+        f"Expected matrix_all: {expected_matrix_all}, got: {result['matrix_all']}"
+    )
+
+    # Also check the first row of the matrix for letter 'A'.
+    # Expected for 'A': [3,5,1,1,0,0]
+    expected_row_A = np.array([3, 5, 1, 1, 0, 0], dtype=np.float16)
+    assert np.allclose(result["matrix"][0][:6], expected_row_A), (
+        f"Expected row for A: {expected_row_A}, got: {result['matrix'][0][:6]}"
+    )
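Context for the `streamlit/streamlit_utils.py` change above: `save_dataframe` returns UTF-8 encoded CSV bytes memoized by Streamlit's cache, which is exactly the shape `st.download_button` expects. A minimal usage sketch follows; the app scaffolding and import path here are hypothetical, only `save_dataframe` itself comes from this diff. Note that newer Streamlit releases replace the deprecated `@st.cache` with `@st.cache_data`.

```python
import pandas as pd
import streamlit as st

# Hypothetical import path; in this repo the helper lives in
# streamlit/streamlit_utils.py.
from streamlit_utils import save_dataframe

df = pd.DataFrame({"seq": ["ACDE", "KKDE"], "tr": [12.3, 4.5]})

# save_dataframe returns cached UTF-8 CSV bytes, so script reruns do not
# re-serialize an unchanged dataframe.
st.download_button(
    label="Download predictions as CSV",
    data=save_dataframe(df),
    file_name="predictions.csv",
    mime="text/csv",
)
```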
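The three test scripts touched above (`test_deepcallc.py`, `test_deeplc_lib.py`, `test_deeplc_retrain.py`) share one calibrate-then-predict pattern. Below is a condensed sketch of that flow, using only constructor arguments and methods that appear in this diff; note that the public `make_preds` is replaced by the private `_make_preds` throughout, so these scripts now call into DeepLC internals.

```python
import pandas as pd
from matplotlib import pyplot as plt

from deeplc import DeepLC

# Calibration peptides; empty modification fields must be "" rather than NaN.
cal_df = pd.read_csv("examples/datasets/test_train.csv")
cal_df["modifications"] = cal_df["modifications"].fillna("")

# Library caching stays disabled in all of these tests.
dlc = DeepLC(write_library=False, use_library="", reload_library=False)

# Fit the calibration on observed retention times, then predict.
dlc.calibrate_preds(seq_df=cal_df)
preds = dlc._make_preds(seq_df=cal_df)

# Observed vs. predicted retention times should hug the diagonal.
plt.scatter(cal_df["tr"], preds)
plt.show()
```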
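`test_cli_full` gates on `_r2_score`, whose body sits outside the hunk shown above. For reference, a standard coefficient-of-determination implementation; this is an assumed equivalent, not necessarily the repo's exact code.

```python
import numpy as np


def _r2_score(y_true, y_pred):
    """Coefficient of determination: R^2 = 1 - SS_res / SS_tot."""
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - y_true.mean()) ** 2)
    return 1.0 - ss_res / ss_tot
```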
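The expected values in `test_encode_peptidoform_feature_values` can be reproduced by hand. The sketch below recomputes the summed atomic composition of "ACDE" from the residue formulas quoted in the test's comments; the composition table is copied from those comments rather than imported from `deeplc`, so it stands in for the real extractor only for these four residues.

```python
import numpy as np

# Residue compositions in (C, H, N, O, S, P) order, as quoted in the test
# comments (values follow pyteomics.mass.std_aa_comp).
RESIDUE_COMP = {
    "A": [3, 5, 1, 1, 0, 0],
    "C": [3, 5, 1, 1, 1, 0],
    "D": [4, 5, 1, 3, 0, 0],
    "E": [5, 7, 1, 3, 0, 0],
}

seq = "ACDE"
matrix = np.array([RESIDUE_COMP[aa] for aa in seq], dtype=np.float32)

# Sum each atom count over all positions, then append the sequence length,
# mirroring how matrix_all is assembled when predict_ccs is False.
matrix_all = np.append(matrix.sum(axis=0), len(seq))

assert np.allclose(matrix_all, [15, 22, 4, 8, 1, 0, 4])
```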