Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
50 commits
Select commit Hold shift + click to select a range
46650a9
wip first commit
franzpoeschel Sep 26, 2025
22a6881
Some cleaning
franzpoeschel Sep 26, 2025
8c4dbb3
Hmm maybe usable
franzpoeschel Sep 26, 2025
50d97fb
Quickly use this in the JSON backend
franzpoeschel Sep 26, 2025
a0b820a
Better and generalized handling for datatypes
franzpoeschel Sep 29, 2025
a45e3b3
structure for aws
franzpoeschel Sep 29, 2025
c68ff32
first untested implementation for S3
franzpoeschel Sep 29, 2025
6ed78e5
Reordering
franzpoeschel Sep 29, 2025
3b19467
continue restructuring
franzpoeschel Sep 29, 2025
a9ab886
Some first little MPI awareness
franzpoeschel Sep 29, 2025
74ac7ee
WIP: Config for external block storage from JSON
franzpoeschel Sep 29, 2025
408ebd6
Add configuration
franzpoeschel Sep 30, 2025
22e5f84
Add option to init AWS API
franzpoeschel Sep 30, 2025
50397c1
Add verifySSL parameter
franzpoeschel Oct 1, 2025
f89a5db
Add TODO comment
franzpoeschel Oct 1, 2025
ee201a4
Add meta information object
franzpoeschel Oct 1, 2025
5956c15
Prepare reloading ext block storage from old file
franzpoeschel Dec 5, 2025
abe149f
Reload config when reading from a JSON file
franzpoeschel Dec 5, 2025
25053c5
WIP: Read from EBS
franzpoeschel Dec 8, 2025
056982c
Base implementation for get()
franzpoeschel Dec 8, 2025
a850643
Untested read impl
franzpoeschel Dec 8, 2025
4bbd101
Basically working reading
franzpoeschel Dec 8, 2025
0e112ea
cleanup
franzpoeschel Dec 8, 2025
e23b35e
Naming fixes
franzpoeschel Dec 8, 2025
3c4c992
wahhh?
franzpoeschel Dec 8, 2025
9990819
Fix double initialization of EBS
franzpoeschel Dec 8, 2025
296470f
Revert "wahhh?"
franzpoeschel Dec 8, 2025
4b8bc2b
Actually use slashes in S3
franzpoeschel Dec 8, 2025
d198f98
Warn on unused restart config
franzpoeschel Dec 8, 2025
33b7e6b
Reapply "wahhh?"
franzpoeschel Dec 8, 2025
61b63fd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Dec 8, 2025
638f105
Use rank identifier also in JSON strings
franzpoeschel Dec 9, 2025
4ed50b8
Extract implementation of convert-toml-json to header
franzpoeschel Dec 9, 2025
1703370
Implement merging
franzpoeschel Dec 9, 2025
36ec4d7
Add merge-json
franzpoeschel Dec 9, 2025
1865b10
Create merge script in parallel json output
franzpoeschel Dec 9, 2025
50c3d9b
Read files from stdin
franzpoeschel Dec 11, 2025
27febb0
Update documentation
franzpoeschel Dec 11, 2025
9c6e825
WIP Async writing
franzpoeschel Jan 5, 2026
66d25e0
Use WriteBuffer type for smart pointers
franzpoeschel Jan 6, 2026
7fa32c0
Async writing works
franzpoeschel Jan 6, 2026
9f50260
Be less verbose
franzpoeschel Jan 6, 2026
a1adc36
Fix late operations in Aws
franzpoeschel Jan 6, 2026
f16bb4a
Async reading
franzpoeschel Jan 6, 2026
62f3c15
Make asyncIO configurable
franzpoeschel Jan 7, 2026
00b0716
Split syncMandatoryOperations and syncAllOperations
franzpoeschel Jan 7, 2026
3645181
Sync unique_ptrs before next step/file
franzpoeschel Jan 7, 2026
d33fe04
Conditional compilation
franzpoeschel Jan 7, 2026
a95a8ce
License headers
franzpoeschel Jan 7, 2026
364eeea
Fix openPMD_USE_AWS=AUTO
franzpoeschel Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 28 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ openpmd_option(MPI "Parallel, Multi-Node I/O for clusters" AUTO)
openpmd_option(HDF5 "HDF5 backend (.h5 files)" AUTO)
openpmd_option(ADIOS2 "ADIOS2 backend (.bp files)" AUTO)
openpmd_option(PYTHON "Enable Python bindings" AUTO)
openpmd_option(AWS "Enable AWS/S3 storage" AUTO)

option(openPMD_INSTALL "Add installation targets" ON)
option(openPMD_INSTALL_RPATH "Add RPATHs to installed binaries" ON)
Expand Down Expand Up @@ -385,9 +386,22 @@ else()
endif()
unset(openPMD_REQUIRED_ADIOS2_COMPONENTS)

# external library: pybind11 (optional)
include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake)
# external library: AWS SDK for C++, S3 component (optional)
# Sets openPMD_HAVE_AWS depending on openPMD_USE_AWS:
#   AUTO   - search for the SDK, enable AWS support only if it is found
#   truthy - the SDK is REQUIRED, configuration fails if it is missing
#   falsy  - AWS support is disabled without searching for the SDK
if(openPMD_USE_AWS STREQUAL AUTO)
find_package(AWSSDK COMPONENTS s3)
if(AWSSDK_FOUND)
set(openPMD_HAVE_AWS TRUE)
else()
set(openPMD_HAVE_AWS FALSE)
endif()
elseif(openPMD_USE_AWS)
find_package(AWSSDK REQUIRED COMPONENTS s3)
set(openPMD_HAVE_AWS TRUE)
else()
set(openPMD_HAVE_AWS FALSE)
endif()

# external library: pybind11 (optional)
include(${openPMD_SOURCE_DIR}/cmake/dependencies/pybind11.cmake)

# Targets #####################################################################
#
Expand Down Expand Up @@ -434,7 +448,12 @@ set(CORE_SOURCE
src/snapshots/IteratorTraits.cpp
src/snapshots/RandomAccessIterator.cpp
src/snapshots/Snapshots.cpp
src/snapshots/StatefulIterator.cpp)
src/snapshots/StatefulIterator.cpp
src/toolkit/ExternalBlockStorage.cpp
src/toolkit/AwsBuilder.cpp
src/toolkit/Aws.cpp
src/toolkit/StdioBuilder.cpp
src/toolkit/Stdio.cpp)
set(IO_SOURCE
src/IO/AbstractIOHandler.cpp
src/IO/AbstractIOHandlerImpl.cpp
Expand Down Expand Up @@ -562,7 +581,11 @@ if(openPMD_HAVE_ADIOS2)
endif()
endif()

# Runtime parameter and API status checks ("asserts")
# Link the AWS SDK libraries when AWS support was enabled above.
# NOTE(review): presumably PUBLIC because the public header
# openPMD/Series.hpp includes <aws/core/Aws.h> under openPMD_HAVE_AWS, so
# downstream targets need the SDK's usage requirements too - confirm.
if(openPMD_HAVE_AWS)
target_link_libraries(openPMD PUBLIC ${AWSSDK_LIBRARIES})
endif()

# Runtime parameter and API status checks ("asserts")
if(openPMD_USE_VERIFY)
target_compile_definitions(openPMD PRIVATE openPMD_USE_VERIFY=1)
else()
Expand Down Expand Up @@ -704,6 +727,7 @@ set(openPMD_TEST_NAMES
set(openPMD_CLI_TOOL_NAMES
ls
convert-toml-json
merge-json
)
set(openPMD_PYTHON_CLI_TOOL_NAMES
pipe
Expand Down
209 changes: 161 additions & 48 deletions include/openPMD/IO/JSON/JSONIOHandlerImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
#include "openPMD/auxiliary/JSON_internal.hpp"
#include "openPMD/backend/Variant_internal.hpp"
#include "openPMD/config.hpp"
#include "openPMD/toolkit/ExternalBlockStorage.hpp"

#include <istream>
#include <nlohmann/json.hpp>
#include <variant>
#if openPMD_HAVE_MPI
#include <mpi.h>
#endif
Expand Down Expand Up @@ -153,8 +155,72 @@ void from_json(const nlohmann::json &j, std::complex<T> &p)
}
} // namespace std

namespace openPMD::internal
{
auto jsonDatatypeToString(Datatype dt) -> std::string;

/*
 * Static helpers that write, verify and dispatch on the "datatype" key of a
 * JSON value.
 */
struct JsonDatatypeHandling
{
    /*
     * Ensures that j carries the datatype name belonging to T.
     *
     * If j has no "datatype" key yet, the key is created with the name for T
     * and true is returned. If the key already exists, j is left untouched
     * and the return value tells whether the stored name equals the one for
     * T.
     */
    template <typename T>
    static auto encodeDatatype(nlohmann::json &j) -> bool
    {
        std::string const expected =
            jsonDatatypeToString(determineDatatype<T>());
        auto entry = j.find("datatype");
        if (entry == j.end())
        {
            j["datatype"] = expected;
            return true;
        }
        return entry.value().get<std::string>() == expected;
    }

    /*
     * Read-only counterpart of encodeDatatype(): returns true only if j has a
     * "datatype" key whose string value equals the name for T_required.
     * A missing key yields false.
     */
    template <typename T_required>
    static auto checkDatatype(nlohmann::json const &j) -> bool
    {
        std::string const expected =
            jsonDatatypeToString(determineDatatype<T_required>());
        auto entry = j.find("datatype");
        if (entry == j.end())
        {
            return false;
        }
        return entry.value().get<std::string>() == expected;
    }

    /*
     * Runtime dispatch on the datatype stored in j.
     *
     * If j has a "datatype" key, its string value is converted via
     * stringToDatatype() and Functor is invoked through switchDatasetType()
     * with the forwarded args; returns true afterwards. Without the key,
     * nothing is called and false is returned.
     */
    template <typename Functor, typename... Args>
    static auto decodeDatatype(nlohmann::json const &j, Args &&...args) -> bool
    {
        auto entry = j.find("datatype");
        if (entry == j.end())
        {
            return false;
        }
        switchDatasetType<Functor>(
            stringToDatatype(entry.value().get<std::string>()),
            std::forward<Args>(args)...);
        return true;
    }
};
} // namespace openPMD::internal

namespace openPMD
{
/*
 * Types used as the alternatives of the std::variant that
 * JSONIOHandlerImpl::DatasetMode derives from.
 */
namespace dataset_mode_types
{
// Empty tag type; instantiated as DatasetMode::Dataset.
struct Dataset_t
{};
// Empty tag type; instantiated as DatasetMode::Template.
struct Template_t
{};
// Alternative that carries a shared handle to an ExternalBlockStorage.
using External_t = std::shared_ptr<ExternalBlockStorage>;
} // namespace dataset_mode_types

class JSONIOHandlerImpl : public AbstractIOHandlerImpl
{
using json = nlohmann::json;
Expand Down Expand Up @@ -241,43 +307,9 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl

void touch(Writable *, Parameter<Operation::TOUCH> const &) override;

std::future<void> flush();

private:
#if openPMD_HAVE_MPI
std::optional<MPI_Comm> m_communicator;
#endif

using FILEHANDLE = std::fstream;

// map each Writable to its associated file
// contains only the filename, without the OS path
std::unordered_map<Writable *, File> m_files;

std::unordered_map<File, std::shared_ptr<nlohmann::json>> m_jsonVals;

// files that have logically, but not physically been written to
std::unordered_set<File> m_dirty;

/*
* Is set by constructor.
*/
FileFormat m_fileFormat{};
void advance(Writable *, Parameter<Operation::ADVANCE> &) override;

/*
* Under which key do we find the backend configuration?
* -> "json" for the JSON backend, "toml" for the TOML backend.
*/
std::string backendConfigKey() const;

/*
* First return value: The location of the JSON value (either "json" or
* "toml") Second return value: The value that was maybe found at this place
*/
std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(openPMD::json::TracingJSON &) const;

std::string m_originalExtension;
std::future<void> flush();

/*
* Was the config value explicitly user-chosen, or are we still working with
Expand All @@ -293,17 +325,36 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
// Dataset IO mode //
/////////////////////

enum class DatasetMode
// Dataset IO mode, expressed as a std::variant over the tag types Dataset_t
// and Template_t and the external-storage handle External_t.
// NOTE(review): the bare `Dataset,` / `Template` lines at the top of the body
// look like leftovers of the former `enum class DatasetMode` (removed side of
// the diff) - confirm against the actual header.
struct DatasetMode
: std::variant<
dataset_mode_types::Dataset_t,
dataset_mode_types::Template_t,
dataset_mode_types::External_t>
{
Dataset,
Template
// Shorthands for the alternative types.
using Dataset_t = dataset_mode_types::Dataset_t;
using Template_t = dataset_mode_types::Template_t;
using External_t = dataset_mode_types::External_t;
// Ready-made instances of the two empty tag alternatives.
constexpr static Dataset_t Dataset{};
constexpr static Template_t Template{};

// The std::variant base class; its assignment operators are inherited so
// that alternatives can be assigned to a DatasetMode directly.
using variant_t = std::variant<
dataset_mode_types::Dataset_t,
dataset_mode_types::Template_t,
External_t>;
using variant_t ::operator=;

// casts needed because of
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90943
// Both overloads return *this viewed as the std::variant base class.
inline auto as_base() const -> variant_t const &
{
return *this;
}
inline auto as_base() -> variant_t &
{
return *this;
}
};

// IOMode m_mode{};
// SpecificationVia m_IOModeSpecificationVia =
// SpecificationVia::DefaultValue; bool m_printedSkippedWriteWarningAlready
// = false;

struct DatasetMode_s
{
// Initialized in init()
Expand All @@ -317,9 +368,20 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
return std::tuple<A, B, C>{
m_mode, m_specificationVia, m_skipWarnings};
}

template <typename F>
auto mapExternalStorage(F &&functor)
{
std::visit(
auxiliary::overloaded{
[&functor](DatasetMode::External_t &externalStorage) {
return static_cast<decltype(functor)>(functor)(
externalStorage);
},
[](auto &&) {}},
m_mode.as_base());
}
};
DatasetMode_s m_datasetMode;
DatasetMode_s retrieveDatasetMode(openPMD::json::TracingJSON &config) const;

///////////////////////
// Attribute IO mode //
Expand All @@ -338,8 +400,58 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
AttributeMode m_mode{};
SpecificationVia m_specificationVia = SpecificationVia::DefaultValue;
};
AttributeMode_s m_attributeMode;

private:
#if openPMD_HAVE_MPI
std::optional<MPI_Comm> m_communicator;
#endif

using FILEHANDLE = std::fstream;

// map each Writable to its associated file
// contains only the filename, without the OS path
std::unordered_map<Writable *, File> m_files;

std::unordered_map<File, std::shared_ptr<nlohmann::json>> m_jsonVals;

// files that have logically, but not physically been written to
std::unordered_set<File> m_dirty;

/*
* Is set by constructor.
*/
FileFormat m_fileFormat{};

/*
* Under which key do we find the backend configuration?
* -> "json" for the JSON backend, "toml" for the TOML backend.
*/
std::string backendConfigKey() const;

/*
* First return value: The location of the JSON value (either "json" or
* "toml") Second return value: The value that was maybe found at this place
*/
std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(openPMD::json::TracingJSON &) const;
static std::pair<std::string, std::optional<openPMD::json::TracingJSON>>
getBackendConfig(
openPMD::json::TracingJSON &, std::string const &configLocation);

std::string m_originalExtension;

/*
* In read mode, we can only open the external block storage backend upon
* opening the JSON file, because it contains meta information relevant
* for configuring the backend.
*/
std::optional<openPMD::json::TracingJSON>
m_deferredExternalBlockstorageConfig;
DatasetMode_s m_datasetMode;
DatasetMode_s
retrieveDatasetMode(openPMD::json::TracingJSON &config, bool do_init);

AttributeMode_s m_attributeMode;
AttributeMode_s
retrieveAttributeMode(openPMD::json::TracingJSON &config) const;

Expand Down Expand Up @@ -389,7 +501,8 @@ class JSONIOHandlerImpl : public AbstractIOHandlerImpl
// essentially: m_i = \prod_{j=0}^{i-1} extent_j
static Extent getMultiplicators(Extent const &extent);

static std::pair<Extent, DatasetMode> getExtent(nlohmann::json &j);
static std::pair<Extent, DatasetMode>
getExtent(nlohmann::json &j, DatasetMode const &baseMode);

// remove single '/' in the beginning and end of a string
static std::string removeSlashes(std::string);
Expand Down
8 changes: 8 additions & 0 deletions include/openPMD/Series.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
#include "openPMD/snapshots/Snapshots.hpp"
#include "openPMD/version.hpp"

#if openPMD_HAVE_AWS
#include <aws/core/Aws.h>
#endif

#if openPMD_HAVE_MPI
#include <mpi.h>
#endif
Expand Down Expand Up @@ -239,6 +243,10 @@ namespace internal
std::optional<std::function<AbstractIOHandler *(Series &)>>
m_deferred_initialization = std::nullopt;

#if openPMD_HAVE_AWS
std::optional<Aws::SDKOptions> m_manageAwsAPI = std::nullopt;
#endif

void close();

#if openPMD_HAVE_MPI
Expand Down
Loading
Loading