From 8e79825d32a8c9e3e8ded0e83fe8daa334e378f9 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 28 Nov 2025 13:53:16 +0100 Subject: [PATCH 01/26] add ZipStore tests diff --git c/src/main/java/dev/zarr/zarrjava/store/ZipStore.java i/src/main/java/dev/zarr/zarrjava/store/ZipStore.java new file mode 100644 index 0000000..054917f --- /dev/null +++ i/src/main/java/dev/zarr/zarrjava/store/ZipStore.java @@ -0,0 +1,72 @@ +package dev.zarr.zarrjava.store; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; + +public class ZipStore implements Store, Store.ListableStore { + @Nonnull + private final Path path; + + public ZipStore(@Nonnull Path path) { + this.path = path; + } + + public ZipStore(@Nonnull String path) { + this.path = Paths.get(path); + } + + + @Override + public Stream list(String[] keys) { + return Stream.empty(); + } + + @Override + public boolean exists(String[] keys) { + return false; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + return null; + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + + } + + @Override + public void delete(String[] keys) { + + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public String toString() { + return this.path.toUri().toString().replaceAll("\\/$", ""); + } + +} diff --git c/src/test/java/dev/zarr/zarrjava/Utils.java i/src/test/java/dev/zarr/zarrjava/Utils.java new file mode 100644 index 0000000..0026200 --- /dev/null +++ i/src/test/java/dev/zarr/zarrjava/Utils.java @@ -0,0 +1,40 @@ +package dev.zarr.zarrjava; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +public class Utils { + + static void zipFile(File fileToZip, String fileName, ZipOutputStream zipOut) throws IOException { + if (fileToZip.isHidden()) { + return; + } + if (fileToZip.isDirectory()) { + if (fileName.endsWith("/")) { + zipOut.putNextEntry(new ZipEntry(fileName)); + zipOut.closeEntry(); + } else { + zipOut.putNextEntry(new ZipEntry(fileName + "/")); + zipOut.closeEntry(); + } + File[] children = fileToZip.listFiles(); + for (File childFile : children) { + zipFile(childFile, fileName + "/" + childFile.getName(), zipOut); + } + return; + } + FileInputStream fis = new FileInputStream(fileToZip); + ZipEntry zipEntry = new ZipEntry(fileName); + zipOut.putNextEntry(zipEntry); + byte[] bytes = new byte[1024]; + int length; + while ((length = fis.read(bytes)) >= 0) { + zipOut.write(bytes, 0, length); + } + fis.close(); + } + +} diff --git c/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java i/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 4a369c9..c7d2ab4 100644 --- c/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ i/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -7,16 +7,22 @@ import dev.zarr.zarrjava.v3.*; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.CsvSource; import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.util.Arrays; import java.util.stream.Stream; +import java.nio.file.Path; +import java.util.zip.ZipOutputStream; +import static dev.zarr.zarrjava.Utils.zipFile; import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; public class ZarrStoreTest extends ZarrTest { @@ -132,4 +138,72 @@ public class ZarrStoreTest extends ZarrTest { Assertions.assertEquals("test group", attrs.getString("description")); } + + @Test + public void testZipStore() throws ZarrException, IOException { + Path sourceDir = TESTOUTPUT.resolve("testZipStore"); + Path targetDir = TESTOUTPUT.resolve("testZipStore.zip"); + FilesystemStore fsStore = new FilesystemStore(sourceDir); + writeTestGroupV3(fsStore, true); + + FileOutputStream fos = new FileOutputStream(targetDir.toFile()); + ZipOutputStream zipOut = new ZipOutputStream(fos); + + File fileToZip = new File(sourceDir.toUri()); + zipFile(fileToZip, fileToZip.getName(), zipOut); + zipOut.close(); + fos.close(); + + ZipStore zipStore = new ZipStore(targetDir); + assertIsTestGroupV3(Group.open(zipStore.resolve()), true); + } + + static Stream localStores() { + return Stream.of( +// new ConcurrentMemoryStore(), + new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), + new ZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + ); + } + + @ParameterizedTest + @MethodSource("localStores") + public void testLocalStores(Store store) throws IOException, ZarrException { + boolean useParallel = true; + Group group = writeTestGroupV3(store, useParallel); + assertIsTestGroupV3(group, useParallel); + } + + int[] testData(){ + int[] testData = new int[1024 * 1024]; + Arrays.setAll(testData, p -> p); + return testData; + } + + Group writeTestGroupV3(Store store, boolean useParallel) throws ZarrException, IOException { + StoreHandle storeHandle = store.resolve(); + + Group group = Group.create(storeHandle); + Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(DataType.UINT32) + .withChunkShape(5, 5) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), useParallel); + group.createGroup("subgroup"); + group.setAttributes(new Attributes(b -> b.set("some", "value"))); + return group; + } + + void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException { + Stream nodes = group.list(); + Assertions.assertEquals(2, nodes.count()); + Array array = (Array) group.get("array"); + Assertions.assertNotNull(array); + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData(), (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + Attributes attrs = group.metadata().attributes; + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } } --- .../dev/zarr/zarrjava/store/ZipStore.java | 72 ++++++++++++++++++ src/test/java/dev/zarr/zarrjava/Utils.java | 40 ++++++++++ .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 74 +++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 src/main/java/dev/zarr/zarrjava/store/ZipStore.java create mode 100644 src/test/java/dev/zarr/zarrjava/Utils.java diff --git a/src/main/java/dev/zarr/zarrjava/store/ZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java new file mode 100644 index 0000000..054917f --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java @@ -0,0 +1,72 @@ +package dev.zarr.zarrjava.store; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; + +public class ZipStore implements Store, Store.ListableStore { + @Nonnull + private final Path path; + + public ZipStore(@Nonnull Path path) { + this.path = path; + } + + public ZipStore(@Nonnull String path) { + this.path = Paths.get(path); + } + + + @Override + public Stream list(String[] keys) { + return Stream.empty(); + } + + @Override + public boolean exists(String[] keys) { + return false; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + return null; + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + + } + + @Override + public void delete(String[] keys) { + + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public String toString() { + return this.path.toUri().toString().replaceAll("\\/$", ""); + } + +} diff --git a/src/test/java/dev/zarr/zarrjava/Utils.java b/src/test/java/dev/zarr/zarrjava/Utils.java new file mode 100644 index 0000000..0026200 --- /dev/null +++ b/src/test/java/dev/zarr/zarrjava/Utils.java @@ -0,0 +1,40 @@ +package dev.zarr.zarrjava; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +public class Utils { + + static void zipFile(File fileToZip, String fileName, ZipOutputStream zipOut) throws IOException { + if (fileToZip.isHidden()) { + return; + } + if (fileToZip.isDirectory()) { + if (fileName.endsWith("/")) { + zipOut.putNextEntry(new ZipEntry(fileName)); + zipOut.closeEntry(); + } else { + zipOut.putNextEntry(new ZipEntry(fileName + "/")); + zipOut.closeEntry(); + } + File[] children = fileToZip.listFiles(); + for (File childFile : children) { + zipFile(childFile, fileName + "/" + childFile.getName(), zipOut); + } + return; + } + FileInputStream fis = new FileInputStream(fileToZip); + ZipEntry zipEntry = new ZipEntry(fileName); + zipOut.putNextEntry(zipEntry); + byte[] bytes = new byte[1024]; + int length; + while ((length = fis.read(bytes)) >= 0) { + zipOut.write(bytes, 0, length); + } + fis.close(); + } + +} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 4a369c9..c7d2ab4 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -7,16 +7,22 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.CsvSource; import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.util.Arrays; import java.util.stream.Stream; +import java.nio.file.Path; +import java.util.zip.ZipOutputStream; +import static dev.zarr.zarrjava.Utils.zipFile; import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; public class ZarrStoreTest extends ZarrTest { @@ -132,4 +138,72 @@ public void testMemoryStoreV2(boolean useParallel) throws ZarrException, IOExcep Assertions.assertEquals("test group", attrs.getString("description")); } + + @Test + public void testZipStore() throws ZarrException, IOException { + Path sourceDir = TESTOUTPUT.resolve("testZipStore"); + Path targetDir = TESTOUTPUT.resolve("testZipStore.zip"); + FilesystemStore fsStore = new FilesystemStore(sourceDir); + writeTestGroupV3(fsStore, true); + + FileOutputStream fos = new FileOutputStream(targetDir.toFile()); + ZipOutputStream zipOut = new ZipOutputStream(fos); + + File fileToZip = new File(sourceDir.toUri()); + zipFile(fileToZip, fileToZip.getName(), zipOut); + zipOut.close(); + fos.close(); + + ZipStore zipStore = new ZipStore(targetDir); + assertIsTestGroupV3(Group.open(zipStore.resolve()), true); + } + + static Stream localStores() { + return Stream.of( +// new ConcurrentMemoryStore(), + new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), + new ZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + ); + } + + @ParameterizedTest + @MethodSource("localStores") + public void testLocalStores(Store store) throws IOException, ZarrException { + boolean useParallel = true; + Group group = writeTestGroupV3(store, useParallel); + assertIsTestGroupV3(group, useParallel); + } + + int[] testData(){ + int[] testData = new int[1024 * 1024]; + Arrays.setAll(testData, p -> p); + return testData; + } + + Group writeTestGroupV3(Store store, boolean useParallel) throws ZarrException, IOException { + StoreHandle storeHandle = store.resolve(); + + Group group = Group.create(storeHandle); + Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(DataType.UINT32) + .withChunkShape(5, 5) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), useParallel); + group.createGroup("subgroup"); + group.setAttributes(new Attributes(b -> b.set("some", "value"))); + return group; + } + + void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException { + Stream nodes = group.list(); + Assertions.assertEquals(2, nodes.count()); + Array array = (Array) group.get("array"); + Assertions.assertNotNull(array); + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData(), (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + Attributes attrs = group.metadata().attributes; + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } } From 2611738824f132a28aeff456e0ee41f3733e162f Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Mon, 1 Dec 2025 20:32:32 +0100 Subject: [PATCH 02/26] refactor and unify outputs of Store.list --- .../java/dev/zarr/zarrjava/core/Group.java | 7 ++++++- .../zarr/zarrjava/store/FilesystemStore.java | 12 ++++++++--- .../dev/zarr/zarrjava/store/MemoryStore.java | 21 +++++++++---------- .../java/dev/zarr/zarrjava/store/S3Store.java | 4 ++-- .../java/dev/zarr/zarrjava/store/Store.java | 15 ++++++++++++- .../dev/zarr/zarrjava/store/StoreHandle.java | 2 +- src/main/java/dev/zarr/zarrjava/v2/Group.java | 2 +- src/main/java/dev/zarr/zarrjava/v3/Group.java | 2 +- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 4 ++-- 9 files changed, 46 insertions(+), 23 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/core/Group.java b/src/main/java/dev/zarr/zarrjava/core/Group.java index d8b9a6b..6f8a4d0 100644 --- a/src/main/java/dev/zarr/zarrjava/core/Group.java +++ b/src/main/java/dev/zarr/zarrjava/core/Group.java @@ -64,7 +64,12 @@ public static Group open(String path) throws IOException, ZarrException { } @Nullable - public abstract Node get(String key) throws ZarrException, IOException; + public abstract Node get(String[] key) throws ZarrException, IOException; + + @Nullable + public Node get(String key) throws ZarrException, IOException { + return get(new String[]{key}); + } public Stream list() { return storeHandle.list() diff --git a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java index f0b01cc..5640a1a 100644 --- a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java @@ -120,10 +120,16 @@ public void delete(String[] keys) { throw new RuntimeException(e); } } - - public Stream list(String[] keys) { + public Stream list(String[] keys) { try { - return Files.list(resolveKeys(keys)).map(p -> p.toFile().getName()); + return Files.list(resolveKeys(keys)).map(path -> { + Path relativePath = resolveKeys(keys).relativize(path); + String[] parts = new String[relativePath.getNameCount()]; + for (int i = 0; i < relativePath.getNameCount(); i++) { + parts[i] = relativePath.getName(i).toString(); + } + return parts; + }); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java index c1bbb9d..d97cffe 100644 --- a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java @@ -59,19 +59,18 @@ public void delete(String[] keys) { map.remove(resolveKeys(keys)); } - public Stream list(String[] keys) { - List prefix = resolveKeys(keys); - Set allKeys = new HashSet<>(); + public Stream list(String[] keys) { + List prefix = resolveKeys(keys); + Set> allKeys = new HashSet<>(); - for (List k : map.keySet()) { - if (k.size() <= prefix.size() || ! k.subList(0, prefix.size()).equals(prefix)) - continue; - for (int i = 0; i < k.size(); i++) { - List subKey = k.subList(0, i+1); - allKeys.add(String.join("/", subKey)); + for (List k : map.keySet()) { + if (k.size() <= prefix.size() || ! k.subList(0, prefix.size()).equals(prefix)) + continue; + for (int i = prefix.size(); i < k.size(); i++) { + allKeys.add(k.subList(0, i+1)); + } } - } - return allKeys.stream(); + return allKeys.stream().map(k -> k.toArray(new String[0])); } @Nonnull diff --git a/src/main/java/dev/zarr/zarrjava/store/S3Store.java b/src/main/java/dev/zarr/zarrjava/store/S3Store.java index 27aef77..58c8d08 100644 --- a/src/main/java/dev/zarr/zarrjava/store/S3Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/S3Store.java @@ -104,7 +104,7 @@ public void delete(String[] keys) { } @Override - public Stream list(String[] keys) { + public Stream list(String[] keys) { final String fullKey = resolveKeys(keys); ListObjectsRequest req = ListObjectsRequest.builder() .bucket(bucketName).prefix(fullKey) @@ -112,7 +112,7 @@ public Stream list(String[] keys) { ListObjectsResponse res = s3client.listObjects(req); return res.contents() .stream() - .map(p -> p.key().substring(fullKey.length() + 1)); + .map(p -> p.key().substring(fullKey.length() + 1).split("/")); } @Nonnull diff --git a/src/main/java/dev/zarr/zarrjava/store/Store.java b/src/main/java/dev/zarr/zarrjava/store/Store.java index c92906d..451bf79 100644 --- a/src/main/java/dev/zarr/zarrjava/store/Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/Store.java @@ -27,6 +27,19 @@ public interface Store { interface ListableStore extends Store { - Stream list(String[] keys); + /** + * Lists all keys in the store that match the given prefix keys. Keys are represented as arrays of strings, + * where each string is a segment of the key path. + * Keys that are exactly equal to the prefix are not included in the results. + * Keys that do not contain data (i.e. "directories") are included in the results. + * + * @param keys The prefix keys to match. + * @return A stream of key arrays that match the given prefix. Prefixed keys are not included in the results. + */ + Stream list(String[] keys); + + default Stream list() { + return list(new String[]{}); + } } } diff --git a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java index b82424f..e731a39 100644 --- a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java +++ b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java @@ -56,7 +56,7 @@ public boolean exists() { return store.exists(keys); } - public Stream list() { + public Stream list() { if (!(store instanceof Store.ListableStore)) { throw new UnsupportedOperationException("The underlying store does not support listing."); } diff --git a/src/main/java/dev/zarr/zarrjava/v2/Group.java b/src/main/java/dev/zarr/zarrjava/v2/Group.java index c568294..8551a76 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Group.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Group.java @@ -169,7 +169,7 @@ public static Group create(String path, Attributes attributes) throws IOExceptio * @throws IOException if there is an error accessing the storage */ @Nullable - public Node get(String key) throws ZarrException, IOException { + public Node get(String[] key) throws ZarrException, IOException { StoreHandle keyHandle = storeHandle.resolve(key); try { return Node.open(keyHandle); diff --git a/src/main/java/dev/zarr/zarrjava/v3/Group.java b/src/main/java/dev/zarr/zarrjava/v3/Group.java index d17eb77..2436051 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/Group.java +++ b/src/main/java/dev/zarr/zarrjava/v3/Group.java @@ -182,7 +182,7 @@ public static Group create(String path) throws IOException, ZarrException { * @throws IOException if there is an error accessing the storage */ @Nullable - public Node get(String key) throws ZarrException, IOException{ + public Node get(String[] key) throws ZarrException, IOException{ StoreHandle keyHandle = storeHandle.resolve(key); try { return Node.open(keyHandle); diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index c7d2ab4..2cb0888 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -123,7 +123,7 @@ public void testMemoryStoreV2(boolean useParallel) throws ZarrException, IOExcep dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b .withShape(1024, 1024) .withDataType(dev.zarr.zarrjava.v2.DataType.UINT32) - .withChunks(5, 5) + .withChunks(512, 512) ); array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); group.createGroup("subgroup"); @@ -195,7 +195,7 @@ Group writeTestGroupV3(Store store, boolean useParallel) throws ZarrException, I return group; } - void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException { + void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException, IOException { Stream nodes = group.list(); Assertions.assertEquals(2, nodes.count()); Array array = (Array) group.get("array"); From 5e2e017c6657e5707ee1d212bd3f6fb866b183d3 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Mon, 1 Dec 2025 21:18:54 +0100 Subject: [PATCH 03/26] read zip store --- .../zarr/zarrjava/store/BufferedZipStore.java | 185 ++++++++++++++++++ .../dev/zarr/zarrjava/store/ZipStore.java | 72 ------- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 23 ++- 3 files changed, 202 insertions(+), 78 deletions(-) create mode 100644 src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java delete mode 100644 src/main/java/dev/zarr/zarrjava/store/ZipStore.java diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java new file mode 100644 index 0000000..93ab126 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -0,0 +1,185 @@ +package dev.zarr.zarrjava.store; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; + + +/** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. + */ +public class BufferedZipStore implements Store, Store.ListableStore { + + private final StoreHandle underlyingStore; + private final Store.ListableStore bufferStore; + + private void writeBuffer() throws IOException{ + // create zip file bytes from buffer store and write to underlying store + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + +// try (ZipOutputStream zos = new ZipOutputStream(baos)) { +// // iterate over bufferStore.list() +// for (String entry: bufferStore.list(new String[]{}).toArray(String[]::new)) { +// List pathComponents = entry.getKey(); +// byte[] data = entry.getValue(); +// +// // Build the ZIP path (e.g. ["dir", "sub", "file.txt"] → "dir/sub/file.txt") +// String path = String.join("/", pathComponents); +// +// ZipEntry zipEntry = new ZipEntry(path); +// zos.putNextEntry(zipEntry); +// +// zos.write(data); +// zos.closeEntry(); +// } +// } + +// byte[] zipBytes = baos.toByteArray(); +// return ByteBuffer.wrap(zipBytes); +// +// underlyingStore.set(); + } + + private void loadBuffer() throws IOException{ + // read zip file bytes from underlying store and populate buffer store + ByteBuffer buffer = underlyingStore.read(); + if (buffer == null) { + return; + } + try (ZipInputStream zis = new ZipInputStream(new ByteBufferBackedInputStream(buffer))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.isDirectory()) { + zis.closeEntry(); + continue; + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] tmp = new byte[8192]; + int read; + while ((read = zis.read(tmp)) != -1) { + baos.write(tmp, 0, read); + } + + byte[] bytes = baos.toByteArray(); + System.out.println("Loading entry: " + entry.getName() + " (" + bytes.length + " bytes)"); + + bufferStore.set(new String[]{entry.getName()}, ByteBuffer.wrap(bytes)); + + zis.closeEntry(); + } + } + + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { + this.underlyingStore = underlyingStore; + this.bufferStore = bufferStore; + try { + loadBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to load buffer from underlying store", e); + } + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore) { + this(underlyingStore, new MemoryStore()); + } + + public BufferedZipStore(@Nonnull Path underlyingStore) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString())); + System.out.println("Created BufferedZipStore with underlying path: " + this.underlyingStore.toString()); + + } + + public BufferedZipStore(@Nonnull String underlyingStorePath) { + this(Paths.get(underlyingStorePath)); + } + + /** + * Flushes the buffer to the underlying store as a zip file. + */ + public void flush() throws IOException { + writeBuffer(); + } + + @Override + public Stream list(String[] keys) { + return bufferStore.list(keys); + } + + @Override + public boolean exists(String[] keys) { + return bufferStore.exists(keys); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return bufferStore.get(keys); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return bufferStore.get(keys, start); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + return bufferStore.get(keys, start, end); + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + bufferStore.set(keys, bytes); + } + + @Override + public void delete(String[] keys) { + bufferStore.delete(keys); + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public String toString() { + return "BufferedZipStore(" + underlyingStore.toString() + ")"; + } + + static class ByteBufferBackedInputStream extends InputStream { + private final ByteBuffer buf; + + public ByteBufferBackedInputStream(ByteBuffer buf) { + this.buf = buf; + } + + @Override + public int read() { + return buf.hasRemaining() ? (buf.get() & 0xFF) : -1; + } + + @Override + public int read(byte[] bytes, int off, int len) { + if (!buf.hasRemaining()) { + return -1; + } + + int toRead = Math.min(len, buf.remaining()); + buf.get(bytes, off, toRead); + return toRead; + } + } + +} diff --git a/src/main/java/dev/zarr/zarrjava/store/ZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java deleted file mode 100644 index 054917f..0000000 --- a/src/main/java/dev/zarr/zarrjava/store/ZipStore.java +++ /dev/null @@ -1,72 +0,0 @@ -package dev.zarr.zarrjava.store; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; -import java.nio.ByteBuffer; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.stream.Stream; - -public class ZipStore implements Store, Store.ListableStore { - @Nonnull - private final Path path; - - public ZipStore(@Nonnull Path path) { - this.path = path; - } - - public ZipStore(@Nonnull String path) { - this.path = Paths.get(path); - } - - - @Override - public Stream list(String[] keys) { - return Stream.empty(); - } - - @Override - public boolean exists(String[] keys) { - return false; - } - - @Nullable - @Override - public ByteBuffer get(String[] keys) { - return null; - } - - @Nullable - @Override - public ByteBuffer get(String[] keys, long start) { - return null; - } - - @Nullable - @Override - public ByteBuffer get(String[] keys, long start, long end) { - return null; - } - - @Override - public void set(String[] keys, ByteBuffer bytes) { - - } - - @Override - public void delete(String[] keys) { - - } - - @Nonnull - @Override - public StoreHandle resolve(String... keys) { - return new StoreHandle(this, keys); - } - - @Override - public String toString() { - return this.path.toUri().toString().replaceAll("\\/$", ""); - } - -} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 2cb0888..93813fa 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -140,7 +140,7 @@ public void testMemoryStoreV2(boolean useParallel) throws ZarrException, IOExcep } @Test - public void testZipStore() throws ZarrException, IOException { + public void testOpenZipStore() throws ZarrException, IOException { Path sourceDir = TESTOUTPUT.resolve("testZipStore"); Path targetDir = TESTOUTPUT.resolve("testZipStore.zip"); FilesystemStore fsStore = new FilesystemStore(sourceDir); @@ -150,19 +150,30 @@ public void testZipStore() throws ZarrException, IOException { ZipOutputStream zipOut = new ZipOutputStream(fos); File fileToZip = new File(sourceDir.toUri()); - zipFile(fileToZip, fileToZip.getName(), zipOut); + zipFile(fileToZip, "", zipOut); zipOut.close(); fos.close(); - ZipStore zipStore = new ZipStore(targetDir); + BufferedZipStore zipStore = new BufferedZipStore(targetDir); assertIsTestGroupV3(Group.open(zipStore.resolve()), true); } + @Test + public void testWriteZipStore() throws ZarrException, IOException { + Path targetDir = TESTOUTPUT.resolve("testWriteZipStore.zip"); + BufferedZipStore zipStore = new BufferedZipStore(targetDir); + writeTestGroupV3(zipStore, true); + zipStore.flush(); + + BufferedZipStore zipStoreRead = new BufferedZipStore(targetDir); + assertIsTestGroupV3(Group.open(zipStoreRead.resolve()), true); + } + static Stream localStores() { return Stream.of( -// new ConcurrentMemoryStore(), + new MemoryStore(), new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), - new ZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) ); } @@ -187,7 +198,7 @@ Group writeTestGroupV3(Store store, boolean useParallel) throws ZarrException, I Array array = group.createArray("array", b -> b .withShape(1024, 1024) .withDataType(DataType.UINT32) - .withChunkShape(5, 5) + .withChunkShape(512, 512) ); array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), useParallel); group.createGroup("subgroup"); From 4a9f7f0e09fbf30949a1a0eb76bbeb46dcd213e5 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 4 Dec 2025 10:45:22 +0100 Subject: [PATCH 04/26] Bump to 0.0.6 to trigger release There are apparently cases where release: [created] leads to the deploy not being triggered. Attempting an expansion to [created, published]. diff --git c/pom.xml i/pom.xml index f4c1091..9c9d45d 100644 --- c/pom.xml +++ i/pom.xml @@ -6,7 +6,7 @@ dev.zarr zarr-java - 0.0.9 + 0.0.6 zarr-java @@ -123,6 +123,17 @@ + + + ossrh + https://s01.oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + + + unidata-all @@ -221,16 +232,6 @@ - - org.sonatype.central - central-publishing-maven-plugin - 0.8.0 - true - - central - true - - --- pom.xml | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index f4c1091..9c9d45d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ dev.zarr zarr-java - 0.0.9 + 0.0.6 zarr-java @@ -123,6 +123,17 @@ + + + ossrh + https://s01.oss.sonatype.org/content/repositories/snapshots + + + ossrh + https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ + + + unidata-all @@ -221,16 +232,6 @@ - - org.sonatype.central - central-publishing-maven-plugin - 0.8.0 - true - - central - true - - From 99081e542773fb9e28140f5477685bb900093916 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 4 Dec 2025 15:33:11 +0100 Subject: [PATCH 05/26] Bump to 0.0.7 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9c9d45d..b88c855 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ dev.zarr zarr-java - 0.0.6 + 0.0.7 zarr-java From 268890e4ae9a03bdd34da65f1f762716f7322f58 Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 4 Dec 2025 16:15:07 +0100 Subject: [PATCH 06/26] Bump to 0.0.8 Use new sonatype plugin for upload --- pom.xml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index b88c855..685393b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ dev.zarr zarr-java - 0.0.7 + 0.0.8 zarr-java @@ -123,17 +123,6 @@ - - - ossrh - https://s01.oss.sonatype.org/content/repositories/snapshots - - - ossrh - https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/ - - - unidata-all @@ -232,6 +221,16 @@ + + org.sonatype.central + central-publishing-maven-plugin + 0.8.0 + true + + central + true + + From b9e6db43a97660beda94969968004b0b04a1f26c Mon Sep 17 00:00:00 2001 From: Josh Moore Date: Thu, 4 Dec 2025 17:06:42 +0100 Subject: [PATCH 07/26] Bump to 0.0.9 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 685393b..f4c1091 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ dev.zarr zarr-java - 0.0.8 + 0.0.9 zarr-java From 08afc3682c7bf576a2f9c7f605cd18c19616338c Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 5 Dec 2025 13:13:59 +0100 Subject: [PATCH 08/26] write buffer of zip store --- .../zarr/zarrjava/store/BufferedZipStore.java | 65 +++++++++++++------ src/test/java/dev/zarr/zarrjava/Utils.java | 48 ++++++++++++++ .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 28 ++++---- 3 files changed, 105 insertions(+), 36 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 93ab126..bf55e1b 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -11,6 +11,7 @@ import java.util.stream.Stream; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; /** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. @@ -23,28 +24,50 @@ public class BufferedZipStore implements Store, Store.ListableStore { private void writeBuffer() throws IOException{ // create zip file bytes from buffer store and write to underlying store ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try (ZipOutputStream zos = new ZipOutputStream(baos)) { + // iterate all entries provided by bufferStore.list() + bufferStore.list().forEach(keys -> { + try { + if (keys == null || keys.length == 0) { + // skip root entry + return; + } + String entryName = String.join("/", keys); + ByteBuffer bb = bufferStore.get(keys); + if (bb == null) { + // directory entry: ensure trailing slash + if (!entryName.endsWith("/")) { + entryName = entryName + "/"; + } + zos.putNextEntry(new ZipEntry(entryName)); + zos.closeEntry(); + } else { + // read bytes from ByteBuffer without modifying original + ByteBuffer dup = bb.duplicate(); + int len = dup.remaining(); + byte[] bytes = new byte[len]; + dup.get(bytes); + zos.putNextEntry(new ZipEntry(entryName)); + zos.write(bytes); + zos.closeEntry(); + } + } catch (IOException e) { + // wrap checked exception so it can be rethrown from stream for handling below + throw new RuntimeException(e); + } + }); + zos.finish(); + } catch (RuntimeException e) { + // unwrap and rethrow IOExceptions thrown inside the lambda + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw e; + } -// try (ZipOutputStream zos = new ZipOutputStream(baos)) { -// // iterate over bufferStore.list() -// for (String entry: bufferStore.list(new String[]{}).toArray(String[]::new)) { -// List pathComponents = entry.getKey(); -// byte[] data = entry.getValue(); -// -// // Build the ZIP path (e.g. ["dir", "sub", "file.txt"] → "dir/sub/file.txt") -// String path = String.join("/", pathComponents); -// -// ZipEntry zipEntry = new ZipEntry(path); -// zos.putNextEntry(zipEntry); -// -// zos.write(data); -// zos.closeEntry(); -// } -// } - -// byte[] zipBytes = baos.toByteArray(); -// return ByteBuffer.wrap(zipBytes); -// -// underlyingStore.set(); + byte[] zipBytes = baos.toByteArray(); + // write zip bytes back to underlying store + underlyingStore.set(ByteBuffer.wrap(zipBytes)); } private void loadBuffer() throws IOException{ diff --git a/src/test/java/dev/zarr/zarrjava/Utils.java b/src/test/java/dev/zarr/zarrjava/Utils.java index 0026200..da57f0d 100644 --- a/src/test/java/dev/zarr/zarrjava/Utils.java +++ b/src/test/java/dev/zarr/zarrjava/Utils.java @@ -2,12 +2,28 @@ import java.io.File; import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.BufferedOutputStream; +import java.nio.file.Path; +import java.nio.file.Files; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import java.util.zip.ZipInputStream; public class Utils { + static void zipFile(Path sourceDir, Path targetDir) throws IOException { + FileOutputStream fos = new FileOutputStream(targetDir.toFile()); + ZipOutputStream zipOut = new ZipOutputStream(fos); + + File fileToZip = new File(sourceDir.toUri()); + + zipFile(fileToZip, "", zipOut); + zipOut.close(); + fos.close(); + } + static void zipFile(File fileToZip, String fileName, ZipOutputStream zipOut) throws IOException { if (fileToZip.isHidden()) { return; @@ -37,4 +53,36 @@ static void zipFile(File fileToZip, String fileName, ZipOutputStream zipOut) thr fis.close(); } + /** + * Unzip sourceZip into targetDir. + * Protects against Zip Slip by ensuring extracted paths remain inside targetDir. + */ + static void unzipFile(Path sourceZip, Path targetDir) throws IOException { + Files.createDirectories(targetDir); + try (FileInputStream fis = new FileInputStream(sourceZip.toFile()); + ZipInputStream zis = new ZipInputStream(fis)) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + Path outPath = targetDir.resolve(entry.getName()).normalize(); + Path targetDirNorm = targetDir.normalize(); + if (!outPath.startsWith(targetDirNorm)) { + throw new IOException("Zip entry is outside of the target dir: " + entry.getName()); + } + if (entry.isDirectory() || entry.getName().endsWith("/")) { + Files.createDirectories(outPath); + } else { + Files.createDirectories(outPath.getParent()); + try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outPath.toFile()))) { + byte[] buffer = new byte[1024]; + int len; + while ((len = zis.read(buffer)) > 0) { + bos.write(buffer, 0, len); + } + } + } + zis.closeEntry(); + } + } + } + } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 93813fa..4a26215 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -13,15 +13,13 @@ import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3Client; -import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Files; import java.util.Arrays; import java.util.stream.Stream; import java.nio.file.Path; -import java.util.zip.ZipOutputStream; +import static dev.zarr.zarrjava.Utils.unzipFile; import static dev.zarr.zarrjava.Utils.zipFile; import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; @@ -146,13 +144,7 @@ public void testOpenZipStore() throws ZarrException, IOException { FilesystemStore fsStore = new FilesystemStore(sourceDir); writeTestGroupV3(fsStore, true); - FileOutputStream fos = new FileOutputStream(targetDir.toFile()); - ZipOutputStream zipOut = new ZipOutputStream(fos); - - File fileToZip = new File(sourceDir.toUri()); - zipFile(fileToZip, "", zipOut); - zipOut.close(); - fos.close(); + zipFile(sourceDir, targetDir); BufferedZipStore zipStore = new BufferedZipStore(targetDir); assertIsTestGroupV3(Group.open(zipStore.resolve()), true); @@ -160,20 +152,26 @@ public void testOpenZipStore() throws ZarrException, IOException { @Test public void testWriteZipStore() throws ZarrException, IOException { - Path targetDir = TESTOUTPUT.resolve("testWriteZipStore.zip"); - BufferedZipStore zipStore = new BufferedZipStore(targetDir); + Path path = TESTOUTPUT.resolve("testWriteZipStore.zip"); + BufferedZipStore zipStore = new BufferedZipStore(path); writeTestGroupV3(zipStore, true); zipStore.flush(); - BufferedZipStore zipStoreRead = new BufferedZipStore(targetDir); + BufferedZipStore zipStoreRead = new BufferedZipStore(path); assertIsTestGroupV3(Group.open(zipStoreRead.resolve()), true); + + Path unzippedPath = TESTOUTPUT.resolve("testWriteZipStoreUnzipped"); + + unzipFile(path, unzippedPath); + FilesystemStore fsStore = new FilesystemStore(unzippedPath); + assertIsTestGroupV3(Group.open(fsStore.resolve()), true); } static Stream localStores() { return Stream.of( new MemoryStore(), - new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), - new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")) +// new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) ); } From 0cacc5bc2e86f90b3f1f0fc52ee218987dfdb08f Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 5 Dec 2025 19:42:27 +0100 Subject: [PATCH 09/26] use apache commons compress for zip file read and write --- pom.xml | 5 ++ .../zarr/zarrjava/store/BufferedZipStore.java | 88 ++++++++++++++----- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 71 +++++++++++++++ 3 files changed, 141 insertions(+), 23 deletions(-) diff --git a/pom.xml b/pom.xml index f4c1091..ea7e597 100644 --- a/pom.xml +++ b/pom.xml @@ -121,6 +121,11 @@ 4.13.1 test + + org.apache.commons + commons-compress + 1.28.0 + diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index bf55e1b..ac67286 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -9,9 +9,15 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.stream.Stream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipOutputStream; + +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.compress.archivers.zip.Zip64Mode; + +import java.util.zip.CRC32; +import java.util.zip.ZipEntry; // for STORED constant /** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. @@ -20,11 +26,19 @@ public class BufferedZipStore implements Store, Store.ListableStore { private final StoreHandle underlyingStore; private final Store.ListableStore bufferStore; + private final String archiveComment; private void writeBuffer() throws IOException{ // create zip file bytes from buffer store and write to underlying store ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try (ZipOutputStream zos = new ZipOutputStream(baos)) { + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(baos)) { + // always use zip64 + zos.setUseZip64(Zip64Mode.Always); + // set archive comment if provided + if (archiveComment != null) { + zos.setComment(archiveComment); + } + // iterate all entries provided by bufferStore.list() bufferStore.list().forEach(keys -> { try { @@ -39,17 +53,30 @@ private void writeBuffer() throws IOException{ if (!entryName.endsWith("/")) { entryName = entryName + "/"; } - zos.putNextEntry(new ZipEntry(entryName)); - zos.closeEntry(); + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); + dirEntry.setMethod(ZipEntry.STORED); + dirEntry.setSize(0); + dirEntry.setCrc(0); + zos.putArchiveEntry(dirEntry); + zos.closeArchiveEntry(); } else { // read bytes from ByteBuffer without modifying original ByteBuffer dup = bb.duplicate(); int len = dup.remaining(); byte[] bytes = new byte[len]; dup.get(bytes); - zos.putNextEntry(new ZipEntry(entryName)); + + // compute CRC and set size for STORED (no compression) + CRC32 crc = new CRC32(); + crc.update(bytes, 0, bytes.length); + ZipArchiveEntry fileEntry = new ZipArchiveEntry(entryName); + fileEntry.setMethod(ZipEntry.STORED); + fileEntry.setSize(bytes.length); + fileEntry.setCrc(crc.getValue()); + + zos.putArchiveEntry(fileEntry); zos.write(bytes); - zos.closeEntry(); + zos.closeArchiveEntry(); } } catch (IOException e) { // wrap checked exception so it can be rethrown from stream for handling below @@ -76,11 +103,12 @@ private void loadBuffer() throws IOException{ if (buffer == null) { return; } - try (ZipInputStream zis = new ZipInputStream(new ByteBufferBackedInputStream(buffer))) { - ZipEntry entry; - while ((entry = zis.getNextEntry()) != null) { + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { +// this.archiveComment = zis.getComment(); + ArchiveEntry aentry; + while ((aentry = zis.getNextEntry()) != null) { + ZipArchiveEntry entry = (ZipArchiveEntry) aentry; if (entry.isDirectory()) { - zis.closeEntry(); continue; } ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -89,21 +117,17 @@ private void loadBuffer() throws IOException{ while ((read = zis.read(tmp)) != -1) { baos.write(tmp, 0, read); } - byte[] bytes = baos.toByteArray(); - System.out.println("Loading entry: " + entry.getName() + " (" + bytes.length + " bytes)"); - bufferStore.set(new String[]{entry.getName()}, ByteBuffer.wrap(bytes)); - - zis.closeEntry(); } } } - public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { this.underlyingStore = underlyingStore; this.bufferStore = bufferStore; + this.archiveComment = archiveComment; try { loadBuffer(); } catch (IOException e) { @@ -111,27 +135,45 @@ public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.Lis } } + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { + this(underlyingStore, bufferStore, null); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, String archiveComment) { + this(underlyingStore, new MemoryStore(), archiveComment); + } + public BufferedZipStore(@Nonnull StoreHandle underlyingStore) { - this(underlyingStore, new MemoryStore()); + this(underlyingStore, (String) null); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, String archiveComment) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString()), archiveComment); } public BufferedZipStore(@Nonnull Path underlyingStore) { - this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString())); - System.out.println("Created BufferedZipStore with underlying path: " + this.underlyingStore.toString()); + this(underlyingStore, null); + } + public BufferedZipStore(@Nonnull String underlyingStorePath, String archiveComment) { + this(Paths.get(underlyingStorePath), archiveComment); } public BufferedZipStore(@Nonnull String underlyingStorePath) { - this(Paths.get(underlyingStorePath)); + this(underlyingStorePath, null); } /** - * Flushes the buffer to the underlying store as a zip file. + * Flushes the buffer and archiveComment to the underlying store as a zip file. */ public void flush() throws IOException { writeBuffer(); } + public String getArchiveComment() { + return archiveComment; + } + @Override public Stream list(String[] keys) { return bufferStore.list(keys); diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 4a26215..ddaad8f 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -4,6 +4,10 @@ import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.store.*; import dev.zarr.zarrjava.v3.*; +import org.apache.commons.compress.archivers.zip.*; + +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipFile; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -16,11 +20,14 @@ import java.io.IOException; import java.nio.file.Files; import java.util.Arrays; +import java.util.Collections; import java.util.stream.Stream; import java.nio.file.Path; +import java.util.zip.ZipEntry; import static dev.zarr.zarrjava.Utils.unzipFile; import static dev.zarr.zarrjava.Utils.zipFile; + import static dev.zarr.zarrjava.v3.Node.makeObjectMapper; public class ZarrStoreTest extends ZarrTest { @@ -167,6 +174,70 @@ public void testWriteZipStore() throws ZarrException, IOException { assertIsTestGroupV3(Group.open(fsStore.resolve()), true); } + @Test + public void testZipStoreWithComment() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreWithComment.zip"); + String comment = "{\"ome\": { \"version\": \"XX.YY\" }}"; + BufferedZipStore zipStore = new BufferedZipStore(path, comment); + writeTestGroupV3(zipStore, true); + zipStore.flush(); + + try (java.util.zip.ZipFile zipFile = new java.util.zip.ZipFile(path.toFile())) { + String retrievedComment = zipFile.getComment(); + Assertions.assertEquals(comment, retrievedComment, "ZIP archive comment does not match expected value."); + } + + Assertions.assertEquals(comment, new BufferedZipStore(path).getArchiveComment(), "ZIP archive comment from store does not match expected value."); + } + + /** + * Test that ZipStore meets requirements for underlying store of Zipped OME-Zarr + * @see RFC-9: Zipped OME-Zarr + * + * Features to test: + * - ZIP64 format + * - No ZIP-level compression + * - Option to add archive comments in the ZIP file header + * - Prohibit nested or multi-part ZIP archives + * - "The root-level zarr.json file SHOULD be the first ZIP file entry and the first entry in the central directory header; other zarr.json files SHOULD follow immediately afterwards, in breadth-first order." + */ + @Test + public void testZipStoreRequirements() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreRequirements.zip"); + BufferedZipStore zipStore = new BufferedZipStore(path); + writeTestGroupV3(zipStore, true); + zipStore.flush(); + + // test for ZIP64 +// List fileHeaders = new ZipFile(path.toFile()).getFileHeaders(); +// +// HeaderReader headerReader = new HeaderReader(); +// ZipModel zipModel = headerReader.readAllHeaders(new RandomAccessFile(generatedZipFile, +// RandomAccessFileMode.READ.getValue()), buildDefaultConfig()); +// assertThat(zipModel.getZip64EndOfCentralDirectoryLocator()).isNotNull(); +// assertThat(zipModel.getZip64EndOfCentralDirectoryRecord()).isNotNull(); +// assertThat(zipModel.isZip64Format()).isTrue(); + + try (ZipFile zip = new ZipFile(path.toFile())) { + for (ZipArchiveEntry e : Collections.list(zip.getEntries())) { + System.out.println(e.getName()); + ZipExtraField[] extraFields = e.getExtraFields(); + System.out.println(extraFields.length); + Assertions.assertNotNull(extraFields, "Entry " + e.getName() + " has no extra fields"); + Assertions.assertTrue(Arrays.stream(extraFields).anyMatch(xf -> xf instanceof Zip64ExtendedInformationExtraField), + "Entry " + e.getName() + " is missing ZIP64 extra field"); + } + } + // no compression + try (ZipFile zip = new ZipFile(path.toFile())) { + for (ZipArchiveEntry e : Collections.list(zip.getEntries())) { + Assertions.assertEquals(ZipEntry.STORED, e.getMethod(), "Entry " + e.getName() + " is compressed"); + } + } + + + } + static Stream localStores() { return Stream.of( new MemoryStore(), From 5b74372b7b13bcfec632d02c5e374a6462a68be9 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 11 Dec 2025 11:25:07 +0100 Subject: [PATCH 10/26] set Zip64Mode.AsNeeded --- .../java/dev/zarr/zarrjava/store/BufferedZipStore.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index ac67286..1f1f33b 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -11,10 +11,7 @@ import java.util.stream.Stream; import org.apache.commons.compress.archivers.ArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.apache.commons.compress.archivers.zip.Zip64Mode; +import org.apache.commons.compress.archivers.zip.*; import java.util.zip.CRC32; import java.util.zip.ZipEntry; // for STORED constant @@ -32,14 +29,11 @@ private void writeBuffer() throws IOException{ // create zip file bytes from buffer store and write to underlying store ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(baos)) { - // always use zip64 - zos.setUseZip64(Zip64Mode.Always); - // set archive comment if provided + zos.setUseZip64(Zip64Mode.AsNeeded); if (archiveComment != null) { zos.setComment(archiveComment); } - // iterate all entries provided by bufferStore.list() bufferStore.list().forEach(keys -> { try { if (keys == null || keys.length == 0) { From ee92e278366d0785b2eb5713edf3722b31d5b804 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 11 Dec 2025 11:25:31 +0100 Subject: [PATCH 11/26] test Zipped OME-Zarr requirements --- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index ddaad8f..6f764aa 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.file.Files; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.stream.Stream; @@ -193,49 +194,54 @@ public void testZipStoreWithComment() throws ZarrException, IOException { /** * Test that ZipStore meets requirements for underlying store of Zipped OME-Zarr * @see RFC-9: Zipped OME-Zarr - * - * Features to test: - * - ZIP64 format - * - No ZIP-level compression - * - Option to add archive comments in the ZIP file header - * - Prohibit nested or multi-part ZIP archives - * - "The root-level zarr.json file SHOULD be the first ZIP file entry and the first entry in the central directory header; other zarr.json files SHOULD follow immediately afterwards, in breadth-first order." */ @Test public void testZipStoreRequirements() throws ZarrException, IOException { Path path = TESTOUTPUT.resolve("testZipStoreRequirements.zip"); BufferedZipStore zipStore = new BufferedZipStore(path); - writeTestGroupV3(zipStore, true); - zipStore.flush(); - // test for ZIP64 -// List fileHeaders = new ZipFile(path.toFile()).getFileHeaders(); -// -// HeaderReader headerReader = new HeaderReader(); -// ZipModel zipModel = headerReader.readAllHeaders(new RandomAccessFile(generatedZipFile, -// RandomAccessFileMode.READ.getValue()), buildDefaultConfig()); -// assertThat(zipModel.getZip64EndOfCentralDirectoryLocator()).isNotNull(); -// assertThat(zipModel.getZip64EndOfCentralDirectoryRecord()).isNotNull(); -// assertThat(zipModel.isZip64Format()).isTrue(); + Group group = Group.create(zipStore.resolve()); + Array array = group.createArray("a1", b -> b + .withShape(1024, 1024) + .withDataType(DataType.UINT32) + .withChunkShape(512, 512) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), true); + + Group g1 = group.createGroup("g1"); + g1.createGroup("g1_1").createGroup("g1_1_1"); + g1.createGroup("g1_2"); + group.createGroup("g2").createGroup("g2_1"); + group.createGroup("g3"); + + zipStore.flush(); try (ZipFile zip = new ZipFile(path.toFile())) { - for (ZipArchiveEntry e : Collections.list(zip.getEntries())) { - System.out.println(e.getName()); - ZipExtraField[] extraFields = e.getExtraFields(); - System.out.println(extraFields.length); - Assertions.assertNotNull(extraFields, "Entry " + e.getName() + " has no extra fields"); - Assertions.assertTrue(Arrays.stream(extraFields).anyMatch(xf -> xf instanceof Zip64ExtendedInformationExtraField), - "Entry " + e.getName() + " is missing ZIP64 extra field"); - } - } - // no compression - try (ZipFile zip = new ZipFile(path.toFile())) { - for (ZipArchiveEntry e : Collections.list(zip.getEntries())) { + ArrayList entries = Collections.list(zip.getEntries()); + + // no compression + for (ZipArchiveEntry e : entries) { Assertions.assertEquals(ZipEntry.STORED, e.getMethod(), "Entry " + e.getName() + " is compressed"); } - } - + // correct order of zarr.json files + String[] expectedZarrJsonOrder = new String[]{ + "zarr.json", + "a1/zarr.json", + "g1/zarr.json", + "g2/zarr.json", + "g3/zarr.json", + "g1/g1_1/zarr.json", + "g1/g1_2/zarr.json", + "g2/g2_1/zarr.json", + "g1/g1_1/g1_1_1/zarr.json" + }; + String[] actualZarrJsonOrder = entries.stream() + .map(ZipArchiveEntry::getName) + .limit(expectedZarrJsonOrder.length) + .toArray(String[]::new); + Assertions.assertArrayEquals(expectedZarrJsonOrder, actualZarrJsonOrder, "zarr.json files are not in the expected breadth-first order"); + } } static Stream localStores() { From a34465587cb2a55c55de8b8225ad8c3c973652be Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 11 Dec 2025 11:42:07 +0100 Subject: [PATCH 12/26] Sort zarr.json files in breadth-first order within BufferedZipStore --- .../zarr/zarrjava/store/BufferedZipStore.java | 24 ++++++++++++++++++- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 9 +++---- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 1f1f33b..fd4bc60 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -33,8 +33,30 @@ private void writeBuffer() throws IOException{ if (archiveComment != null) { zos.setComment(archiveComment); } + Stream entries = bufferStore.list().sorted( + (a, b) -> { + boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json"); + boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json"); + // first all zarr.json files + if (aIsZarr && !bIsZarr) { + return -1; + } else if (!aIsZarr && bIsZarr) { + return 1; + } else if (aIsZarr && bIsZarr) { + // sort zarr.json in BFS order within same depth by lexicographical order + if (a.length != b.length) { + return Integer.compare(a.length, b.length); + } else { + return String.join("/", a).compareTo(String.join("/", b)); + } + } else { + // then all other files in lexicographical order + return String.join("/", a).compareTo(String.join("/", b)); + } + } + ); - bufferStore.list().forEach(keys -> { + entries.forEach(keys -> { try { if (keys == null || keys.length == 0) { // skip root entry diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 6f764aa..22afb68 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -225,7 +225,7 @@ public void testZipStoreRequirements() throws ZarrException, IOException { } // correct order of zarr.json files - String[] expectedZarrJsonOrder = new String[]{ + String[] expectedFirstEntries = new String[]{ "zarr.json", "a1/zarr.json", "g1/zarr.json", @@ -236,11 +236,12 @@ public void testZipStoreRequirements() throws ZarrException, IOException { "g2/g2_1/zarr.json", "g1/g1_1/g1_1_1/zarr.json" }; - String[] actualZarrJsonOrder = entries.stream() + String[] actualFirstEntries = entries.stream() .map(ZipArchiveEntry::getName) - .limit(expectedZarrJsonOrder.length) + .limit(expectedFirstEntries.length) .toArray(String[]::new); - Assertions.assertArrayEquals(expectedZarrJsonOrder, actualZarrJsonOrder, "zarr.json files are not in the expected breadth-first order"); + + Assertions.assertArrayEquals(expectedFirstEntries, actualFirstEntries, "zarr.json files are not in the expected breadth-first order"); } } From ea16692df732e791d00c48740916495257f8d431 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 11:44:20 +0100 Subject: [PATCH 13/26] manually read zip comment --- .../zarr/zarrjava/store/BufferedZipStore.java | 52 ++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index fd4bc60..6304e99 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -23,7 +23,7 @@ public class BufferedZipStore implements Store, Store.ListableStore { private final StoreHandle underlyingStore; private final Store.ListableStore bufferStore; - private final String archiveComment; + private String archiveComment; private void writeBuffer() throws IOException{ // create zip file bytes from buffer store and write to underlying store @@ -113,14 +113,62 @@ private void writeBuffer() throws IOException{ underlyingStore.set(ByteBuffer.wrap(zipBytes)); } + // Source - https://stackoverflow.com/a/9918966 + // Retrieved 2025-12-12, License - CC BY-SA 3.0 + private static String getZipCommentFromBuffer (byte[] buffer, int len) { + byte[] magicDirEnd = {0x50, 0x4b, 0x05, 0x06}; + int buffLen = Math.min(buffer.length, len); + + // Check the buffer from the end + for (int i = buffLen - magicDirEnd.length - 22; i >= 0; i--) { + boolean isMagicStart = true; + + for (int k = 0; k < magicDirEnd.length; k++) { + if (buffer[i + k] != magicDirEnd[k]) { + isMagicStart = false; + break; + } + } + + if (isMagicStart) { + // Magic Start found! + int commentLen = buffer[i + 20] + buffer[i + 21] * 256; + int realLen = buffLen - i - 22; + System.out.println ("ZIP comment found at buffer position " + + (i + 22) + " with len = " + commentLen + ", good!"); + + if (commentLen != realLen) { + System.out.println ("WARNING! ZIP comment size mismatch: " + + "directory says len is " + commentLen + + ", but file ends after " + realLen + " bytes!"); + } + + String comment = new String (buffer, i + 22, Math.min(commentLen, realLen)); + return comment; + } + } + + System.out.println ("ERROR! ZIP comment NOT found!"); + return null; + } + private void loadBuffer() throws IOException{ // read zip file bytes from underlying store and populate buffer store ByteBuffer buffer = underlyingStore.read(); if (buffer == null) { return; } + + // read archive comment + byte[] bufArray; + if (buffer.hasArray()) { + bufArray = buffer.array(); + } else { + bufArray = new byte[buffer.remaining()]; + buffer.duplicate().get(bufArray); + } + this.archiveComment = getZipCommentFromBuffer(bufArray, bufArray.length); try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { -// this.archiveComment = zis.getComment(); ArchiveEntry aentry; while ((aentry = zis.getNextEntry()) != null) { ZipArchiveEntry entry = (ZipArchiveEntry) aentry; From 7e0164f6f2de7b338d5224b16364a6754fc27e56 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 13:38:46 +0100 Subject: [PATCH 14/26] refactor read zip comment --- .../zarr/zarrjava/store/BufferedZipStore.java | 60 +++++++------------ 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 6304e99..5ccdf5c 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -113,42 +113,30 @@ private void writeBuffer() throws IOException{ underlyingStore.set(ByteBuffer.wrap(zipBytes)); } - // Source - https://stackoverflow.com/a/9918966 - // Retrieved 2025-12-12, License - CC BY-SA 3.0 - private static String getZipCommentFromBuffer (byte[] buffer, int len) { - byte[] magicDirEnd = {0x50, 0x4b, 0x05, 0x06}; - int buffLen = Math.min(buffer.length, len); - + // adopted from https://stackoverflow.com/a/9918966 + @Nullable + private String getZipCommentFromBuffer(byte[] bufArray) throws IOException { + // End of Central Directory (EOCD) record magic number + byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; + int buffLen = bufArray.length; // Check the buffer from the end - for (int i = buffLen - magicDirEnd.length - 22; i >= 0; i--) { - boolean isMagicStart = true; - - for (int k = 0; k < magicDirEnd.length; k++) { - if (buffer[i + k] != magicDirEnd[k]) { - isMagicStart = false; - break; + search: + for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { + for (int k = 0; k < EOCD.length; k++) { + if (bufArray[i + k] != EOCD[k]) { + continue search; } } - - if (isMagicStart) { - // Magic Start found! - int commentLen = buffer[i + 20] + buffer[i + 21] * 256; - int realLen = buffLen - i - 22; - System.out.println ("ZIP comment found at buffer position " - + (i + 22) + " with len = " + commentLen + ", good!"); - - if (commentLen != realLen) { - System.out.println ("WARNING! ZIP comment size mismatch: " - + "directory says len is " + commentLen - + ", but file ends after " + realLen + " bytes!"); - } - - String comment = new String (buffer, i + 22, Math.min(commentLen, realLen)); - return comment; + // End of Central Directory found! + int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; + int realLen = buffLen - i - 22; + if (commentLen != realLen) { + throw new IOException("ZIP comment size mismatch: " + + "directory says len is " + commentLen + + ", but file ends after " + realLen + " bytes!"); } + return new String(bufArray, i + 22, commentLen); } - - System.out.println ("ERROR! ZIP comment NOT found!"); return null; } @@ -158,8 +146,6 @@ private void loadBuffer() throws IOException{ if (buffer == null) { return; } - - // read archive comment byte[] bufArray; if (buffer.hasArray()) { bufArray = buffer.array(); @@ -167,11 +153,10 @@ private void loadBuffer() throws IOException{ bufArray = new byte[buffer.remaining()]; buffer.duplicate().get(bufArray); } - this.archiveComment = getZipCommentFromBuffer(bufArray, bufArray.length); + this.archiveComment = getZipCommentFromBuffer(bufArray); try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { - ArchiveEntry aentry; - while ((aentry = zis.getNextEntry()) != null) { - ZipArchiveEntry entry = (ZipArchiveEntry) aentry; + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { if (entry.isDirectory()) { continue; } @@ -185,7 +170,6 @@ private void loadBuffer() throws IOException{ bufferStore.set(new String[]{entry.getName()}, ByteBuffer.wrap(bytes)); } } - } public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { From 02445e009b0fdd2726b06aa42ef724053df67267 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 13:58:39 +0100 Subject: [PATCH 15/26] test zip store with v2 --- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 72 +++++++++++++++---- 1 file changed, 60 insertions(+), 12 deletions(-) diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 22afb68..89a8b70 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -3,7 +3,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.store.*; -import dev.zarr.zarrjava.v3.*; +import dev.zarr.zarrjava.core.*; import org.apache.commons.compress.archivers.zip.*; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; @@ -39,7 +39,7 @@ public void testFileSystemStores() throws IOException, ZarrException { GroupMetadata groupMetadata = objectMapper.readValue( Files.readAllBytes(TESTDATA.resolve("l4_sample").resolve("zarr.json")), - GroupMetadata.class + dev.zarr.zarrjava.v3.GroupMetadata.class ); String groupMetadataString = objectMapper.writeValueAsString(groupMetadata); @@ -48,7 +48,7 @@ public void testFileSystemStores() throws IOException, ZarrException { ArrayMetadata arrayMetadata = objectMapper.readValue(Files.readAllBytes(TESTDATA.resolve( "l4_sample").resolve("color").resolve("1").resolve("zarr.json")), - ArrayMetadata.class); + dev.zarr.zarrjava.v3.ArrayMetadata.class); String arrayMetadataString = objectMapper.writeValueAsString(arrayMetadata); Assertions.assertTrue(arrayMetadataString.contains("\"zarr_format\":3")); @@ -100,10 +100,10 @@ public void testMemoryStoreV3(boolean useParallel) throws ZarrException, IOExcep int[] testData = new int[1024 * 1024]; Arrays.setAll(testData, p -> p); - Group group = Group.create(new MemoryStore().resolve()); + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(new MemoryStore().resolve()); Array array = group.createArray("array", b -> b .withShape(1024, 1024) - .withDataType(DataType.UINT32) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT32) .withChunkShape(5, 5) ); array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData), useParallel); @@ -200,15 +200,15 @@ public void testZipStoreRequirements() throws ZarrException, IOException { Path path = TESTOUTPUT.resolve("testZipStoreRequirements.zip"); BufferedZipStore zipStore = new BufferedZipStore(path); - Group group = Group.create(zipStore.resolve()); + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(zipStore.resolve()); Array array = group.createArray("a1", b -> b .withShape(1024, 1024) - .withDataType(DataType.UINT32) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT32) .withChunkShape(512, 512) ); array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), true); - Group g1 = group.createGroup("g1"); + dev.zarr.zarrjava.v3.Group g1 = group.createGroup("g1"); g1.createGroup("g1_1").createGroup("g1_1_1"); g1.createGroup("g1_2"); group.createGroup("g2").createGroup("g2_1"); @@ -245,6 +245,25 @@ public void testZipStoreRequirements() throws ZarrException, IOException { } } + + @Test + public void testZipStoreV2() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreV2.zip"); + BufferedZipStore zipStore = new BufferedZipStore(path); + writeTestGroupV2(zipStore, true); + zipStore.flush(); + + BufferedZipStore zipStoreRead = new BufferedZipStore(path); + assertIsTestGroupV2(dev.zarr.zarrjava.core.Group.open(zipStoreRead.resolve()), true); + + Path unzippedPath = TESTOUTPUT.resolve("testZipStoreV2Unzipped"); + + unzipFile(path, unzippedPath); + FilesystemStore fsStore = new FilesystemStore(unzippedPath); + assertIsTestGroupV2(dev.zarr.zarrjava.core.Group.open(fsStore.resolve()), true); + } + + static Stream localStores() { return Stream.of( new MemoryStore(), @@ -261,6 +280,7 @@ public void testLocalStores(Store store) throws IOException, ZarrException { assertIsTestGroupV3(group, useParallel); } + int[] testData(){ int[] testData = new int[1024 * 1024]; Arrays.setAll(testData, p -> p); @@ -270,10 +290,10 @@ int[] testData(){ Group writeTestGroupV3(Store store, boolean useParallel) throws ZarrException, IOException { StoreHandle storeHandle = store.resolve(); - Group group = Group.create(storeHandle); - Array array = group.createArray("array", b -> b + dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(storeHandle); + dev.zarr.zarrjava.v3.Array array = group.createArray("array", b -> b .withShape(1024, 1024) - .withDataType(DataType.UINT32) + .withDataType(dev.zarr.zarrjava.v3.DataType.UINT32) .withChunkShape(512, 512) ); array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), useParallel); @@ -289,7 +309,35 @@ void assertIsTestGroupV3(Group group, boolean useParallel) throws ZarrException, Assertions.assertNotNull(array); ucar.ma2.Array result = array.read(useParallel); Assertions.assertArrayEquals(testData(), (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); - Attributes attrs = group.metadata().attributes; + Attributes attrs = group.metadata().attributes(); + Assertions.assertNotNull(attrs); + Assertions.assertEquals("value", attrs.getString("some")); + } + + + dev.zarr.zarrjava.v2.Group writeTestGroupV2(Store store, boolean useParallel) throws ZarrException, IOException { + StoreHandle storeHandle = store.resolve(); + + dev.zarr.zarrjava.v2.Group group = dev.zarr.zarrjava.v2.Group.create(storeHandle); + dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b + .withShape(1024, 1024) + .withDataType(dev.zarr.zarrjava.v2.DataType.UINT32) + .withChunks(512, 512) + ); + array.write(ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1024, 1024}, testData()), useParallel); + group.createGroup("subgroup"); + group.setAttributes(new Attributes().set("some", "value")); + return group; + } + + void assertIsTestGroupV2(dev.zarr.zarrjava.core.Group group, boolean useParallel) throws ZarrException, IOException { + Stream nodes = group.list(); + Assertions.assertEquals(2, nodes.count()); + dev.zarr.zarrjava.v2.Array array = (dev.zarr.zarrjava.v2.Array) group.get("array"); + Assertions.assertNotNull(array); + ucar.ma2.Array result = array.read(useParallel); + Assertions.assertArrayEquals(testData(), (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); + Attributes attrs = group.metadata().attributes(); Assertions.assertNotNull(attrs); Assertions.assertEquals("value", attrs.getString("some")); } From caafad0997a7a285a1a669a69b23746d8a63d67d Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 15:59:39 +0100 Subject: [PATCH 16/26] use com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream instead of own implementation --- .../zarr/zarrjava/store/BufferedZipStore.java | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 5ccdf5c..047fd14 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -4,13 +4,12 @@ import javax.annotation.Nullable; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; import java.util.stream.Stream; -import org.apache.commons.compress.archivers.ArchiveEntry; +import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; import org.apache.commons.compress.archivers.zip.*; import java.util.zip.CRC32; @@ -270,29 +269,4 @@ public StoreHandle resolve(String... keys) { public String toString() { return "BufferedZipStore(" + underlyingStore.toString() + ")"; } - - static class ByteBufferBackedInputStream extends InputStream { - private final ByteBuffer buf; - - public ByteBufferBackedInputStream(ByteBuffer buf) { - this.buf = buf; - } - - @Override - public int read() { - return buf.hasRemaining() ? (buf.get() & 0xFF) : -1; - } - - @Override - public int read(byte[] bytes, int off, int len) { - if (!buf.hasRemaining()) { - return -1; - } - - int toRead = Math.min(len, buf.remaining()); - buf.get(bytes, off, toRead); - return toRead; - } - } - } From dbc559c7eb80ddcd57bce39ec2cd3a7cfdea62da Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 16:01:38 +0100 Subject: [PATCH 17/26] add ReadOnlyZipStore --- .../zarr/zarrjava/store/BufferedZipStore.java | 28 +-- .../zarr/zarrjava/store/ReadOnlyZipStore.java | 159 ++++++++++++++++++ .../dev/zarr/zarrjava/utils/ZipUtils.java | 35 ++++ .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 13 ++ 4 files changed, 209 insertions(+), 26 deletions(-) create mode 100644 src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java create mode 100644 src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 047fd14..af12ef0 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -15,6 +15,8 @@ import java.util.zip.CRC32; import java.util.zip.ZipEntry; // for STORED constant +import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer; + /** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. */ @@ -112,32 +114,6 @@ private void writeBuffer() throws IOException{ underlyingStore.set(ByteBuffer.wrap(zipBytes)); } - // adopted from https://stackoverflow.com/a/9918966 - @Nullable - private String getZipCommentFromBuffer(byte[] bufArray) throws IOException { - // End of Central Directory (EOCD) record magic number - byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; - int buffLen = bufArray.length; - // Check the buffer from the end - search: - for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { - for (int k = 0; k < EOCD.length; k++) { - if (bufArray[i + k] != EOCD[k]) { - continue search; - } - } - // End of Central Directory found! - int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; - int realLen = buffLen - i - 22; - if (commentLen != realLen) { - throw new IOException("ZIP comment size mismatch: " - + "directory says len is " + commentLen - + ", but file ends after " + realLen + " bytes!"); - } - return new String(bufArray, i + 22, commentLen); - } - return null; - } private void loadBuffer() throws IOException{ // read zip file bytes from underlying store and populate buffer store diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java new file mode 100644 index 0000000..516a647 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -0,0 +1,159 @@ +package dev.zarr.zarrjava.store; + +import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.stream.Stream; + +import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer; + +public class ReadOnlyZipStore implements Store, Store.ListableStore { + + private final StoreHandle underlyingStore; + + String resolveKeys(String[] keys) { + return String.join("/", keys); + } + + String[] resolveEntryKeys(String entryKey) { + return entryKey.split("/"); + } + + @Override + public boolean exists(String[] keys) { + return get(keys, 0, 0) != null; + } + + @Nullable + @Override + public ByteBuffer get(String[] keys) { + return get(keys, 0); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start) { + return get(keys, start, -1); + } + + public String getArchiveComment() throws IOException { + ByteBuffer buffer = underlyingStore.read(); + if (buffer == null) { + return null; + } + byte[] bufArray; + if (buffer.hasArray()) { + bufArray = buffer.array(); + } else { + bufArray = new byte[buffer.remaining()]; + buffer.duplicate().get(bufArray); + } + return getZipCommentFromBuffer(bufArray); + } + + @Nullable + @Override + public ByteBuffer get(String[] keys, long start, long end) { + ByteBuffer buffer = underlyingStore.read(); + if (buffer == null) { + return null; + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.isDirectory() || !entry.getName().equals(resolveKeys(keys))) { + continue; + } + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + if (end == -1) { + end = entry.getSize(); + } + if (start > end) { + throw new IllegalArgumentException("Start position can not be larger than end position. Got start=" + start + ", end=" + end); + } + if (start < 0 || end > entry.getSize()) { + throw new IllegalArgumentException("Start and end positions must be within the bounds of the zip entry size. Entry size=" + entry.getSize() + ", got start=" + start + ", end=" + end); + } + zis.skip(start); + long bytesToRead = end - start; + byte[] bufferArray = new byte[8192]; + int len; + while (bytesToRead > 0 && (len = zis.read(bufferArray, 0, (int) Math.min(bufferArray.length, bytesToRead))) != -1) { + baos.write(bufferArray, 0, len); + bytesToRead -= len; + } + byte[] bytes = baos.toByteArray(); + return ByteBuffer.wrap(bytes); + } + } catch (IOException e) { + return null; + } + return null; + } + + @Override + public void set(String[] keys, ByteBuffer bytes) { + throw new UnsupportedOperationException("ReadOnlyZipStore does not support set operation."); + } + + @Override + public void delete(String[] keys) { + throw new UnsupportedOperationException("ReadOnlyZipStore does not support delete operation."); + } + + @Nonnull + @Override + public StoreHandle resolve(String... keys) { + return new StoreHandle(this, keys); + } + + @Override + public String toString() { + return "ReadOnlyZipStore(" + underlyingStore.toString() + ")"; + } + + public ReadOnlyZipStore(@Nonnull StoreHandle underlyingStore) { + this.underlyingStore = underlyingStore; + } + + public ReadOnlyZipStore(@Nonnull Path underlyingStore) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString())); + } + + public ReadOnlyZipStore(@Nonnull String underlyingStorePath) { + this(Paths.get(underlyingStorePath)); + } + + @Override + public Stream list(String[] keys) { + Stream.Builder builder = Stream.builder(); + + ByteBuffer buffer = underlyingStore.read(); + if (buffer == null) { + return builder.build(); + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { + ZipArchiveEntry entry; + String prefix = resolveKeys(keys); + while ((entry = zis.getNextEntry()) != null) { + String entryKey = entry.getName(); + if (!entryKey.startsWith(prefix) || entryKey.equals(prefix)) { + continue; + } + String[] entryKeys = resolveEntryKeys(entryKey.substring(prefix.length())); + builder.add(entryKeys); + } + } catch (IOException e) { + return null; + } + return builder.build(); + } +} diff --git a/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java b/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java new file mode 100644 index 0000000..e08d930 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java @@ -0,0 +1,35 @@ +package dev.zarr.zarrjava.utils; + +import javax.annotation.Nullable; +import java.io.IOException; + +public class ZipUtils { + + // adopted from https://stackoverflow.com/a/9918966 + @Nullable + public static String getZipCommentFromBuffer(byte[] bufArray) throws IOException { + // End of Central Directory (EOCD) record magic number + byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; + int buffLen = bufArray.length; + // Check the buffer from the end + search: + for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { + for (int k = 0; k < EOCD.length; k++) { + if (bufArray[i + k] != EOCD[k]) { + continue search; + } + } + // End of Central Directory found! + int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; + int realLen = buffLen - i - 22; + if (commentLen != realLen) { + throw new IOException("ZIP comment size mismatch: " + + "directory says len is " + commentLen + + ", but file ends after " + realLen + " bytes!"); + } + return new String(bufArray, i + 22, commentLen); + } + return null; + } + +} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 89a8b70..bb83142 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -263,6 +263,19 @@ public void testZipStoreV2() throws ZarrException, IOException { assertIsTestGroupV2(dev.zarr.zarrjava.core.Group.open(fsStore.resolve()), true); } + @Test + public void testReadOnlyZipStore() throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testReadOnlyZipStore.zip"); + String archiveComment = "This is a test ZIP archive comment."; + BufferedZipStore zipStore = new BufferedZipStore(path, archiveComment); + writeTestGroupV3(zipStore, true); + zipStore.flush(); + + ReadOnlyZipStore readOnlyZipStore = new ReadOnlyZipStore(path); + Assertions.assertEquals(archiveComment, readOnlyZipStore.getArchiveComment(), "ZIP archive comment from ReadOnlyZipStore does not match expected value."); + assertIsTestGroupV3(Group.open(readOnlyZipStore.resolve()), true); + } + static Stream localStores() { return Stream.of( From db57be7eb93a9c30e1d05cf900664242cca96397 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 16:54:56 +0100 Subject: [PATCH 18/26] fix ReadOnlyZipStore for zips with 1. leading slashes in paths 2. no sizes in entry headers --- .../zarr/zarrjava/store/ReadOnlyZipStore.java | 42 ++++++++++++------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java index 516a647..49388d7 100644 --- a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -15,6 +15,11 @@ import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer; + +/** A Store implementation that provides read-only access to a zip archive stored in an underlying Store. + * Compared to BufferedZipStore, this implementation reads directly from the zip archive without parsing + * its contents into a buffer store first making it more efficient for read-only access to large zip archives. + */ public class ReadOnlyZipStore implements Store, Store.ListableStore { private final StoreHandle underlyingStore; @@ -69,21 +74,24 @@ public ByteBuffer get(String[] keys, long start, long end) { try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { ZipArchiveEntry entry; while ((entry = zis.getNextEntry()) != null) { - if (entry.isDirectory() || !entry.getName().equals(resolveKeys(keys))) { - continue; - } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - if (end == -1) { - end = entry.getSize(); + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); } - if (start > end) { - throw new IllegalArgumentException("Start position can not be larger than end position. Got start=" + start + ", end=" + end); + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; } - if (start < 0 || end > entry.getSize()) { - throw new IllegalArgumentException("Start and end positions must be within the bounds of the zip entry size. Entry size=" + entry.getSize() + ", got start=" + start + ", end=" + end); + + if (zis.skip(start) != start) { + throw new IOException("Failed to skip to start position " + start + " in zip entry " + entryName); } - zis.skip(start); - long bytesToRead = end - start; + + long bytesToRead; + if (end != -1) bytesToRead = end - start; + else bytesToRead = Long.MAX_VALUE; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte[] bufferArray = new byte[8192]; int len; while (bytesToRead > 0 && (len = zis.read(bufferArray, 0, (int) Math.min(bufferArray.length, bytesToRead))) != -1) { @@ -144,11 +152,15 @@ public Stream list(String[] keys) { ZipArchiveEntry entry; String prefix = resolveKeys(keys); while ((entry = zis.getNextEntry()) != null) { - String entryKey = entry.getName(); - if (!entryKey.startsWith(prefix) || entryKey.equals(prefix)) { + String entryName = entry.getName(); + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + + if (!entryName.startsWith(prefix) || entryName.equals(prefix)) { continue; } - String[] entryKeys = resolveEntryKeys(entryKey.substring(prefix.length())); + String[] entryKeys = resolveEntryKeys(entryName.substring(prefix.length())); builder.add(entryKeys); } } catch (IOException e) { From 768bd62e19f7ad739f0393df4d77fbe2728f1583 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 17:01:01 +0100 Subject: [PATCH 19/26] add BufferedZipStore parameter flushOnWrite --- .../zarr/zarrjava/store/BufferedZipStore.java | 51 ++++++++++++++++++- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 42 ++++++++------- 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index af12ef0..dc82672 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -25,6 +25,7 @@ public class BufferedZipStore implements Store, Store.ListableStore { private final StoreHandle underlyingStore; private final Store.ListableStore bufferStore; private String archiveComment; + private boolean flushOnWrite; private void writeBuffer() throws IOException{ // create zip file bytes from buffer store and write to underlying store @@ -147,10 +148,11 @@ private void loadBuffer() throws IOException{ } } - public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment, boolean flushOnWrite) { this.underlyingStore = underlyingStore; this.bufferStore = bufferStore; this.archiveComment = archiveComment; + this.flushOnWrite = flushOnWrite; try { loadBuffer(); } catch (IOException e) { @@ -158,6 +160,10 @@ public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.Lis } } + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { + this(underlyingStore, bufferStore, archiveComment, true); + } + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { this(underlyingStore, bufferStore, null); } @@ -186,6 +192,35 @@ public BufferedZipStore(@Nonnull String underlyingStorePath) { this(underlyingStorePath, null); } + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, boolean flushOnWrite) { + this(underlyingStore, bufferStore, null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, String archiveComment, boolean flushOnWrite) { + this(underlyingStore, new MemoryStore(), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull StoreHandle underlyingStore, boolean flushOnWrite) { + this(underlyingStore, (String) null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, String archiveComment, boolean flushOnWrite) { + this(new FilesystemStore(underlyingStore.getParent()).resolve(underlyingStore.getFileName().toString()), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull Path underlyingStore, boolean flushOnWrite) { + this(underlyingStore, null, flushOnWrite); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath, String archiveComment, boolean flushOnWrite) { + this(Paths.get(underlyingStorePath), archiveComment, flushOnWrite); + } + + public BufferedZipStore(@Nonnull String underlyingStorePath, boolean flushOnWrite) { + this(underlyingStorePath, null, flushOnWrite); + } + + /** * Flushes the buffer and archiveComment to the underlying store as a zip file. */ @@ -228,11 +263,25 @@ public ByteBuffer get(String[] keys, long start, long end) { @Override public void set(String[] keys, ByteBuffer bytes) { bufferStore.set(keys, bytes); + if (flushOnWrite) { + try { + writeBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to flush buffer to underlying store after set operation", e); + } + } } @Override public void delete(String[] keys) { bufferStore.delete(keys); + if (flushOnWrite) { + try { + writeBuffer(); + } catch (IOException e) { + throw new RuntimeException("Failed to flush buffer to underlying store after delete operation", e); + } + } } @Nonnull diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index bb83142..86c9606 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -156,32 +156,37 @@ public void testOpenZipStore() throws ZarrException, IOException { BufferedZipStore zipStore = new BufferedZipStore(targetDir); assertIsTestGroupV3(Group.open(zipStore.resolve()), true); + + ReadOnlyZipStore readOnlyZipStore = new ReadOnlyZipStore(targetDir); + assertIsTestGroupV3(Group.open(readOnlyZipStore.resolve()), true); } - @Test - public void testWriteZipStore() throws ZarrException, IOException { - Path path = TESTOUTPUT.resolve("testWriteZipStore.zip"); - BufferedZipStore zipStore = new BufferedZipStore(path); + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testWriteZipStore(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testWriteZipStore" + (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); + BufferedZipStore zipStore = new BufferedZipStore(path, flushOnWrite); writeTestGroupV3(zipStore, true); - zipStore.flush(); + if(!flushOnWrite) zipStore.flush(); BufferedZipStore zipStoreRead = new BufferedZipStore(path); assertIsTestGroupV3(Group.open(zipStoreRead.resolve()), true); - Path unzippedPath = TESTOUTPUT.resolve("testWriteZipStoreUnzipped"); + Path unzippedPath = TESTOUTPUT.resolve("testWriteZipStoreUnzipped" + (flushOnWrite ? "Flush" : "NoFlush")); unzipFile(path, unzippedPath); FilesystemStore fsStore = new FilesystemStore(unzippedPath); assertIsTestGroupV3(Group.open(fsStore.resolve()), true); } - @Test - public void testZipStoreWithComment() throws ZarrException, IOException { - Path path = TESTOUTPUT.resolve("testZipStoreWithComment.zip"); + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testZipStoreWithComment(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreWithComment"+ (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); String comment = "{\"ome\": { \"version\": \"XX.YY\" }}"; - BufferedZipStore zipStore = new BufferedZipStore(path, comment); + BufferedZipStore zipStore = new BufferedZipStore(path, comment, flushOnWrite); writeTestGroupV3(zipStore, true); - zipStore.flush(); + if(!flushOnWrite) zipStore.flush(); try (java.util.zip.ZipFile zipFile = new java.util.zip.ZipFile(path.toFile())) { String retrievedComment = zipFile.getComment(); @@ -246,12 +251,13 @@ public void testZipStoreRequirements() throws ZarrException, IOException { } - @Test - public void testZipStoreV2() throws ZarrException, IOException { - Path path = TESTOUTPUT.resolve("testZipStoreV2.zip"); - BufferedZipStore zipStore = new BufferedZipStore(path); + @ParameterizedTest + @CsvSource({"false", "true",}) + public void testZipStoreV2(boolean flushOnWrite) throws ZarrException, IOException { + Path path = TESTOUTPUT.resolve("testZipStoreV2" + (flushOnWrite ? "Flush" : "NoFlush") + ".zip"); + BufferedZipStore zipStore = new BufferedZipStore(path, flushOnWrite); writeTestGroupV2(zipStore, true); - zipStore.flush(); + if(!flushOnWrite) zipStore.flush(); BufferedZipStore zipStoreRead = new BufferedZipStore(path); assertIsTestGroupV2(dev.zarr.zarrjava.core.Group.open(zipStoreRead.resolve()), true); @@ -280,8 +286,8 @@ public void testReadOnlyZipStore() throws ZarrException, IOException { static Stream localStores() { return Stream.of( new MemoryStore(), - new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")) -// new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), + new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) ); } From 38bec2760ad3224789278a6aa3471f1172b41e2a Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 17:01:16 +0100 Subject: [PATCH 20/26] fix testMemoryStore --- src/test/java/dev/zarr/zarrjava/ZarrV2Test.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java index 346fd2a..6522fae 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java @@ -406,8 +406,6 @@ public void testMemoryStore() throws ZarrException, IOException { ); group.createGroup("subgroup"); Assertions.assertEquals(2, group.list().count()); - for(String s: storeHandle.list().toArray(String[]::new)) - System.out.println(s); } } \ No newline at end of file From bdcbc463b720935f4c20858ca0da081161b0acfc Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Fri, 12 Dec 2025 17:17:52 +0100 Subject: [PATCH 21/26] default flushOnWrite to false --- src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java | 2 +- src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index dc82672..4e0a257 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -161,7 +161,7 @@ public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.Lis } public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment) { - this(underlyingStore, bufferStore, archiveComment, true); + this(underlyingStore, bufferStore, archiveComment, false); } public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore) { diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 86c9606..b4cbc63 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -287,7 +287,7 @@ static Stream localStores() { return Stream.of( new MemoryStore(), new FilesystemStore(TESTOUTPUT.resolve("testLocalStoresFS")), - new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip")) + new BufferedZipStore(TESTOUTPUT.resolve("testLocalStoresZIP.zip"), true) ); } From 9014feffa7b7b8fa31ea7a635446e3062a383989 Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Mon, 15 Dec 2025 11:19:38 +0100 Subject: [PATCH 22/26] fix s3 store get range --- .../java/dev/zarr/zarrjava/store/S3Store.java | 4 ++-- .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/S3Store.java b/src/main/java/dev/zarr/zarrjava/store/S3Store.java index 58c8d08..6ab1452 100644 --- a/src/main/java/dev/zarr/zarrjava/store/S3Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/S3Store.java @@ -71,7 +71,7 @@ public ByteBuffer get(String[] keys, long start) { GetObjectRequest req = GetObjectRequest.builder() .bucket(bucketName) .key(resolveKeys(keys)) - .range(String.valueOf(start)) + .range(String.format("bytes=%d-", start)) .build(); return get(req); } @@ -82,7 +82,7 @@ public ByteBuffer get(String[] keys, long start, long end) { GetObjectRequest req = GetObjectRequest.builder() .bucket(bucketName) .key(resolveKeys(keys)) - .range(start +"-"+ end) + .range(String.format("bytes=%d-%d", start, end-1)) // S3 range is inclusive .build(); return get(req); } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index b4cbc63..620b304 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -86,6 +86,24 @@ public void testS3Store() throws IOException, ZarrException { Assertions.assertEquals(0, arrayCore.read(new long[]{0,0,0,0}, new int[]{1,1,1,1}).getInt(0)); } + @Test + public void testS3StoreGet() throws IOException, ZarrException { + S3Store s3Store = new S3Store(S3Client.builder() + .region(Region.of("eu-west-1")) + .credentialsProvider(AnonymousCredentialsProvider.create()) + .build(), "static.webknossos.org", "data"); + String[] keys = new String[]{"zarr_v3", "l4_sample", "color", "1", "zarr.json"}; + + ByteBuffer buffer = s3Store.get(keys); + ByteBuffer bufferWithStart = s3Store.get(keys, 10); + Assertions.assertEquals(10, buffer.remaining()-bufferWithStart.remaining()); + + ByteBuffer bufferWithStartAndEnd = s3Store.get(keys, 0, 10); + Assertions.assertEquals(10, bufferWithStartAndEnd.remaining()); + + } + + @Test public void testHttpStore() throws IOException, ZarrException { HttpStore httpStore = new dev.zarr.zarrjava.store.HttpStore("https://static.webknossos.org/data/zarr_v3/l4_sample"); From 5c74445f353b0f1ce0c7b8333fc4bf2f80dda45b Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 18 Dec 2025 13:56:37 +0100 Subject: [PATCH 23/26] add store.getInputStream --- .../zarr/zarrjava/store/BufferedZipStore.java | 6 ++ .../zarr/zarrjava/store/FilesystemStore.java | 24 ++++++++ .../dev/zarr/zarrjava/store/HttpStore.java | 33 ++++++++++- .../dev/zarr/zarrjava/store/MemoryStore.java | 12 +++- .../zarr/zarrjava/store/ReadOnlyZipStore.java | 39 ++++++++++++- .../java/dev/zarr/zarrjava/store/S3Store.java | 11 ++++ .../java/dev/zarr/zarrjava/store/Store.java | 7 +++ .../dev/zarr/zarrjava/store/StoreHandle.java | 10 ++++ .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 57 +++++++++++++++++-- 9 files changed, 192 insertions(+), 7 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index 4e0a257..c3466af 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -4,6 +4,7 @@ import javax.annotation.Nullable; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; @@ -290,6 +291,11 @@ public StoreHandle resolve(String... keys) { return new StoreHandle(this, keys); } + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + return bufferStore.getInputStream(keys, start, end); + } + @Override public String toString() { return "BufferedZipStore(" + underlyingStore.toString() + ")"; diff --git a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java index 5640a1a..9aeba12 100644 --- a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java @@ -1,7 +1,10 @@ package dev.zarr.zarrjava.store; import dev.zarr.zarrjava.utils.Utils; +import org.apache.commons.io.input.BoundedInputStream; + import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; @@ -146,4 +149,25 @@ public String toString() { return this.path.toUri().toString().replaceAll("\\/$", ""); } + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + Path keyPath = resolveKeys(keys); + try { + InputStream inputStream = Files.newInputStream(keyPath); + if (start > 0) { + long skipped = inputStream.skip(start); + if (skipped < start) { + throw new IOException("Unable to skip to the desired start position."); + } + } + if (end != -1) { + long bytesToRead = end - start; + return new BoundedInputStream(inputStream, bytesToRead); + } else { + return inputStream; + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java index 343d251..7fb044f 100644 --- a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java @@ -5,7 +5,10 @@ import com.squareup.okhttp.Request; import com.squareup.okhttp.Response; import com.squareup.okhttp.ResponseBody; + +import java.io.FilterInputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -101,6 +104,34 @@ public StoreHandle resolve(String... keys) { @Override public String toString() { - return uri; + return uri; } + + @Override + @Nullable + public InputStream getInputStream(String[] keys, long start, long end) { + if (start < 0) { + throw new IllegalArgumentException("Argument 'start' needs to be non-negative."); + } + Request request = new Request.Builder().url(resolveKeys(keys)).header( + "Range", String.format("Bytes=%d-%d", start, end - 1)).build(); + Call call = httpClient.newCall(request); + try { + Response response = call.execute(); + ResponseBody body = response.body(); + if (body == null) return null; + InputStream stream = body.byteStream(); + + // Ensure closing the stream also closes the response + return new FilterInputStream(stream) { + @Override + public void close() throws IOException { + super.close(); + body.close(); + } + }; + } catch (IOException e) { + return null; + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java index d97cffe..371e413 100644 --- a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java @@ -2,6 +2,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -45,7 +46,7 @@ public ByteBuffer get(String[] keys, long start, long end) { if (bytes == null) return null; if (end < 0) end = bytes.length; if (end > Integer.MAX_VALUE) throw new IllegalArgumentException("End index too large"); - return ByteBuffer.wrap(bytes, (int) start, (int) end); + return ByteBuffer.wrap(bytes, (int) start, (int) (end - start)); } @@ -83,5 +84,14 @@ public StoreHandle resolve(String... keys) { public String toString() { return String.format("", hashCode()); } + + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + byte[] bytes = map.get(resolveKeys(keys)); + if (bytes == null) return null; + if (end < 0) end = bytes.length; + if (end > Integer.MAX_VALUE) throw new IllegalArgumentException("End index too large"); + return new java.io.ByteArrayInputStream(bytes, (int) start, (int)(end - start)); + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java index 49388d7..d7a8404 100644 --- a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -3,11 +3,13 @@ import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream; +import org.apache.commons.io.input.BoundedInputStream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; @@ -83,7 +85,8 @@ public ByteBuffer get(String[] keys, long start, long end) { continue; } - if (zis.skip(start) != start) { + long skipResult = zis.skip(start); + if (skipResult != start) { throw new IOException("Failed to skip to start position " + start + " in zip entry " + entryName); } @@ -168,4 +171,38 @@ public Stream list(String[] keys) { } return builder.build(); } + + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + InputStream baseStream = underlyingStore.getInputStream(); + + try { + ZipArchiveInputStream zis = new ZipArchiveInputStream(baseStream); + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; + } + + long skipResult = zis.skip(start); + if (skipResult != start) { + throw new IOException("Failed to skip to start position " + start + " in zip entry " + entryName); + } + + long bytesToRead; + if (end != -1) bytesToRead = end - start; + else bytesToRead = Long.MAX_VALUE; + + return new BoundedInputStream(zis, bytesToRead); + } + return null; + } catch (IOException e) { + } + return null; + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/S3Store.java b/src/main/java/dev/zarr/zarrjava/store/S3Store.java index 6ab1452..37eca90 100644 --- a/src/main/java/dev/zarr/zarrjava/store/S3Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/S3Store.java @@ -121,6 +121,17 @@ public StoreHandle resolve(String... keys) { return new StoreHandle(this, keys); } + @Override + public InputStream getInputStream(String[] keys, long start, long end) { + GetObjectRequest req = GetObjectRequest.builder() + .bucket(bucketName) + .key(resolveKeys(keys)) + .range(String.format("bytes=%d-%d", start, end-1)) // S3 range is inclusive + .build(); + ResponseInputStream responseInputStream = s3client.getObject(req); + return responseInputStream; + } + @Override public String toString() { return "s3://" + bucketName + "/" + prefix; diff --git a/src/main/java/dev/zarr/zarrjava/store/Store.java b/src/main/java/dev/zarr/zarrjava/store/Store.java index 451bf79..ecd2242 100644 --- a/src/main/java/dev/zarr/zarrjava/store/Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/Store.java @@ -1,5 +1,6 @@ package dev.zarr.zarrjava.store; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.stream.Stream; import javax.annotation.Nonnull; @@ -42,4 +43,10 @@ default Stream list() { return list(new String[]{}); } } + + InputStream getInputStream(String[] keys, long start, long end); + + default InputStream getInputStream(String[] keys) { + return getInputStream(keys, 0, -1); + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java index e731a39..84665e8 100644 --- a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java +++ b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java @@ -1,6 +1,8 @@ package dev.zarr.zarrjava.store; import dev.zarr.zarrjava.utils.Utils; + +import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.file.NoSuchFileException; import java.nio.file.Path; @@ -44,6 +46,14 @@ public ByteBuffer read(long start, long end) { return store.get(keys, start, end); } + public InputStream getInputStream(int start, int end) { + return store.getInputStream(keys, start, end); + } + + public InputStream getInputStream() { + return store.getInputStream(keys); + } + public void set(ByteBuffer bytes) { store.set(keys, bytes); } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 620b304..99b373d 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -18,6 +18,8 @@ import software.amazon.awssdk.services.s3.S3Client; import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; @@ -103,6 +105,55 @@ public void testS3StoreGet() throws IOException, ZarrException { } + static Stream inputStreamStores() throws IOException { + String[] s3StoreKeys = new String[]{"zarr_v3", "l4_sample", "color", "1", "zarr.json"}; + StoreHandle s3StoreHandle = new S3Store(S3Client.builder() + .region(Region.of("eu-west-1")) + .credentialsProvider(AnonymousCredentialsProvider.create()) + .build(), "static.webknossos.org", "data") + .resolve(s3StoreKeys); + + byte[] testData = new byte[100]; + for (int i = 0; i < testData.length; i++) { + testData[i] = (byte) i; + } + + StoreHandle memoryStoreHandle = new MemoryStore().resolve(); + memoryStoreHandle.set(ByteBuffer.wrap(testData)); + + StoreHandle fsStoreHandle = new FilesystemStore(TESTOUTPUT.resolve("testInputStreamFS")).resolve("testfile"); + fsStoreHandle.set(ByteBuffer.wrap(testData)); + + zipFile(TESTOUTPUT.resolve("testInputStreamFS"), TESTOUTPUT.resolve("testInputStreamZIP.zip")); + StoreHandle bufferedZipStoreHandle = new BufferedZipStore(TESTOUTPUT.resolve("testInputStreamZIP.zip"), true) + .resolve("testfile"); + + StoreHandle readOnlyZipStoreHandle = new ReadOnlyZipStore(TESTOUTPUT.resolve("testInputStreamZIP.zip")) + .resolve("testfile"); + + StoreHandle httpStoreHandle = new HttpStore("https://static.webknossos.org/data/zarr_v3/l4_sample") + .resolve("color", "1", "zarr.json"); + return Stream.of( + memoryStoreHandle, + s3StoreHandle, + fsStoreHandle, + bufferedZipStoreHandle, + readOnlyZipStoreHandle, + httpStoreHandle + ); + } + + @ParameterizedTest + @MethodSource("inputStreamStores") + public void testStoreInputStream(StoreHandle storeHandle) throws IOException, ZarrException { + InputStream is = storeHandle.getInputStream(10, 20); + byte[] buffer = new byte[10]; + int bytesRead = is.read(buffer); + Assertions.assertEquals(10, bytesRead); + byte[] expectedBuffer = new byte[10]; + storeHandle.read(10, 20).get(expectedBuffer); + Assertions.assertArrayEquals(expectedBuffer, buffer); + } @Test public void testHttpStore() throws IOException, ZarrException { @@ -115,8 +166,7 @@ public void testHttpStore() throws IOException, ZarrException { @ParameterizedTest @CsvSource({"false", "true",}) public void testMemoryStoreV3(boolean useParallel) throws ZarrException, IOException { - int[] testData = new int[1024 * 1024]; - Arrays.setAll(testData, p -> p); + int[] testData = testData(); dev.zarr.zarrjava.v3.Group group = dev.zarr.zarrjava.v3.Group.create(new MemoryStore().resolve()); Array array = group.createArray("array", b -> b @@ -140,8 +190,7 @@ public void testMemoryStoreV3(boolean useParallel) throws ZarrException, IOExcep @ParameterizedTest @CsvSource({"false", "true",}) public void testMemoryStoreV2(boolean useParallel) throws ZarrException, IOException { - int[] testData = new int[1024 * 1024]; - Arrays.setAll(testData, p -> p); + int[] testData = testData(); dev.zarr.zarrjava.v2.Group group = dev.zarr.zarrjava.v2.Group.create(new MemoryStore().resolve()); dev.zarr.zarrjava.v2.Array array = group.createArray("array", b -> b From 7e0e90e8cd81eb269afffd14048db9bde88f7b9d Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 18 Dec 2025 15:18:19 +0100 Subject: [PATCH 24/26] add store.getSize --- .../zarr/zarrjava/store/BufferedZipStore.java | 4 +++ .../zarr/zarrjava/store/FilesystemStore.java | 7 +++++ .../dev/zarr/zarrjava/store/HttpStore.java | 26 +++++++++++++++++++ .../dev/zarr/zarrjava/store/MemoryStore.java | 10 ++++++- .../java/dev/zarr/zarrjava/store/S3Store.java | 13 ++++++++++ .../java/dev/zarr/zarrjava/store/Store.java | 2 ++ .../dev/zarr/zarrjava/store/StoreHandle.java | 4 +++ .../java/dev/zarr/zarrjava/ZarrStoreTest.java | 8 ++++++ 8 files changed, 73 insertions(+), 1 deletion(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index c3466af..c0ac7b3 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -296,6 +296,10 @@ public InputStream getInputStream(String[] keys, long start, long end) { return bufferStore.getInputStream(keys, start, end); } + public long getSize(String[] keys) { + return bufferStore.getSize(keys); + } + @Override public String toString() { return "BufferedZipStore(" + underlyingStore.toString() + ")"; diff --git a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java index 9aeba12..dbc8a83 100644 --- a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java @@ -170,4 +170,11 @@ public InputStream getInputStream(String[] keys, long start, long end) { throw new RuntimeException(e); } } + public long getSize(String[] keys) { + try { + return Files.size(resolveKeys(keys)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java index 7fb044f..8dcd75b 100644 --- a/src/main/java/dev/zarr/zarrjava/store/HttpStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/HttpStore.java @@ -134,4 +134,30 @@ public void close() throws IOException { return null; } } + @Override + public long getSize(String[] keys) { + // Explicitly request "identity" encoding to prevent OkHttp from adding "gzip" + // and subsequently stripping the Content-Length header. + Request request = new Request.Builder() + .head() + .url(resolveKeys(keys)) + .header("Accept-Encoding", "identity") + .build(); + + Call call = httpClient.newCall(request); + try { + Response response = call.execute(); + if (!response.isSuccessful()) { + throw new IOException("Failed to get size: " + response.code()); + } + + String contentLength = response.header("Content-Length"); + if (contentLength != null) { + return Long.parseLong(contentLength); + } + return -1; + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java index 371e413..09ee39b 100644 --- a/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/MemoryStore.java @@ -93,5 +93,13 @@ public InputStream getInputStream(String[] keys, long start, long end) { if (end > Integer.MAX_VALUE) throw new IllegalArgumentException("End index too large"); return new java.io.ByteArrayInputStream(bytes, (int) start, (int)(end - start)); } -} + @Override + public long getSize(String[] keys) { + byte[] bytes = map.get(resolveKeys(keys)); + if (bytes == null) { + throw new RuntimeException(new java.io.FileNotFoundException("Key not found: " + String.join("/", keys))); + } + return bytes.length; + } +} diff --git a/src/main/java/dev/zarr/zarrjava/store/S3Store.java b/src/main/java/dev/zarr/zarrjava/store/S3Store.java index 37eca90..d112db0 100644 --- a/src/main/java/dev/zarr/zarrjava/store/S3Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/S3Store.java @@ -132,6 +132,19 @@ public InputStream getInputStream(String[] keys, long start, long end) { return responseInputStream; } + @Override + public long getSize(String[] keys) { + HeadObjectRequest req = HeadObjectRequest.builder() + .bucket(bucketName) + .key(resolveKeys(keys)) + .build(); + try { + return s3client.headObject(req).contentLength(); + } catch (NoSuchKeyException e) { + throw new RuntimeException(e); + } + } + @Override public String toString() { return "s3://" + bucketName + "/" + prefix; diff --git a/src/main/java/dev/zarr/zarrjava/store/Store.java b/src/main/java/dev/zarr/zarrjava/store/Store.java index ecd2242..3923bde 100644 --- a/src/main/java/dev/zarr/zarrjava/store/Store.java +++ b/src/main/java/dev/zarr/zarrjava/store/Store.java @@ -49,4 +49,6 @@ default Stream list() { default InputStream getInputStream(String[] keys) { return getInputStream(keys, 0, -1); } + + long getSize(String[] keys); } diff --git a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java index 84665e8..e2c9273 100644 --- a/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java +++ b/src/main/java/dev/zarr/zarrjava/store/StoreHandle.java @@ -73,6 +73,10 @@ public Stream list() { return ((Store.ListableStore) store).list(keys); } + public long getSize() { + return store.getSize(keys); + } + @Override public String toString() { return store + "/" + String.join("/", keys); diff --git a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java index 99b373d..7b12c75 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrStoreTest.java @@ -155,6 +155,14 @@ public void testStoreInputStream(StoreHandle storeHandle) throws IOException, Za Assertions.assertArrayEquals(expectedBuffer, buffer); } + @ParameterizedTest + @MethodSource("inputStreamStores") + public void testStoreGetSize(StoreHandle storeHandle) throws IOException, ZarrException { + long size = storeHandle.getSize(); + long actual_size = storeHandle.read().remaining(); + Assertions.assertEquals(actual_size, size); + } + @Test public void testHttpStore() throws IOException, ZarrException { HttpStore httpStore = new dev.zarr.zarrjava.store.HttpStore("https://static.webknossos.org/data/zarr_v3/l4_sample"); From 086d3f8df66eba6b3a956d0673c1ef0aa33bbcfc Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 18 Dec 2025 15:20:48 +0100 Subject: [PATCH 25/26] improve performance of ReadOnlyZipStore.getArchiveComment --- .../zarr/zarrjava/store/ReadOnlyZipStore.java | 97 ++++++++++++++----- 1 file changed, 75 insertions(+), 22 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java index d7a8404..454b08e 100644 --- a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -52,28 +52,49 @@ public ByteBuffer get(String[] keys, long start) { } public String getArchiveComment() throws IOException { - ByteBuffer buffer = underlyingStore.read(); - if (buffer == null) { - return null; - } - byte[] bufArray; - if (buffer.hasArray()) { - bufArray = buffer.array(); - } else { - bufArray = new byte[buffer.remaining()]; - buffer.duplicate().get(bufArray); + // Attempt to read from the end of the file to find the EOCD record. + // We try a small chunk first (1KB) which covers most short comments (or no comment), + // then the maximum possible EOCD size (approx 65KB). + int[] readSizes = {1024, 65535 + 22}; + + for (int size : readSizes) { + ByteBuffer buffer; + long fileSize = underlyingStore.getSize(); + + if (fileSize < size){ + buffer = underlyingStore.read(); + } + else { + buffer = underlyingStore.read(fileSize - size); + } + + if (buffer == null) { + return null; + } + + byte[] bufArray; + if (buffer.hasArray()) { + bufArray = buffer.array(); + } else { + bufArray = new byte[buffer.remaining()]; + buffer.duplicate().get(bufArray); + } + + String comment = getZipCommentFromBuffer(bufArray); + if (comment != null) { + return comment; + } } - return getZipCommentFromBuffer(bufArray); + return null; } - @Nullable @Override public ByteBuffer get(String[] keys, long start, long end) { - ByteBuffer buffer = underlyingStore.read(); - if (buffer == null) { + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { return null; } - try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { ZipArchiveEntry entry; while ((entry = zis.getNextEntry()) != null) { String entryName = entry.getName(); @@ -147,11 +168,11 @@ public ReadOnlyZipStore(@Nonnull String underlyingStorePath) { public Stream list(String[] keys) { Stream.Builder builder = Stream.builder(); - ByteBuffer buffer = underlyingStore.read(); - if (buffer == null) { + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { return builder.build(); } - try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { ZipArchiveEntry entry; String prefix = resolveKeys(keys); while ((entry = zis.getNextEntry()) != null) { @@ -166,9 +187,7 @@ public Stream list(String[] keys) { String[] entryKeys = resolveEntryKeys(entryName.substring(prefix.length())); builder.add(entryKeys); } - } catch (IOException e) { - return null; - } + } catch (IOException ignored) {} return builder.build(); } @@ -201,8 +220,42 @@ public InputStream getInputStream(String[] keys, long start, long end) { return new BoundedInputStream(zis, bytesToRead); } return null; + } catch (IOException ignored) {} + return null; + } + + @Override + public long getSize(String[] keys) { + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + throw new RuntimeException(new IOException("Underlying store input stream is null")); + } + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { + ZipArchiveEntry entry; + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + if (entryName.startsWith("/")) { + entryName = entryName.substring(1); + } + if (entry.isDirectory() || !entryName.equals(resolveKeys(keys))) { + continue; + } + long size = entry.getSize(); + if (size < 0) { + // read the entire entry to determine size + size = 0; + byte[] bufferArray = new byte[8192]; + int len; + while ((len = zis.read(bufferArray)) != -1) { + size += len; + } + } + return size; + } + throw new RuntimeException(new java.io.FileNotFoundException("Key not found: " + resolveKeys(keys))); } catch (IOException e) { + throw new RuntimeException(e); } - return null; } } From 7d4f4873e31fe7e1e23a961ddb4454f62328898a Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Thu, 18 Dec 2025 18:00:40 +0100 Subject: [PATCH 26/26] inherit zipstores from common parent and reduce buffers in memory in loadBuffer --- .../zarr/zarrjava/store/BufferedZipStore.java | 96 ++++++++++--------- .../zarr/zarrjava/store/FilesystemStore.java | 4 + .../zarr/zarrjava/store/ReadOnlyZipStore.java | 44 +-------- .../dev/zarr/zarrjava/store/ZipStore.java | 81 ++++++++++++++++ .../dev/zarr/zarrjava/utils/ZipUtils.java | 35 ------- 5 files changed, 138 insertions(+), 122 deletions(-) create mode 100644 src/main/java/dev/zarr/zarrjava/store/ZipStore.java delete mode 100644 src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java diff --git a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java index c0ac7b3..c0b72a1 100644 --- a/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/BufferedZipStore.java @@ -8,27 +8,45 @@ import java.nio.ByteBuffer; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Comparator; import java.util.stream.Stream; -import com.fasterxml.jackson.databind.util.ByteBufferBackedInputStream; import org.apache.commons.compress.archivers.zip.*; import java.util.zip.CRC32; import java.util.zip.ZipEntry; // for STORED constant -import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer; - /** A Store implementation that buffers reads and writes and flushes them to an underlying Store as a zip file. */ -public class BufferedZipStore implements Store, Store.ListableStore { +public class BufferedZipStore extends ZipStore { - private final StoreHandle underlyingStore; private final Store.ListableStore bufferStore; private String archiveComment; - private boolean flushOnWrite; + private final boolean flushOnWrite; + + private final Comparator zipEntryComparator = (a, b) -> { + boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json"); + boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json"); + // first all zarr.json files + if (aIsZarr && !bIsZarr) { + return -1; + } else if (!aIsZarr && bIsZarr) { + return 1; + } else if (aIsZarr && bIsZarr) { + // sort zarr.json in BFS order within same depth by lexicographical order + if (a.length != b.length) { + return Integer.compare(a.length, b.length); + } else { + return String.join("/", a).compareTo(String.join("/", b)); + } + } else { + // then all other files in lexicographical order + return String.join("/", a).compareTo(String.join("/", b)); + } + }; - private void writeBuffer() throws IOException{ + private void writeBuffer() throws IOException { // create zip file bytes from buffer store and write to underlying store ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(baos)) { @@ -36,30 +54,7 @@ private void writeBuffer() throws IOException{ if (archiveComment != null) { zos.setComment(archiveComment); } - Stream entries = bufferStore.list().sorted( - (a, b) -> { - boolean aIsZarr = a.length > 0 && a[a.length - 1].equals("zarr.json"); - boolean bIsZarr = b.length > 0 && b[b.length - 1].equals("zarr.json"); - // first all zarr.json files - if (aIsZarr && !bIsZarr) { - return -1; - } else if (!aIsZarr && bIsZarr) { - return 1; - } else if (aIsZarr && bIsZarr) { - // sort zarr.json in BFS order within same depth by lexicographical order - if (a.length != b.length) { - return Integer.compare(a.length, b.length); - } else { - return String.join("/", a).compareTo(String.join("/", b)); - } - } else { - // then all other files in lexicographical order - return String.join("/", a).compareTo(String.join("/", b)); - } - } - ); - - entries.forEach(keys -> { + bufferStore.list().sorted(zipEntryComparator).forEach(keys -> { try { if (keys == null || keys.length == 0) { // skip root entry @@ -116,22 +111,32 @@ private void writeBuffer() throws IOException{ underlyingStore.set(ByteBuffer.wrap(zipBytes)); } + public void setArchiveComment(@Nullable String archiveComment) throws IOException { + this.archiveComment = archiveComment; + if (flushOnWrite) { + writeBuffer(); + } + } + + public void deleteArchiveComment() throws IOException { + this.setArchiveComment(null); + } - private void loadBuffer() throws IOException{ - // read zip file bytes from underlying store and populate buffer store - ByteBuffer buffer = underlyingStore.read(); - if (buffer == null) { - return; + /** + * Loads the buffer from the underlying store zip file. + */ + private void loadBuffer() throws IOException { + String loadedArchiveComment = super.getArchiveComment(); + if (loadedArchiveComment != null && this.archiveComment == null) { + // don't overwrite existing archiveComment + this.archiveComment = loadedArchiveComment; } - byte[] bufArray; - if (buffer.hasArray()) { - bufArray = buffer.array(); - } else { - bufArray = new byte[buffer.remaining()]; - buffer.duplicate().get(bufArray); + + InputStream inputStream = underlyingStore.getInputStream(); + if (inputStream == null) { + return; } - this.archiveComment = getZipCommentFromBuffer(bufArray); - try (ZipArchiveInputStream zis = new ZipArchiveInputStream(new ByteBufferBackedInputStream(buffer))) { + try (ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream)) { ZipArchiveEntry entry; while ((entry = zis.getNextEntry()) != null) { if (entry.isDirectory()) { @@ -150,7 +155,7 @@ private void loadBuffer() throws IOException{ } public BufferedZipStore(@Nonnull StoreHandle underlyingStore, @Nonnull Store.ListableStore bufferStore, @Nullable String archiveComment, boolean flushOnWrite) { - this.underlyingStore = underlyingStore; + super(underlyingStore); this.bufferStore = bufferStore; this.archiveComment = archiveComment; this.flushOnWrite = flushOnWrite; @@ -229,6 +234,7 @@ public void flush() throws IOException { writeBuffer(); } + @Override public String getArchiveComment() { return archiveComment; } diff --git a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java index dbc8a83..d8d992d 100644 --- a/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/FilesystemStore.java @@ -153,6 +153,9 @@ public String toString() { public InputStream getInputStream(String[] keys, long start, long end) { Path keyPath = resolveKeys(keys); try { + if (!Files.exists(keyPath)) { + return null; + } InputStream inputStream = Files.newInputStream(keyPath); if (start > 0) { long skipped = inputStream.skip(start); @@ -170,6 +173,7 @@ public InputStream getInputStream(String[] keys, long start, long end) { throw new RuntimeException(e); } } + public long getSize(String[] keys) { try { return Files.size(resolveKeys(keys)); diff --git a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java index 454b08e..7fa2bed 100644 --- a/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java +++ b/src/main/java/dev/zarr/zarrjava/store/ReadOnlyZipStore.java @@ -15,16 +15,12 @@ import java.nio.file.Paths; import java.util.stream.Stream; -import static dev.zarr.zarrjava.utils.ZipUtils.getZipCommentFromBuffer; - /** A Store implementation that provides read-only access to a zip archive stored in an underlying Store. * Compared to BufferedZipStore, this implementation reads directly from the zip archive without parsing * its contents into a buffer store first making it more efficient for read-only access to large zip archives. */ -public class ReadOnlyZipStore implements Store, Store.ListableStore { - - private final StoreHandle underlyingStore; +public class ReadOnlyZipStore extends ZipStore { String resolveKeys(String[] keys) { return String.join("/", keys); @@ -51,42 +47,6 @@ public ByteBuffer get(String[] keys, long start) { return get(keys, start, -1); } - public String getArchiveComment() throws IOException { - // Attempt to read from the end of the file to find the EOCD record. - // We try a small chunk first (1KB) which covers most short comments (or no comment), - // then the maximum possible EOCD size (approx 65KB). - int[] readSizes = {1024, 65535 + 22}; - - for (int size : readSizes) { - ByteBuffer buffer; - long fileSize = underlyingStore.getSize(); - - if (fileSize < size){ - buffer = underlyingStore.read(); - } - else { - buffer = underlyingStore.read(fileSize - size); - } - - if (buffer == null) { - return null; - } - - byte[] bufArray; - if (buffer.hasArray()) { - bufArray = buffer.array(); - } else { - bufArray = new byte[buffer.remaining()]; - buffer.duplicate().get(bufArray); - } - - String comment = getZipCommentFromBuffer(bufArray); - if (comment != null) { - return comment; - } - } - return null; - } @Nullable @Override public ByteBuffer get(String[] keys, long start, long end) { @@ -153,7 +113,7 @@ public String toString() { } public ReadOnlyZipStore(@Nonnull StoreHandle underlyingStore) { - this.underlyingStore = underlyingStore; + super(underlyingStore); } public ReadOnlyZipStore(@Nonnull Path underlyingStore) { diff --git a/src/main/java/dev/zarr/zarrjava/store/ZipStore.java b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java new file mode 100644 index 0000000..5865456 --- /dev/null +++ b/src/main/java/dev/zarr/zarrjava/store/ZipStore.java @@ -0,0 +1,81 @@ +package dev.zarr.zarrjava.store; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; + +public abstract class ZipStore implements Store, Store.ListableStore { + protected final StoreHandle underlyingStore; + + public ZipStore(@Nonnull StoreHandle underlyingStore) { + this.underlyingStore = underlyingStore; + } + + public String getArchiveComment() throws IOException { + // Attempt to read from the end of the file to find the EOCD record. + // We try a small chunk first (1KB) which covers most short comments (or no comment), + // then the maximum possible EOCD size (approx 65KB). + if (!underlyingStore.exists()) { + return null; + } + int[] readSizes = {1024, 65535 + 22}; + + for (int size : readSizes) { + ByteBuffer buffer; + long fileSize = underlyingStore.getSize(); + + if (fileSize < size){ + buffer = underlyingStore.read(); + } + else { + buffer = underlyingStore.read(fileSize - size); + } + + if (buffer == null) { + return null; + } + + byte[] bufArray; + if (buffer.hasArray()) { + bufArray = buffer.array(); + } else { + bufArray = new byte[buffer.remaining()]; + buffer.duplicate().get(bufArray); + } + + String comment = getZipCommentFromBuffer(bufArray); + if (comment != null) { + return comment; + } + } + return null; + } + + // adopted from https://stackoverflow.com/a/9918966 + @Nullable + public static String getZipCommentFromBuffer(byte[] bufArray) throws IOException { + // End of Central Directory (EOCD) record magic number + byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; + int buffLen = bufArray.length; + // Check the buffer from the end + search: + for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { + for (int k = 0; k < EOCD.length; k++) { + if (bufArray[i + k] != EOCD[k]) { + continue search; + } + } + // End of Central Directory found! + int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; + int realLen = buffLen - i - 22; + if (commentLen != realLen) { + throw new IOException("ZIP comment size mismatch: " + + "directory says len is " + commentLen + + ", but file ends after " + realLen + " bytes!"); + } + return new String(bufArray, i + 22, commentLen); + } + return null; + } +} \ No newline at end of file diff --git a/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java b/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java deleted file mode 100644 index e08d930..0000000 --- a/src/main/java/dev/zarr/zarrjava/utils/ZipUtils.java +++ /dev/null @@ -1,35 +0,0 @@ -package dev.zarr.zarrjava.utils; - -import javax.annotation.Nullable; -import java.io.IOException; - -public class ZipUtils { - - // adopted from https://stackoverflow.com/a/9918966 - @Nullable - public static String getZipCommentFromBuffer(byte[] bufArray) throws IOException { - // End of Central Directory (EOCD) record magic number - byte[] EOCD = {0x50, 0x4b, 0x05, 0x06}; - int buffLen = bufArray.length; - // Check the buffer from the end - search: - for (int i = buffLen - EOCD.length - 22; i >= 0; i--) { - for (int k = 0; k < EOCD.length; k++) { - if (bufArray[i + k] != EOCD[k]) { - continue search; - } - } - // End of Central Directory found! - int commentLen = bufArray[i + 20] + bufArray[i + 21] * 256; - int realLen = buffLen - i - 22; - if (commentLen != realLen) { - throw new IOException("ZIP comment size mismatch: " - + "directory says len is " + commentLen - + ", but file ends after " + realLen + " bytes!"); - } - return new String(bufArray, i + 22, commentLen); - } - return null; - } - -}