1 change: 1 addition & 0 deletions .gitignore
@@ -7,6 +7,7 @@ sift/
*.bin
*.out
venv/
.venv

vendor/
dist/
7 changes: 7 additions & 0 deletions Makefile
@@ -24,6 +24,9 @@ endif

ifdef CONFIG_DARWIN
LOADABLE_EXTENSION=dylib
# Let unresolved SQLite symbols resolve against host at load time
# This is standard for SQLite loadable extensions on macOS.
CFLAGS += -undefined dynamic_lookup
endif

ifdef CONFIG_LINUX
@@ -193,6 +196,10 @@ test-loadable: loadable
test-loadable-snapshot-update: loadable
	$(PYTHON) -m pytest -vv tests/test-loadable.py --snapshot-update

# Update snapshots for all loadable tests (use after intentional behavior changes)
test-snapshots-update: loadable
	$(PYTHON) -m pytest -vv tests/test-*.py --snapshot-update

test-loadable-watch:
	watchexec --exts c,py,Makefile --clear -- make test-loadable

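A quick way to sanity-check the new macOS link flag is to load the freshly built extension into a host process and call into it; with -undefined dynamic_lookup, the extension's unresolved sqlite3_* symbols bind against the host's SQLite at load time. A minimal sketch in Python, assuming make loadable has produced dist/vec0.dylib and the host SQLite permits extension loading:

import sqlite3

db = sqlite3.connect(":memory:")
db.enable_load_extension(True)
# SQLite appends the platform suffix (.dylib on macOS) and derives the
# sqlite3_vec_init entrypoint from the "vec0" filename automatically
db.load_extension("dist/vec0")
print(db.execute("select vec_version()").fetchone())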
103 changes: 103 additions & 0 deletions sqlite-vec.c
@@ -8434,6 +8434,101 @@ int vec0Update_Delete_DeleteRowids(vec0_vtab *p, i64 rowid) {
  return rc;
}

// Clear the rowid slot in v_chunks.rowids for the given chunk/offset
int vec0Update_Delete_ClearRowid(vec0_vtab *p, i64 chunk_id, i64 chunk_offset) {
  int rc;
  sqlite3_blob *blobChunksRowids = NULL;

  rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowChunksName, "rowids",
                         chunk_id, 1, &blobChunksRowids);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "could not open rowids blob for %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return SQLITE_ERROR;
  }

  i64 expected = p->chunk_size * sizeof(i64);
  i64 actual = sqlite3_blob_bytes(blobChunksRowids);
  if (expected != actual) {
    vtab_set_error(&p->base,
                   VEC_INTERAL_ERROR
                   "rowids blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
                   p->schemaName, p->shadowChunksName, chunk_id, expected, actual);
    sqlite3_blob_close(blobChunksRowids);
    return SQLITE_ERROR;
  }

  i64 zero = 0;
  rc = sqlite3_blob_write(blobChunksRowids, &zero, sizeof(i64),
                          chunk_offset * sizeof(i64));
  int brc = sqlite3_blob_close(blobChunksRowids);
  if (rc != SQLITE_OK) {
    vtab_set_error(&p->base, "could not write rowids blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return rc;
  }
  if (brc != SQLITE_OK) {
    vtab_set_error(&p->base,
                   "could not close rowids blob on %s.%s.%lld",
                   p->schemaName, p->shadowChunksName, chunk_id);
    return brc;
  }
  return SQLITE_OK;
}

// Clear the vector bytes for each vector column at the given chunk/offset
int vec0Update_Delete_ClearVectors(vec0_vtab *p, i64 chunk_id, i64 chunk_offset) {
  for (int i = 0; i < p->numVectorColumns; i++) {
    int rc;
    sqlite3_blob *blobVectors = NULL;

    rc = sqlite3_blob_open(p->db, p->schemaName, p->shadowVectorChunksNames[i],
                           "vectors", chunk_id, 1, &blobVectors);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, "Could not open vectors blob for %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
      return rc;
    }

    i64 expected = p->chunk_size * vector_column_byte_size(p->vector_columns[i]);
    i64 actual = sqlite3_blob_bytes(blobVectors);
    if (expected != actual) {
      vtab_set_error(&p->base,
                     VEC_INTERAL_ERROR
                     "vector blob size mismatch on %s.%s.%lld. Expected %lld, actual %lld",
                     p->schemaName, p->shadowVectorChunksNames[i], chunk_id,
                     expected, actual);
      sqlite3_blob_close(blobVectors);
      return SQLITE_ERROR;
    }

    size_t nbytes = vector_column_byte_size(p->vector_columns[i]);
    void *zeros = sqlite3_malloc(nbytes);
    if (!zeros) {
      sqlite3_blob_close(blobVectors);
      return SQLITE_NOMEM;
    }
    memset(zeros, 0, nbytes);
    rc = vec0_write_vector_to_vector_blob(blobVectors, chunk_offset, zeros,
                                          p->vector_columns[i].dimensions,
                                          p->vector_columns[i].element_type);
    sqlite3_free(zeros);

    int brc = sqlite3_blob_close(blobVectors);
    if (rc != SQLITE_OK) {
      vtab_set_error(&p->base, "Could not write to vectors blob for %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
      return rc;
    }
    if (brc != SQLITE_OK) {
      vtab_set_error(&p->base,
                     "Could not commit blob transaction for vectors blob for %s.%s.%lld",
                     p->schemaName, p->shadowVectorChunksNames[i], chunk_id);
      return brc;
    }
  }
  return SQLITE_OK;
}

int vec0Update_Delete_DeleteAux(vec0_vtab *p, i64 rowid) {
  int rc;
  sqlite3_stmt *stmt = NULL;
@@ -8574,9 +8669,17 @@ int vec0Update_Delete(sqlite3_vtab *pVTab, sqlite3_value *idValue) {

  // 3. zero out rowid in chunks.rowids
  // https://github.com/asg017/sqlite-vec/issues/54
  rc = vec0Update_Delete_ClearRowid(p, chunk_id, chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // 4. zero out any data in vector chunks tables
  // https://github.com/asg017/sqlite-vec/issues/54
  rc = vec0Update_Delete_ClearVectors(p, chunk_id, chunk_offset);
  if (rc != SQLITE_OK) {
    return rc;
  }

  // 5. delete from _rowids table
  rc = vec0Update_Delete_DeleteRowids(p, rowid);
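Both helpers follow the same incremental blob I/O pattern: sqlite3_blob_open on the shadow table's BLOB, a size sanity check against chunk_size, a zeroing write at the chunk offset, then sqlite3_blob_close, propagating the first error encountered. The same shadow blobs can be inspected from Python 3.11+ via Connection.blobopen; a sketch, assuming a database file vec.db that already contains a vec0 table named v:

import sqlite3

db = sqlite3.connect("vec.db")
# Read slot 0 of chunk 1's rowids blob, the slot vec0Update_Delete_ClearRowid zeroes
with db.blobopen("v_chunks", "rowids", 1, readonly=True) as blob:
    slot0 = blob.read(8)  # one little-endian i64 rowid slot
print(int.from_bytes(slot0, "little"))  # 0 once the row has been deleted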
4 changes: 2 additions & 2 deletions tests/__snapshots__/test-auxiliary.ambr
@@ -137,7 +137,7 @@
'chunk_id': 1,
'size': 8,
'validity': b'\x06',
-'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
@@ -163,7 +163,7 @@
'rows': list([
OrderedDict({
'rowid': 1,
-'vectors': b'\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'vectors': b'\x00\x00\x00\x00\x00\x00\x00@\x00\x00@@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
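The updated snapshot bytes are easier to verify once decoded: the rowids blob packs chunk_size little-endian int64 slots, and the vectors blob packs chunk_size float32 slots. A quick decoding sketch of the values above:

import struct

# rowids: 8 slots of 8 bytes; slot 0 was zeroed by deleting rowid 1
rowids = (b"\x00" * 8              # slot 0: cleared
          + b"\x02" + b"\x00" * 7  # slot 1: rowid 2
          + b"\x03" + b"\x00" * 7  # slot 2: rowid 3
          + b"\x00" * 40)          # unused slots
print(struct.unpack("<8q", rowids))  # (0, 2, 3, 0, 0, 0, 0, 0)

# vectors: 8 float32 slots; the 1.0 in slot 0 (b"\x00\x00\x80?") became 0.0
vectors = b"\x00\x00\x00\x00" + b"\x00\x00\x00@" + b"\x00\x00@@" + b"\x00" * 20
print(struct.unpack("<8f", vectors))  # (0.0, 2.0, 3.0, 0.0, ..., 0.0)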
20 changes: 10 additions & 10 deletions tests/__snapshots__/test-general.ambr
@@ -126,47 +126,47 @@
'rows': list([
OrderedDict({
'schema': 'main',
-'name': 'v_auxiliary',
+'name': 'v_metadatatext00',
'type': 'shadow',
'ncol': 2,
'wr': 0,
'strict': 0,
}),
OrderedDict({
'schema': 'main',
-'name': 'v_chunks',
+'name': 'v_metadatachunks00',
'type': 'shadow',
-'ncol': 6,
+'ncol': 2,
'wr': 0,
'strict': 0,
}),
OrderedDict({
'schema': 'main',
-'name': 'v_info',
+'name': 'v_rowids',
'type': 'shadow',
-'ncol': 2,
+'ncol': 4,
'wr': 0,
'strict': 0,
}),
OrderedDict({
'schema': 'main',
-'name': 'v_rowids',
+'name': 'v_auxiliary',
'type': 'shadow',
-'ncol': 4,
+'ncol': 2,
'wr': 0,
'strict': 0,
}),
OrderedDict({
'schema': 'main',
-'name': 'v_metadatachunks00',
+'name': 'v_chunks',
'type': 'shadow',
-'ncol': 2,
+'ncol': 6,
'wr': 0,
'strict': 0,
}),
OrderedDict({
'schema': 'main',
-'name': 'v_metadatatext00',
+'name': 'v_info',
'type': 'shadow',
'ncol': 2,
'wr': 0,
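This snapshot change only reorders rows: the columns (schema, name, type, ncol, wr, strict) match SQLite's table_list pragma, whose row order is not guaranteed, so the shadow tables simply enumerate in a different sequence now. A sketch of the kind of query these snapshots capture, assuming a connection db with the v virtual table already created and db.row_factory = sqlite3.Row as in the tests:

for row in db.execute(
    "select schema, name, type, ncol, wr, strict from pragma_table_list"
    " where name like 'v_%' and type = 'shadow'"
):
    print(dict(row))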
8 changes: 4 additions & 4 deletions tests/__snapshots__/test-metadata.ambr
@@ -28,7 +28,7 @@
'chunk_id': 1,
'size': 8,
'validity': b'\x02',
-'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
@@ -89,7 +89,7 @@
'rows': list([
OrderedDict({
'rowid': 1,
-'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'vectors': b'\x00\x00\x00\x00""""\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
@@ -264,7 +264,7 @@
'chunk_id': 1,
'size': 8,
'validity': b'\x06',
-'rowids': b'\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'rowids': b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
@@ -335,7 +335,7 @@
'rows': list([
OrderedDict({
'rowid': 1,
-'vectors': b'\x11\x11\x11\x11""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
+'vectors': b'\x00\x00\x00\x00""""3333\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00',
}),
]),
}),
106 changes: 106 additions & 0 deletions tests/test-delete-clears-bytes.py
@@ -0,0 +1,106 @@
import os


def test_delete_clears_rowid_and_vectors():
    try:
        import pysqlite3 as sqlite3  # bundles a modern SQLite built with extension loading
    except ImportError:  # fall back to the stdlib module if pysqlite3 is unavailable
        import sqlite3

    db = sqlite3.connect(":memory:")
    db.row_factory = sqlite3.Row
    if hasattr(db, "enable_load_extension"):
        db.enable_load_extension(True)
    ext = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dist", "vec0"))
    try:
        # Explicit entrypoint (keyword-only, Python 3.12+) to avoid relying on the default name
        db.load_extension(ext, entrypoint="sqlite3_vec_init")
    except Exception:
        # Older Pythons lack the entrypoint parameter; the name SQLite derives
        # from "vec0" is sqlite3_vec_init anyway, so the plain call also works
        db.load_extension(ext)

    # One vector column with 1 dimension (4 bytes per vector), chunk_size=8
    db.execute("create virtual table v using vec0(vector float[1], chunk_size=8)")

    # Insert two rows with distinct raw vector bytes
    db.execute(
        "insert into v(rowid, vector) values (?, ?)",
        [1, b"\x11\x11\x11\x11"],
    )
    db.execute(
        "insert into v(rowid, vector) values (?, ?)",
        [2, b"\x22\x22\x22\x22"],
    )

    # Sanity check pre-delete: validity has the first two bits set (0b00000011)
    row = db.execute("select validity, rowids from v_chunks").fetchone()
    assert row is not None
    assert row[0] == b"\x03"

    # Delete rowid=1
    db.execute("delete from v where rowid = 1")

    # After the delete, validity should only have bit 1 set (0b00000010)
    row = db.execute("select validity, rowids from v_chunks").fetchone()
    assert row[0] == b"\x02"

    # Rowids BLOB: the first 8 bytes (slot 0) must be zero; the next 8 (slot 1) must hold rowid=2
    rowids = row[1]
    assert isinstance(rowids, (bytes, bytearray))
    assert len(rowids) == 8 * 8  # chunk_size * sizeof(i64)
    assert rowids[0:8] == b"\x00" * 8
    assert rowids[8:16] == b"\x02\x00\x00\x00\x00\x00\x00\x00"

    # Vectors BLOB for the first (and only) vector column
    vectors_row = db.execute("select vectors from v_vector_chunks00").fetchone()
    vectors = vectors_row[0]
    # chunk_size (8) * 4 bytes per float32 = 32 bytes
    assert len(vectors) == 32
    # Slot 0 cleared to zeros, slot 1 left as inserted (0x22 0x22 0x22 0x22)
    assert vectors[0:4] == b"\x00\x00\x00\x00"
    assert vectors[4:8] == b"\x22\x22\x22\x22"


def test_vacuum_shrinks_file(tmp_path):
    try:
        import pysqlite3 as sqlite3
    except ImportError:
        import sqlite3

    db_path = tmp_path / "vacuum_vec.db"

    con = sqlite3.connect(str(db_path))
    con.row_factory = sqlite3.Row
    if hasattr(con, "enable_load_extension"):
        con.enable_load_extension(True)
    ext = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "dist", "vec0"))
    try:
        con.load_extension(ext, entrypoint="sqlite3_vec_init")
    except Exception:
        # Same fallback as above for Pythons without the entrypoint parameter
        con.load_extension(ext)

    # Use a larger chunk_size to inflate the file size more clearly
    con.execute("create virtual table v using vec0(vector float[1], chunk_size=4096)")

    # Insert a decent number of rows to grow the DB
    N = 10000
    con.executemany(
        "insert into v(rowid, vector) values(?, ?)",
        ((i, b"\x11\x11\x11\x11") for i in range(1, N + 1)),
    )
    con.commit()

    size_after_insert = os.stat(db_path).st_size
    assert size_after_insert > 0

    # Drop the table to free its pages, then VACUUM to rewrite/shrink the file
    con.execute("drop table v")
    con.commit()
    con.execute("VACUUM")
    con.close()

    size_after_vacuum = os.stat(db_path).st_size

    # The file should shrink after dropping the table and VACUUM
    assert size_after_vacuum < size_after_insert
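Note: both tests assume the loadable extension has already been built (for example via make loadable) so that dist/vec0.dylib or dist/vec0.so exists; otherwise load_extension fails before any assertion runs.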