From 327849e0486035a6cc55fd156ca530147b6c54bb Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 15 Jan 2026 14:53:46 +0300 Subject: [PATCH 01/11] Add tests --- inst/tests/tests.Rraw | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10fd2fc7f..a7ef17bef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21484,3 +21484,14 @@ dt = data.table(a=1:4, b=1:2) test(2362.51, optimize=0:2, dt[, c(list()), b, verbose=TRUE], data.table(b=integer(0L)), output="GForce FALSE") test(2362.52, optimize=0:2, dt[, c(lapply(.SD, sum), list()), b, verbose=TRUE], output=out) test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE], output="GForce FALSE") + +# foverlaps shouldn't segfault on 0-row 'y', #7597 +x = data.table(Id = "A", StartX = 1L, EndX = 2L) +y = data.table(Id = character(), StartY = integer(), EndY = integer()) +by.x = c("Id", "StartX", "EndX") +by.y = c("Id", "StartY", "EndY") +setkeyv(y, by.y) +y2 = data.table(Id = "none", StartY = integer(1), EndY = integer(1)) +setkeyv(y2, by.y) +test(2363, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) +rm(x, y, y2) From e2b19eaa248bcaa0c3d0badf35c1aa045075130f Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 15 Jan 2026 14:35:01 +0300 Subject: [PATCH 02/11] overlaps: avoid accessing length-0 vectors in ux If 'ux' contains 0 rows, pretend that all comparisons against its non-existent elements fail. --- src/ijoin.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ijoin.c b/src/ijoin.c index e81e8325f..80bc3395a 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -223,7 +223,7 @@ SEXP lookup(SEXP ux, SEXP xlen, SEXP indices, SEXP gaps, SEXP overlaps, SEXP mul SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchArg, SEXP verbose) { - R_len_t uxcols=LENGTH(ux),rows=length(VECTOR_ELT(imatches,0)); + R_len_t uxcols=LENGTH(ux), rows=length(VECTOR_ELT(imatches,0)), xrows=length(VECTOR_ELT(ux,0)); int nomatch = INTEGER(nomatchArg)[0], totlen=0, thislen; int *from = INTEGER(VECTOR_ELT(imatches, 0)); int *to = INTEGER(VECTOR_ELT(imatches, 1)); @@ -252,7 +252,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr // As a first pass get the final length, so that we can allocate up-front and not deal with R_Calloc + R_Realloc + size calculation hassle // Checked the time for this loop on realisitc data (81m reads) and took 0.27 seconds! No excuses ;). start = clock(); - if (mult == ALL) { + if (xrows && mult == ALL) { totlen=0; switch (type) { case START: case END: @@ -340,7 +340,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr // switching mult=ALL,FIRST,LAST separately to // - enhance performance for special cases, and // - easy to fix any bugs in the future - switch (mult) { + if (xrows) switch (mult) { case ALL: switch (type) { case START : case END : @@ -723,6 +723,10 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr } break; default: internal_error(__func__, "unknown mult: %d", mult); // # nocov + } else if (totlen) { + int *f1i = INTEGER(f1__), *f2i = INTEGER(f2__); + for (R_len_t i = 0; i < totlen; ++i) f1i[i] = i+1; + for (R_len_t i = 0; i < totlen; ++i) f2i[i] = NA_INTEGER; } end2 = clock() - start; if (LOGICAL(verbose)[0]) From 354c3db2dff692ee51eab66507474b8e2d9deaec Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 15 Jan 2026 14:52:33 +0300 Subject: [PATCH 03/11] overlaps: avoid 'lookup' list overflow This used to happen when from[i] was 0. (No match on non-range columns?) --- src/ijoin.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ijoin.c b/src/ijoin.c index 80bc3395a..37812d321 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -402,8 +402,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr case ANY : for (int i=0; i0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i]>0) ? from[i] : 1; if (k<=to[i]) { tmp1 = VECTOR_ELT(lookup, k-1); for (int m=0; m0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i]>0) ? from[i] : 1; if (k <= to[i]) { if (k==to[i] && count[k-1]) { tmp1 = VECTOR_ELT(lookup, k-1); From 9688a353ad602551e07f046693648b15eb3890bd Mon Sep 17 00:00:00 2001 From: Ivan K Date: Thu, 15 Jan 2026 14:56:38 +0300 Subject: [PATCH 04/11] NEWS entry --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index f218b93fb..4f14737f2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,8 @@ 8. When fixing duplicate factor levels, `setattr()` no longer crashes upon encountering missing factor values, [#7595](https://github.com/Rdatatable/data.table/issues/7595). Thanks to @sindribaldur for the report and @aitap for the fix. +9. `foverlaps()` no longer crashes due to out-of-bounds access to list and integer vectors when `y` has no rows or the non-range part of the join fails, [#7597](https://github.com/Rdatatable/data.table/issues/7597). Thanks to @nextpagesoft for the report and @aitap for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). From bc73eba92b4cdac3af0a16e11854ec68e86da65f Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sun, 18 Jan 2026 22:34:49 +0300 Subject: [PATCH 05/11] overlaps: uncomment one more underflow test Technically this one was harmless (and thus not caught by sanitizers) because the preceding VECSEXP header always contained a 0, preventing the branch where VECTOR_ELT() would be called with a negative index. --- inst/tests/tests.Rraw | 3 ++- src/ijoin.c | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a7ef17bef..f793bc44c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21493,5 +21493,6 @@ by.y = c("Id", "StartY", "EndY") setkeyv(y, by.y) y2 = data.table(Id = "none", StartY = integer(1), EndY = integer(1)) setkeyv(y2, by.y) -test(2363, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) +test(2363.1, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) +test(2363.2, foverlaps(x, y2, by.x, by.y, type="any", mult="all"), foverlaps(x, y2, by.x, by.y, type="any", mult="first")) rm(x, y, y2) diff --git a/src/ijoin.c b/src/ijoin.c index 37812d321..36b092466 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -521,8 +521,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr for (int i=0; i0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i]>0) ? from[i] : 1; for (int j=k; j<=to[i]; ++j) { if (type_count[j-1]) { tmp2 = VECTOR_ELT(type_lookup, j-1); From d1139399ded1692dde7562cdf0cc55c0a3b0dffd Mon Sep 17 00:00:00 2001 From: Ivan K Date: Sun, 18 Jan 2026 22:38:30 +0300 Subject: [PATCH 06/11] test formatting --- inst/tests/tests.Rraw | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f793bc44c..96e06df2d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21486,12 +21486,12 @@ test(2362.52, optimize=0:2, dt[, c(lapply(.SD, sum), list()), b, verbose=TRUE], test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE], output="GForce FALSE") # foverlaps shouldn't segfault on 0-row 'y', #7597 -x = data.table(Id = "A", StartX = 1L, EndX = 2L) -y = data.table(Id = character(), StartY = integer(), EndY = integer()) +x = data.table(Id="A", StartX=1L, EndX=2L) +y = data.table(Id=character(), StartY=integer(), EndY=integer()) by.x = c("Id", "StartX", "EndX") by.y = c("Id", "StartY", "EndY") setkeyv(y, by.y) -y2 = data.table(Id = "none", StartY = integer(1), EndY = integer(1)) +y2 = data.table(Id="none", StartY=integer(1), EndY=integer(1)) setkeyv(y2, by.y) test(2363.1, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) test(2363.2, foverlaps(x, y2, by.x, by.y, type="any", mult="all"), foverlaps(x, y2, by.x, by.y, type="any", mult="first")) From 2c8ebebfc1be92f202fa80a968f98bc66051351e Mon Sep 17 00:00:00 2001 From: aitap Date: Mon, 19 Jan 2026 08:58:04 +0000 Subject: [PATCH 07/11] Update src/ijoin.c Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- src/ijoin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ijoin.c b/src/ijoin.c index 36b092466..cb845a563 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -557,7 +557,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr ++thislen; ++j; ++m; break; } else if ( INTEGER(tmp1)[j] > INTEGER(tmp2)[m] ) { - ++m;; + ++m; } else ++j; } } From f9b722fb24f383e9fcd7a5aa86833d6a55fa9262 Mon Sep 17 00:00:00 2001 From: aitap Date: Mon, 19 Jan 2026 09:00:57 +0000 Subject: [PATCH 08/11] Update src/ijoin.c Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- src/ijoin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ijoin.c b/src/ijoin.c index cb845a563..9601375b2 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -723,7 +723,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr } else if (totlen) { int *f1i = INTEGER(f1__), *f2i = INTEGER(f2__); for (R_len_t i = 0; i < totlen; ++i) f1i[i] = i+1; - for (R_len_t i = 0; i < totlen; ++i) f2i[i] = NA_INTEGER; + for (R_len_t i = 0; i < totlen; ++i) f2i[i] = nomatch; } end2 = clock() - start; if (LOGICAL(verbose)[0]) From 3a7f9b455b9732a004ee3ed076c4bde3384f6d44 Mon Sep 17 00:00:00 2001 From: aitap Date: Mon, 19 Jan 2026 09:04:38 +0000 Subject: [PATCH 09/11] Update src/ijoin.c Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- src/ijoin.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ijoin.c b/src/ijoin.c index 9601375b2..c26c3d1ad 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -253,7 +253,6 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr // Checked the time for this loop on realisitc data (81m reads) and took 0.27 seconds! No excuses ;). start = clock(); if (xrows && mult == ALL) { - totlen=0; switch (type) { case START: case END: for (int i=0; i Date: Mon, 19 Jan 2026 12:14:31 +0300 Subject: [PATCH 10/11] Update inst/tests/tests.Rraw --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 96e06df2d..4156e541c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21495,4 +21495,5 @@ y2 = data.table(Id="none", StartY=integer(1), EndY=integer(1)) setkeyv(y2, by.y) test(2363.1, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) test(2363.2, foverlaps(x, y2, by.x, by.y, type="any", mult="all"), foverlaps(x, y2, by.x, by.y, type="any", mult="first")) +test(2363.3, foverlaps(x, y, by.x, by.y, which=TRUE, mult="first", nomatch=NULL), foverlaps(x, y2, by.x, by.y, which=TRUE, mult="first", nomatch=NULL)) rm(x, y, y2) From eab8609bae91fcc7ebc921ab403147c9982fdfd3 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:24:22 +0300 Subject: [PATCH 11/11] overlaps: uncomment the remaining underflow test The underflow is covered by already existing tests. --- src/ijoin.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ijoin.c b/src/ijoin.c index c26c3d1ad..737ac61b6 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -286,8 +286,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr case ANY: for (int i=0; i 0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i] > 0) ? from[i] : 1; if (k<=to[i]) totlen += count[k-1]; for (int j=k+1; j<=to[i]; ++j)