diff --git a/NEWS.md b/NEWS.md index f218b93fb..4f14737f2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,8 @@ 8. When fixing duplicate factor levels, `setattr()` no longer crashes upon encountering missing factor values, [#7595](https://github.com/Rdatatable/data.table/issues/7595). Thanks to @sindribaldur for the report and @aitap for the fix. +9. `foverlaps()` no longer crashes due to out-of-bounds access to list and integer vectors when `y` has no rows or the non-range part of the join fails, [#7597](https://github.com/Rdatatable/data.table/issues/7597). Thanks to @nextpagesoft for the report and @aitap for the fix. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10fd2fc7f..a7ef17bef 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -21484,3 +21484,14 @@ dt = data.table(a=1:4, b=1:2) test(2362.51, optimize=0:2, dt[, c(list()), b, verbose=TRUE], data.table(b=integer(0L)), output="GForce FALSE") test(2362.52, optimize=0:2, dt[, c(lapply(.SD, sum), list()), b, verbose=TRUE], output=out) test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE], output="GForce FALSE") + +# foverlaps shouldn't segfault on 0-row 'y', #7597 +x = data.table(Id = "A", StartX = 1L, EndX = 2L) +y = data.table(Id = character(), StartY = integer(), EndY = integer()) +by.x = c("Id", "StartX", "EndX") +by.y = c("Id", "StartY", "EndY") +setkeyv(y, by.y) +y2 = data.table(Id = "none", StartY = integer(1), EndY = integer(1)) +setkeyv(y2, by.y) +test(2363, foverlaps(x, y, by.x, by.y), foverlaps(x, y2, by.x, by.y)) +rm(x, y, y2) diff --git a/src/ijoin.c b/src/ijoin.c index e81e8325f..37812d321 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -223,7 +223,7 @@ SEXP lookup(SEXP ux, SEXP xlen, SEXP indices, SEXP gaps, SEXP overlaps, SEXP mul SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchArg, SEXP verbose) { - R_len_t uxcols=LENGTH(ux),rows=length(VECTOR_ELT(imatches,0)); + R_len_t uxcols=LENGTH(ux), rows=length(VECTOR_ELT(imatches,0)), xrows=length(VECTOR_ELT(ux,0)); int nomatch = INTEGER(nomatchArg)[0], totlen=0, thislen; int *from = INTEGER(VECTOR_ELT(imatches, 0)); int *to = INTEGER(VECTOR_ELT(imatches, 1)); @@ -252,7 +252,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr // As a first pass get the final length, so that we can allocate up-front and not deal with R_Calloc + R_Realloc + size calculation hassle // Checked the time for this loop on realisitc data (81m reads) and took 0.27 seconds! No excuses ;). start = clock(); - if (mult == ALL) { + if (xrows && mult == ALL) { totlen=0; switch (type) { case START: case END: @@ -340,7 +340,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr // switching mult=ALL,FIRST,LAST separately to // - enhance performance for special cases, and // - easy to fix any bugs in the future - switch (mult) { + if (xrows) switch (mult) { case ALL: switch (type) { case START : case END : @@ -402,8 +402,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr case ANY : for (int i=0; i0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i]>0) ? from[i] : 1; if (k<=to[i]) { tmp1 = VECTOR_ELT(lookup, k-1); for (int m=0; m0) ? from[i] : 1; - const int k = from[i]; + const int k = (from[i]>0) ? from[i] : 1; if (k <= to[i]) { if (k==to[i] && count[k-1]) { tmp1 = VECTOR_ELT(lookup, k-1); @@ -723,6 +721,10 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr } break; default: internal_error(__func__, "unknown mult: %d", mult); // # nocov + } else if (totlen) { + int *f1i = INTEGER(f1__), *f2i = INTEGER(f2__); + for (R_len_t i = 0; i < totlen; ++i) f1i[i] = i+1; + for (R_len_t i = 0; i < totlen; ++i) f2i[i] = NA_INTEGER; } end2 = clock() - start; if (LOGICAL(verbose)[0])