Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -3410,6 +3410,18 @@ is_constantish = function(q, check_singleton=FALSE) {

.gforce_ops = c("+", "-", "*", "/", "^", "%%", "%/%")

# Outer transformations that can wrap GForce-optimizable expressions
# e.g., sqrt(min(x)) should be optimized to sqrt(gmin(x))
# for the moment we only include unary elementwise functions
.gforce_outer_trans = c("sqrt", "abs", "sign", "floor", "ceiling",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what goes wrong if we just let any generic foo() through here? As long as the "innermost" function is GForce-able

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Things like min(min(x)) should already be quite problematic...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Our own test suite dislikes e.g. .Internal(mean(x))

"log", "log10", "log2", "log1p", "exp", "expm1",
"cos", "sin", "tan", "acos", "asin", "atan",
"cospi", "sinpi", "tanpi",
"cosh", "sinh", "tanh", "acosh", "asinh", "atanh",
"gamma", "lgamma", "digamma", "trigamma",
"is.na", "is.nan", "is.finite", "is.infinite",
"trunc", "round", "signif")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hello:
By setdiff(methods::getGroupMembers("Math"), .gforce_outer_trans) you can add:

"trunc" "cummax" "cummin" "cumprod" "cumsum"
"cospi" "sinpi" "tanpi"
"gamma" "lgamma" "digamma" "trigamma"

Note "log" can take "base = ..." as first argument. trunc can take ... also.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cumulative functions won't work. Maybe unary functions was too unprecise. I have added an "elementwise".

Binary functions will optimize as long ppl provide constantish parameters and not variables for the parameters, e.g. log(min(x), 2) works while t<-2; log(min(x), 2) does not

.unwrap_conversions = function(expr) {
while (.is_type_conversion(expr) && length(expr) >= 2L) expr = expr[[2L]]
expr
Expand All @@ -3435,8 +3447,8 @@ is_constantish = function(q, check_singleton=FALSE) {
))
}

# check if arithmetic operator -> recursively validate ALL branches (like in AST)
if (is.symbol(q[[1L]]) && q[[1L]] %chin% .gforce_ops) {
# check if arithmetic operator or outer transformation -> recursively validate ALL branches (like in AST)
if (is.symbol(q[[1L]]) && q[[1L]] %chin% c(.gforce_ops, .gforce_outer_trans)) {
for (i in 2:length(q)) {
if (!.gforce_ok(q[[i]], x, envir)) return(FALSE)
}
Expand Down Expand Up @@ -3467,8 +3479,8 @@ is_constantish = function(q, check_singleton=FALSE) {
return(q)
}

# if arithmetic operator, recursively substitute its operands. we know what branches are valid from .gforce_ok
if (is.symbol(q[[1L]]) && q[[1L]] %chin% .gforce_ops) {
# if arithmetic operator or outer transformation, recursively substitute its operands. we know what branches are valid from .gforce_ok
if (is.symbol(q[[1L]]) && q[[1L]] %chin% c(.gforce_ops, .gforce_outer_trans)) {
for (i in 2:length(q)) {
q[[i]] = .gforce_jsub(q[[i]], names_x, envir)
}
Expand Down
19 changes: 17 additions & 2 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14743,7 +14743,7 @@ DT = data.table(
Date2 = .Date(c(17459.1561177987, 17451.1086757995, 17449.0820898537, 17443.1175238448, 17461.0463715783, 17448.1033968224))
)
DT[ , DiffTime := abs(difftime(Date1, Date2, units = 'days'))]
test(2042.4, DT[ , round(mean(DiffTime)), by=Group, verbose=TRUE],
test(2042.4, DT[ , base::round(mean(DiffTime)), by=Group, verbose=TRUE],
data.table(Group=c("A", "B", "C"), V1=structure(c(16, 8, 12), class="difftime", units="days")),
output="Old mean optimization is on, left j unchanged.*GForce.*FALSE")

Expand Down Expand Up @@ -21465,7 +21465,7 @@ test(2362.32, optimize=0:2, dt[, .((max(a) - min(a)) / (max(a) + min(a))), by=b,
test(2362.33, optimize=0:2, dt[, sum(a) / .N, b, verbose=TRUE], output=out)
test(2362.34, optimize=0:2, dt[, mean(a) * 2L + sum(a), b, verbose=TRUE], output=out)
test(2362.35, optimize=0:2, dt[, list(range=max(a)-min(a), avg=mean(a)), by=b, verbose=TRUE], output=out)
test(2362.36, optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output="GForce FALSE")
test(2362.36, optimize=0:2, dt[, .(max(a)-sqrt(min(a))), by=b, verbose=TRUE], output=out)
test(2362.37, optimize=0:2, dt[, sum(a) %% 2, b, verbose=TRUE], output=out)
test(2362.38, optimize=0:2, dt[, sum(a) %/% 2, b, verbose=TRUE], output=out)
test(2362.39, optimize=0:2, dt[, -sum(a), b, verbose=TRUE], output=out)
Expand All @@ -21484,3 +21484,18 @@ dt = data.table(a=1:4, b=1:2)
test(2362.51, optimize=0:2, dt[, c(list()), b, verbose=TRUE], data.table(b=integer(0L)), output="GForce FALSE")
test(2362.52, optimize=0:2, dt[, c(lapply(.SD, sum), list()), b, verbose=TRUE], output=out)
test(2362.53, optimize=0:2, dt[, list(lapply(.SD, sum), list()), b, verbose=TRUE], output="GForce FALSE")

# outer transformations with GForce, #7594
dt = data.table(x = 1:2, y = 1:10)
out = c('GForce FALSE', 'GForce TRUE')
test(2363.01, optimize=1:2, dt[, sqrt(min(y)), by=x, verbose=TRUE], output=out)
test(2363.02, optimize=1:2, dt[, mean(y)^2, by=x, verbose=TRUE], output=out)
test(2363.03, optimize=1:2, dt[, sqrt(abs(min(y))), by=x, verbose=TRUE], output=out)
test(2363.04, optimize=1:2, dt[, sqrt(min(y)) + 1, by=x, verbose=TRUE], output=out)
test(2363.05, optimize=1:2, dt[, floor(mean(y)), by=x, verbose=TRUE], output=out)
# Transformation around non-GForce expression should NOT optimize
test(2363.11, optimize=2L, dt[, sqrt(y), by=x, verbose=TRUE], output="GForce FALSE")
test(2363.12, optimize=2L, dt[, log1p(abs(y)), by=x, verbose=TRUE], output="GForce FALSE")
dt = data.table(x = 1:2, y = c(NA, NA, NaN, Inf, 1:4))
test(2363.21, optimize=1:2, dt[, is.na(sum(y)), by=x, verbose=TRUE], output=out)
test(2363.22, optimize=1:2, dt[, is.finite(sum(y, na.rm=TRUE)), by=x, verbose=TRUE], output=out)
Loading