From b8dfee63fa0b5dee1e29aa1385bed7074b84898a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 26 Jan 2026 22:59:26 +0900 Subject: [PATCH] GH-48985: [GLib][Ruby] Fix GC problems in node options and expressions --- c_glib/arrow-glib/compute.cpp | 196 +++++++++++++++++++++++++-- c_glib/arrow-glib/compute.h | 8 ++ c_glib/arrow-glib/expression.cpp | 209 +++++++++++++++++++++++++++-- c_glib/arrow-glib/expression.h | 3 + c_glib/arrow-glib/expression.hpp | 12 ++ ruby/red-arrow/ext/arrow/arrow.cpp | 36 +++++ 6 files changed, 442 insertions(+), 22 deletions(-) diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index ca43d4e0f17..745d3e567e4 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -1274,9 +1274,71 @@ garrow_source_node_options_new_table(GArrowTable *table) return options; } -G_DEFINE_TYPE(GArrowFilterNodeOptions, - garrow_filter_node_options, - GARROW_TYPE_EXECUTE_NODE_OPTIONS) +enum { + PROP_EXPRESSION = 1, +}; + +struct GArrowFilterNodeOptionsPrivate +{ + GArrowExpression *expression; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFilterNodeOptions, + garrow_filter_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +#define GARROW_FILTER_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_filter_node_options_get_instance_private(GARROW_FILTER_NODE_OPTIONS(object))) + +static void +garrow_filter_node_options_dispose(GObject *object) +{ + auto priv = GARROW_FILTER_NODE_OPTIONS_GET_PRIVATE(object); + + if (priv->expression) { + g_object_unref(priv->expression); + priv->expression = nullptr; + } + + G_OBJECT_CLASS(garrow_filter_node_options_parent_class)->dispose(object); +} + +static void +garrow_filter_node_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FILTER_NODE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + priv->expression = GARROW_EXPRESSION(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_filter_node_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FILTER_NODE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + g_value_set_object(value, priv->expression); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} static void garrow_filter_node_options_init(GArrowFilterNodeOptions *object) @@ -1286,6 +1348,28 @@ garrow_filter_node_options_init(GArrowFilterNodeOptions *object) static void garrow_filter_node_options_class_init(GArrowFilterNodeOptionsClass *klass) { + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_filter_node_options_dispose; + gobject_class->set_property = garrow_filter_node_options_set_property; + gobject_class->get_property = garrow_filter_node_options_get_property; + + GParamSpec *spec; + + /** + * GArrowFilterNodeOptions:expression: + * + * The expression of this filter. + * + * Since: 24.0.0 + */ + spec = g_param_spec_object( + "expression", + "Expression", + "The expression of this filter", + GARROW_TYPE_EXPRESSION, + static_cast(G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec); } /** @@ -1301,14 +1385,39 @@ garrow_filter_node_options_new(GArrowExpression *expression) { auto arrow_expression = garrow_expression_get_raw(expression); auto arrow_options = new arrow::acero::FilterNodeOptions(*arrow_expression); - auto options = - g_object_new(GARROW_TYPE_FILTER_NODE_OPTIONS, "options", arrow_options, NULL); + auto options = g_object_new(GARROW_TYPE_FILTER_NODE_OPTIONS, + "options", + arrow_options, + "expression", + expression, + nullptr); return GARROW_FILTER_NODE_OPTIONS(options); } -G_DEFINE_TYPE(GArrowProjectNodeOptions, - garrow_project_node_options, - GARROW_TYPE_EXECUTE_NODE_OPTIONS) +struct GArrowProjectNodeOptionsPrivate +{ + GList *expressions; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowProjectNodeOptions, + garrow_project_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +#define GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_project_node_options_get_instance_private( \ + GARROW_PROJECT_NODE_OPTIONS(object))) + +static void +garrow_project_node_options_dispose(GObject *object) +{ + auto priv = GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(object); + + g_list_free_full(priv->expressions, g_object_unref); + priv->expressions = nullptr; + + G_OBJECT_CLASS(garrow_project_node_options_parent_class)->dispose(object); +} static void garrow_project_node_options_init(GArrowProjectNodeOptions *object) @@ -1318,6 +1427,9 @@ garrow_project_node_options_init(GArrowProjectNodeOptions *object) static void garrow_project_node_options_class_init(GArrowProjectNodeOptionsClass *klass) { + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_project_node_options_dispose; } /** @@ -1354,9 +1466,28 @@ garrow_project_node_options_new(GList *expressions, gchar **names, gsize n_names new arrow::acero::ProjectNodeOptions(arrow_expressions, arrow_names); auto options = g_object_new(GARROW_TYPE_PROJECT_NODE_OPTIONS, "options", arrow_options, NULL); + auto priv = GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(options); + priv->expressions = + g_list_copy_deep(expressions, reinterpret_cast(g_object_ref), nullptr); return GARROW_PROJECT_NODE_OPTIONS(options); } +/** + * garrow_project_node_options_get_expressions: + * @options: A #GArrowProjectNodeOptions. + * + * Returns: (transfer none) (element-type GArrowExpression): Expressions + * of the @options. + * + * Since: 24.0.0 + */ +GList * +garrow_project_node_options_get_expressions(GArrowProjectNodeOptions *options) +{ + auto priv = GARROW_PROJECT_NODE_OPTIONS_GET_PRIVATE(options); + return priv->expressions; +} + typedef struct GArrowAggregationPrivate_ { gchar *function; @@ -1558,9 +1689,28 @@ garrow_aggregation_new(const gchar *function, NULL)); } -G_DEFINE_TYPE(GArrowAggregateNodeOptions, - garrow_aggregate_node_options, - GARROW_TYPE_EXECUTE_NODE_OPTIONS) +struct GArrowAggregateNodeOptionsPrivate +{ + GList *aggregations; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowAggregateNodeOptions, + garrow_aggregate_node_options, + GARROW_TYPE_EXECUTE_NODE_OPTIONS) + +#define GARROW_AGGREGATE_NODE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_aggregate_node_options_get_instance_private( \ + GARROW_AGGREGATE_NODE_OPTIONS(object))) + +static void +garrow_aggregate_node_options_dispose(GObject *object) +{ + auto priv = GARROW_AGGREGATE_NODE_OPTIONS_GET_PRIVATE(object); + g_list_free_full(priv->aggregations, g_object_unref); + priv->aggregations = nullptr; + G_OBJECT_CLASS(garrow_aggregate_node_options_parent_class)->dispose(object); +} static void garrow_aggregate_node_options_init(GArrowAggregateNodeOptions *object) @@ -1570,6 +1720,9 @@ garrow_aggregate_node_options_init(GArrowAggregateNodeOptions *object) static void garrow_aggregate_node_options_class_init(GArrowAggregateNodeOptionsClass *klass) { + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_aggregate_node_options_dispose; } /** @@ -1623,10 +1776,29 @@ garrow_aggregate_node_options_new(GList *aggregations, auto arrow_options = new arrow::acero::AggregateNodeOptions(std::move(arrow_aggregates), std::move(arrow_keys)); auto options = - g_object_new(GARROW_TYPE_AGGREGATE_NODE_OPTIONS, "options", arrow_options, NULL); + g_object_new(GARROW_TYPE_AGGREGATE_NODE_OPTIONS, "options", arrow_options, nullptr); + auto priv = GARROW_AGGREGATE_NODE_OPTIONS_GET_PRIVATE(options); + priv->aggregations = + g_list_copy_deep(aggregations, reinterpret_cast(g_object_ref), nullptr); return GARROW_AGGREGATE_NODE_OPTIONS(options); } +/** + * garrow_aggregate_node_options_get_aggregations: + * @options: A #GArrowAggregateNodeOptions. + * + * Returns: (transfer none) (element-type GArrowAggregation): Aggregations + * of the @options. + * + * Since: 24.0.0 + */ +GList * +garrow_aggregate_node_options_get_aggregations(GArrowAggregateNodeOptions *options) +{ + auto priv = GARROW_AGGREGATE_NODE_OPTIONS_GET_PRIVATE(options); + return priv->aggregations; +} + typedef struct GArrowSinkNodeOptionsPrivate_ { arrow::AsyncGenerator> generator; diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index ff2d0d29956..2f4153676d4 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -183,6 +183,10 @@ GARROW_AVAILABLE_IN_11_0 GArrowProjectNodeOptions * garrow_project_node_options_new(GList *expressions, gchar **names, gsize n_names); +GARROW_AVAILABLE_IN_24_0 +GList * +garrow_project_node_options_get_expressions(GArrowProjectNodeOptions *options); + #define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type()) GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( @@ -218,6 +222,10 @@ garrow_aggregate_node_options_new(GList *aggregations, gsize n_keys, GError **error); +GARROW_AVAILABLE_IN_24_0 +GList * +garrow_aggregate_node_options_get_aggregations(GArrowAggregateNodeOptions *options); + #define GARROW_TYPE_SINK_NODE_OPTIONS (garrow_sink_node_options_get_type()) GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowSinkNodeOptions, diff --git a/c_glib/arrow-glib/expression.cpp b/c_glib/arrow-glib/expression.cpp index 9be8e1f68bc..84cc3ace467 100644 --- a/c_glib/arrow-glib/expression.cpp +++ b/c_glib/arrow-glib/expression.cpp @@ -42,10 +42,14 @@ G_BEGIN_DECLS * Since: 6.0.0 */ -typedef struct GArrowExpressionPrivate_ +enum { + PROP_EXPRESSION = 1, +}; + +struct GArrowExpressionPrivate { arrow::compute::Expression expression; -} GArrowExpressionPrivate; +}; G_DEFINE_ABSTRACT_TYPE_WITH_PRIVATE(GArrowExpression, garrow_expression, G_TYPE_OBJECT) @@ -61,6 +65,25 @@ garrow_expression_finalize(GObject *object) G_OBJECT_CLASS(garrow_expression_parent_class)->finalize(object); } +static void +garrow_expression_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_EXPRESSION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_EXPRESSION: + priv->expression = + *static_cast(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + static void garrow_expression_init(GArrowExpression *object) { @@ -74,6 +97,15 @@ garrow_expression_class_init(GArrowExpressionClass *klass) auto gobject_class = G_OBJECT_CLASS(klass); gobject_class->finalize = garrow_expression_finalize; + gobject_class->set_property = garrow_expression_set_property; + + GParamSpec *spec; + spec = g_param_spec_pointer( + "expression", + "Expression", + "The raw arrow::compute::Expression *", + static_cast(G_PARAM_WRITABLE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_EXPRESSION, spec); } /** @@ -112,7 +144,71 @@ garrow_expression_equal(GArrowExpression *expression, GArrowExpression *other_ex return priv->expression.Equals(other_priv->expression); } -G_DEFINE_TYPE(GArrowLiteralExpression, garrow_literal_expression, GARROW_TYPE_EXPRESSION) +enum { + PROP_DATUM = 1, +}; + +struct GArrowLiteralExpressionPrivate +{ + GArrowDatum *datum; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowLiteralExpression, + garrow_literal_expression, + GARROW_TYPE_EXPRESSION) + +#define GARROW_LITERAL_EXPRESSION_GET_PRIVATE(object) \ + static_cast( \ + garrow_literal_expression_get_instance_private(GARROW_LITERAL_EXPRESSION(object))) + +static void +garrow_literal_expression_dispose(GObject *object) +{ + auto priv = GARROW_LITERAL_EXPRESSION_GET_PRIVATE(object); + + if (priv->datum) { + g_object_unref(priv->datum); + priv->datum = nullptr; + } + + G_OBJECT_CLASS(garrow_literal_expression_parent_class)->dispose(object); +} + +static void +garrow_literal_expression_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LITERAL_EXPRESSION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATUM: + priv->datum = GARROW_DATUM(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_literal_expression_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_LITERAL_EXPRESSION_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_DATUM: + g_value_set_object(value, priv->datum); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} static void garrow_literal_expression_init(GArrowLiteralExpression *object) @@ -122,6 +218,28 @@ garrow_literal_expression_init(GArrowLiteralExpression *object) static void garrow_literal_expression_class_init(GArrowLiteralExpressionClass *klass) { + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_literal_expression_dispose; + gobject_class->set_property = garrow_literal_expression_set_property; + gobject_class->get_property = garrow_literal_expression_get_property; + + GParamSpec *spec; + + /** + * GArrowLiteralExpression:datum: + * + * The datum of this literal. + * + * Since: 24.0.0 + */ + spec = g_param_spec_object( + "datum", + "Datum", + "The datum of this literal", + GARROW_TYPE_DATUM, + static_cast(G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_DATUM, spec); } /** @@ -137,7 +255,12 @@ garrow_literal_expression_new(GArrowDatum *datum) { auto arrow_datum = garrow_datum_get_raw(datum); auto arrow_expression = arrow::compute::literal(arrow_datum); - return GARROW_LITERAL_EXPRESSION(garrow_expression_new_raw(arrow_expression)); + return GARROW_LITERAL_EXPRESSION(garrow_expression_new_raw(arrow_expression, + "expression", + &arrow_expression, + "datum", + datum, + nullptr)); } G_DEFINE_TYPE(GArrowFieldExpression, garrow_field_expression, GARROW_TYPE_EXPRESSION) @@ -173,7 +296,29 @@ garrow_field_expression_new(const gchar *reference, GError **error) return GARROW_FIELD_EXPRESSION(garrow_expression_new_raw(arrow_expression)); } -G_DEFINE_TYPE(GArrowCallExpression, garrow_call_expression, GARROW_TYPE_EXPRESSION) +struct GArrowCallExpressionPrivate +{ + GList *arguments; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCallExpression, + garrow_call_expression, + GARROW_TYPE_EXPRESSION) + +#define GARROW_CALL_EXPRESSION_GET_PRIVATE(object) \ + static_cast( \ + garrow_call_expression_get_instance_private(GARROW_CALL_EXPRESSION(object))) + +static void +garrow_call_expression_dispose(GObject *object) +{ + auto priv = GARROW_CALL_EXPRESSION_GET_PRIVATE(object); + + g_list_free_full(priv->arguments, g_object_unref); + priv->arguments = nullptr; + + G_OBJECT_CLASS(garrow_call_expression_parent_class)->dispose(object); +} static void garrow_call_expression_init(GArrowCallExpression *object) @@ -183,6 +328,9 @@ garrow_call_expression_init(GArrowCallExpression *object) static void garrow_call_expression_class_init(GArrowCallExpressionClass *klass) { + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_call_expression_dispose; } /** @@ -211,13 +359,57 @@ garrow_call_expression_new(const gchar *function, arrow_options.reset(garrow_function_options_get_raw(options)->Copy().release()); } auto arrow_expression = arrow::compute::call(function, arrow_arguments, arrow_options); - return GARROW_CALL_EXPRESSION(garrow_expression_new_raw(arrow_expression)); + auto expression = GARROW_CALL_EXPRESSION(garrow_expression_new_raw(arrow_expression)); + auto priv = GARROW_CALL_EXPRESSION_GET_PRIVATE(expression); + priv->arguments = + g_list_copy_deep(arguments, reinterpret_cast(g_object_ref), nullptr); + return expression; +} + +/** + * garrow_call_expression_get_arguments: + * @expression: A #GArrowCallExpression. + * + * Returns: (transfer none) (element-type GArrowExpression): Arguments + * of this expression. + * + * Since: 24.0.0 + */ +GList * +garrow_call_expression_get_arguments(GArrowCallExpression *expression) +{ + auto priv = GARROW_CALL_EXPRESSION_GET_PRIVATE(expression); + return priv->arguments; } G_END_DECLS GArrowExpression * garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression) +{ + return garrow_expression_new_raw(arrow_expression, + "expression", + &arrow_expression, + nullptr); +} + +GArrowExpression * +garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression, + const gchar *first_property_name, + ...) +{ + va_list args; + va_start(args, first_property_name); + auto array = + garrow_expression_new_raw_valist(arrow_expression, first_property_name, args); + va_end(args); + return array; +} + +GArrowExpression * +garrow_expression_new_raw_valist(const arrow::compute::Expression &arrow_expression, + const gchar *first_property_name, + va_list args) { GType gtype = GARROW_TYPE_EXPRESSION; if (arrow_expression.literal()) { @@ -227,10 +419,7 @@ garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression) } else if (arrow_expression.call()) { gtype = GARROW_TYPE_CALL_EXPRESSION; } - auto expression = GARROW_EXPRESSION(g_object_new(gtype, NULL)); - auto priv = GARROW_EXPRESSION_GET_PRIVATE(expression); - priv->expression = arrow_expression; - return expression; + return GARROW_EXPRESSION(g_object_new_valist(gtype, first_property_name, args)); } arrow::compute::Expression * diff --git a/c_glib/arrow-glib/expression.h b/c_glib/arrow-glib/expression.h index 5a6bfb456fc..e690aa41b86 100644 --- a/c_glib/arrow-glib/expression.h +++ b/c_glib/arrow-glib/expression.h @@ -76,5 +76,8 @@ GArrowCallExpression * garrow_call_expression_new(const gchar *function, GList *arguments, GArrowFunctionOptions *options); +GARROW_AVAILABLE_IN_24_0 +GList * +garrow_call_expression_get_arguments(GArrowCallExpression *expression); G_END_DECLS diff --git a/c_glib/arrow-glib/expression.hpp b/c_glib/arrow-glib/expression.hpp index cc96badbe67..90606a6fb31 100644 --- a/c_glib/arrow-glib/expression.hpp +++ b/c_glib/arrow-glib/expression.hpp @@ -27,6 +27,18 @@ GARROW_EXTERN GArrowExpression * garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression); +GARROW_EXTERN +GArrowExpression * +garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression, + const gchar *first_property_name, + ...); + +GARROW_EXTERN +GArrowExpression * +garrow_expression_new_raw_valist(const arrow::compute::Expression &arrow_expression, + const gchar *first_property_name, + va_list args); + GARROW_EXTERN arrow::compute::Expression * garrow_expression_get_raw(GArrowExpression *expression); diff --git a/ruby/red-arrow/ext/arrow/arrow.cpp b/ruby/red-arrow/ext/arrow/arrow.cpp index 404ec8996f2..76d5f94443d 100644 --- a/ruby/red-arrow/ext/arrow/arrow.cpp +++ b/ruby/red-arrow/ext/arrow/arrow.cpp @@ -63,6 +63,36 @@ namespace red_arrow { rbgobj_gc_mark_instance(node->data); } } + + void + call_expression_mark(gpointer object) + { + auto expression = GARROW_CALL_EXPRESSION(object); + auto arguments = garrow_call_expression_get_arguments(expression); + for (auto argument = arguments; argument; argument = g_list_next(argument)) { + rbgobj_gc_mark_instance(argument->data); + } + } + + void + aggregate_node_options_mark(gpointer object) + { + auto options = GARROW_AGGREGATE_NODE_OPTIONS(object); + auto aggregations = garrow_aggregate_node_options_get_aggregations(options); + for (auto aggregation = aggregations; aggregation; aggregation = g_list_next(aggregation)) { + rbgobj_gc_mark_instance(aggregation->data); + } + } + + void + project_node_options_mark(gpointer object) + { + auto options = GARROW_PROJECT_NODE_OPTIONS(object); + auto expressions = garrow_project_node_options_get_expressions(options); + for (auto expression = expressions; expression; expression = g_list_next(expression)) { + rbgobj_gc_mark_instance(expression->data); + } + } } extern "C" void Init_arrow() { @@ -124,4 +154,10 @@ extern "C" void Init_arrow() { red_arrow::record_batch_reader_mark); rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN, red_arrow::execute_plan_mark); + rbgobj_register_mark_func(GARROW_TYPE_CALL_EXPRESSION, + red_arrow::call_expression_mark); + rbgobj_register_mark_func(GARROW_TYPE_AGGREGATE_NODE_OPTIONS, + red_arrow::aggregate_node_options_mark); + rbgobj_register_mark_func(GARROW_TYPE_PROJECT_NODE_OPTIONS, + red_arrow::project_node_options_mark); }