From 7834f4be6c58146a6581100ab157de5bb3f3f6d3 Mon Sep 17 00:00:00 2001 From: akshat-1704 <119754570+akshat-1704@users.noreply.github.com> Date: Wed, 16 Apr 2025 13:53:27 +0530 Subject: [PATCH 1/2] Added support for BYTE_SIZE and TIME_DURATION in grammar and directives --- wrangler-api/pom.xml | 2 +- .../io/cdap/wrangler/api/parser/ByteSize.java | 147 ++++ .../wrangler/api/parser/TimeDuration.java | 156 ++++ .../io/cdap/wrangler/api/parser/Token.java | 2 + .../cdap/wrangler/api/parser/TokenType.java | 315 ++++---- .../wrangler/api/parser/UsageDefinition.java | 473 ++++++------ wrangler-core/pom.xml | 2 +- .../io/cdap/wrangler/parser/Directives.g4 | 40 +- .../aggregates/SizeTimeAggregator.java | 298 ++++++++ .../cdap/wrangler/parser/RecipeVisitor.java | 684 ++++++++++-------- .../aggregates/SizeTimeAggregatorTest.java | 299 ++++++++ 11 files changed, 1735 insertions(+), 683 deletions(-) create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java create mode 100644 wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java create mode 100644 wrangler-core/src/main/java/io/cdap/directives/aggregates/SizeTimeAggregator.java create mode 100644 wrangler-core/src/test/java/io/cdap/directives/aggregates/SizeTimeAggregatorTest.java diff --git a/wrangler-api/pom.xml b/wrangler-api/pom.xml index e97464a64..b4f5aa36a 100644 --- a/wrangler-api/pom.xml +++ b/wrangler-api/pom.xml @@ -41,4 +41,4 @@ - + \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java new file mode 100644 index 000000000..8c816b4d3 --- /dev/null +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/ByteSize.java @@ -0,0 +1,147 @@ +/* + * Copyright © 2017-2019 Cask Data, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. 
You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import io.cdap.wrangler.api.annotations.PublicEvolving;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token class representing byte size values with units (e.g., "10KB", "5MB").
+ * Parses and stores byte sizes, providing methods to retrieve the value in
+ * bytes. Units are binary multiples: 1KB is 1024 bytes, 1MB is 1024KB, and so on.
+ */
+@PublicEvolving
+public class ByteSize implements Token {
+  private static final Pattern BYTE_SIZE_PATTERN = Pattern.compile("(\\d+)\\s*([kKmMgGtTpP]?[bB]?)");
+  private final String value;
+  private final long bytes;
+
+  /**
+   * Constructs a ByteSize token from a string representation.
+   * Accepts formats like "10KB", "5MB", "1GB", etc.
+   *
+   * @param value String representation of a byte size with unit
+   * @throws IllegalArgumentException if the string cannot be parsed as a byte
+   *                                  size, or the size does not fit in a long
+   */
+  public ByteSize(String value) {
+    this.value = value;
+    this.bytes = parseBytes(value);
+  }
+
+  /**
+   * Parses a string representation of byte size into its byte value.
+   *
+   * @param sizeStr String representation of a byte size (e.g., "10KB")
+   * @return The size in bytes
+   * @throws IllegalArgumentException if the string cannot be parsed or overflows
+   */
+  private long parseBytes(String sizeStr) {
+    Matcher matcher = BYTE_SIZE_PATTERN.matcher(sizeStr);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid byte size format: " + sizeStr);
+    }
+    long size = Long.parseLong(matcher.group(1));
+    String unit = matcher.group(2).toUpperCase();
+    switch (unit) {
+      case "B":
+      case "":
+        return size;
+      case "KB":
+      case "K":
+        return scale(size, 1, sizeStr);
+      case "MB":
+      case "M":
+        return scale(size, 2, sizeStr);
+      case "GB":
+      case "G":
+        return scale(size, 3, sizeStr);
+      case "TB":
+      case "T":
+        return scale(size, 4, sizeStr);
+      case "PB":
+      case "P":
+        return scale(size, 5, sizeStr);
+      default:
+        throw new IllegalArgumentException("Unsupported byte size unit: " + unit);
+    }
+  }
+
+  /** Multiplies {@code size} by 1024^{@code power}, rejecting values that overflow a long. */
+  private static long scale(long size, int power, String sizeStr) {
+    long multiplier = 1L << (10 * power);
+    // size is parsed from digits only, so it is never negative and this division is an exact overflow test.
+    if (size > Long.MAX_VALUE / multiplier) {
+      throw new IllegalArgumentException("Byte size is too large: " + sizeStr);
+    }
+    return size * multiplier;
+  }
+
+  /**
+   * Returns the original string representation of the byte size.
+   */
+  @Override
+  public String value() {
+    return value;
+  }
+
+  /**
+   * Returns the size in bytes.
+   */
+  public long getBytes() {
+    return bytes;
+  }
+
+  /**
+   * Returns the size in kilobytes (1KB = 1024 bytes), possibly fractional.
+   */
+  public double getKilobytes() {
+    return bytes / 1024.0;
+  }
+
+  /**
+   * Returns the size in megabytes (1MB = 1024KB), possibly fractional.
+   */
+  public double getMegabytes() {
+    return bytes / (1024.0 * 1024.0);
+  }
+
+  /**
+   * Returns the size in gigabytes (1GB = 1024MB), possibly fractional.
+   */
+  public double getGigabytes() {
+    return bytes / (1024.0 * 1024.0 * 1024.0);
+  }
+
+  @Override
+  public TokenType type() {
+    return TokenType.BYTE_SIZE;
+  }
+
+  @Override
+  public JsonElement toJson() {
+    JsonObject object = new JsonObject();
+    object.addProperty("type", TokenType.BYTE_SIZE.name());
+    object.addProperty("value", value);
+    object.addProperty("bytes", bytes);
+    return object;
+  }
+}
\ No newline at end of file
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
new file mode 100644
index 000000000..d7607bbab
--- /dev/null
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TimeDuration.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright © 2017-2019 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package io.cdap.wrangler.api.parser;
+
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import io.cdap.wrangler.api.annotations.PublicEvolving;
+
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Token class representing time duration values with units (e.g., "5s", "10m",
+ * "2h").
+ * Parses and stores time durations, providing methods to retrieve the value in
+ * various time units.
+ */
+@PublicEvolving
+public class TimeDuration implements Token {
+  private static final Pattern TIME_PATTERN = Pattern.compile("(\\d+)\\s*([smhdwy]|mo)", Pattern.CASE_INSENSITIVE);
+  private final String value;
+  private final long milliseconds;
+
+  /**
+   * Constructs a TimeDuration token from a string representation.
+   * Accepts formats like "5s" (seconds), "10m" (minutes), "2h" (hours), etc.
+   * Units are matched case-insensitively, so "5S" and "2H" are accepted as well.
+   *
+   * @param value String representation of a time duration with unit
+   * @throws IllegalArgumentException if the string cannot be parsed as a time duration
+   */
+  public TimeDuration(String value) {
+    this.value = value;
+    this.milliseconds = parseMilliseconds(value);
+  }
+
+  /**
+   * Parses a string representation of time duration into milliseconds.
+   *
+   * @param durationStr String representation of a time duration (e.g., "5s")
+   * @return The duration in milliseconds
+   * @throws IllegalArgumentException if the string cannot be parsed
+   */
+  private long parseMilliseconds(String durationStr) {
+    Matcher matcher = TIME_PATTERN.matcher(durationStr);
+    if (!matcher.matches()) {
+      throw new IllegalArgumentException("Invalid time duration format: " + durationStr);
+    }
+
+    long amount = Long.parseLong(matcher.group(1));
+    // The pattern is case-insensitive; fold the unit so one set of case labels suffices.
+    String unit = matcher.group(2).toLowerCase();
+    switch (unit) {
+      case "s":
+        return TimeUnit.SECONDS.toMillis(amount);
+      case "m":
+        return TimeUnit.MINUTES.toMillis(amount);
+      case "h":
+        return TimeUnit.HOURS.toMillis(amount);
+      case "d":
+        return TimeUnit.DAYS.toMillis(amount);
+      case "w":
+        return TimeUnit.DAYS.toMillis(amount * 7);
+      case "mo":
+        // Approximate a month as 30 days
+        return TimeUnit.DAYS.toMillis(amount * 30);
+      case "y":
+        // Approximate a year as 365 days
+        return TimeUnit.DAYS.toMillis(amount * 365);
+      default:
+        throw new IllegalArgumentException("Unsupported time unit: " + unit);
+    }
+  }
+
+  /**
+   * Returns the original string representation of the time duration.
+   */
+  @Override
+  public String value() {
+    return value;
+  }
+
+  /**
+   * Returns the duration in milliseconds.
+   *
+   * @return The duration in milliseconds
+   */
+  public long getMilliseconds() {
+    return milliseconds;
+  }
+
+  /**
+   * Returns the duration in seconds.
+   *
+   * @return The duration in seconds
+   */
+  public double getSeconds() {
+    return milliseconds / 1000.0;
+  }
+
+  /**
+   * Returns the duration in minutes.
+   *
+   * @return The duration in minutes
+   */
+  public double getMinutes() {
+    return milliseconds / (1000.0 * 60);
+  }
+
+  /**
+   * Returns the duration in hours.
+   *
+   * @return The duration in hours
+   */
+  public double getHours() {
+    return milliseconds / (1000.0 * 60 * 60);
+  }
+
+  /**
+   * Returns the duration in days.
+   *
+   * @return The duration in days
+   */
+  public double getDays() {
+    return milliseconds / (1000.0 * 60 * 60 * 24);
+  }
+
+  @Override
+  public TokenType type() {
+    return TokenType.TIME_DURATION;
+  }
+
+  @Override
+  public JsonElement toJson() {
+    JsonObject object = new JsonObject();
+    object.addProperty("type", TokenType.TIME_DURATION.name());
+    object.addProperty("value", value);
+    object.addProperty("milliseconds", milliseconds);
+    return object;
+  }
+}
\ No newline at end of file
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java
index bc596f4df..683fb26ae 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/Token.java
@@ -32,6 +32,7 @@
  * of this interface.
*/ @PublicEvolving + public interface Token extends Serializable { /** * Returns the {@code value} of the object wrapped by the @@ -57,3 +58,4 @@ public interface Token extends Serializable { */ JsonElement toJson(); } +// [blank line here] \ No newline at end of file diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java index 8c93b0e6a..22fcf2dd4 100644 --- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java +++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java @@ -14,143 +14,178 @@ * the License. */ -package io.cdap.wrangler.api.parser; - -import io.cdap.wrangler.api.annotations.PublicEvolving; - -import java.io.Serializable; - -/** - * The TokenType class provides the enumerated types for different types of - * tokens that are supported by the grammar. - * - * Each of the enumerated types specified in this class also has associated - * object representing it. e.g. {@code DIRECTIVE_NAME} is represented by the - * object {@code DirectiveName}. - * - * @see Bool - * @see BoolList - * @see ColumnName - * @see ColumnNameList - * @see DirectiveName - * @see Numeric - * @see NumericList - * @see Properties - * @see Ranges - * @see Expression - * @see Text - * @see TextList - */ -@PublicEvolving -public enum TokenType implements Serializable { - /** - * Represents the enumerated type for the object {@code DirectiveName} type. - * This type is associated with the token that is recognized as a directive - * name within the recipe. - */ - DIRECTIVE_NAME, - - /** - * Represents the enumerated type for the object of {@code ColumnName} type. - * This type is associated with token that represents the column as defined - * by the grammar as :
- * ColumnName[,ColumnName]*
- *
- */
- COLUMN_NAME_LIST,
-
- /**
- * Represents the enumerated type for the object of type {@code TextList} type.
- * This type is associated with the comma separated text represented were each text
- * is enclosed within a single quote (') or double quote (") and each text is separated
- * by comma (,). E.g.
- *
- * Text[,Text]*
- *
- */
- TEXT_LIST,
-
- /**
- * Represents the enumerated type for the object of type {@code NumericList} type.
- * This type is associated with the collection of {@code Numeric} values separated by
- * comma(,). E.g.
- *
- * Numeric[,Numeric]*
- *
- *
- */
- NUMERIC_LIST,
-
- /**
- * Represents the enumerated type for the object of type {@code BoolList} type.
- * This type is associated with the collection of {@code Bool} values separated by
- * comma(,). E.g.
- *
- * Boolean[,Boolean]*
- *
- */
- BOOLEAN_LIST,
-
- /**
- * Represents the enumerated type for the object of type {@code Expression} type.
- * This type is associated with code block that either represents a condition or
- * an expression. E.g.
- *
- * exp:{ }
- *
- */
- EXPRESSION,
-
- /**
- * Represents the enumerated type for the object of type {@code Properties} type.
- * This type is associated with a collection of key and value pairs all separated
- * by a comma(,). E.g.
- *
- * prop:{ =[,=]*}
- *
- */
- PROPERTIES,
-
- /**
- * Represents the enumerated type for the object of type {@code Ranges} types.
- * This type is associated with a collection of range represented in the form shown
- * below
- *
- * :=value[,:=value]*
- *
- */
- RANGES,
-
- /**
- * Represents the enumerated type for the object of type {@code String} with restrictions
- * on characters that can be present in a string.
- */
- IDENTIFIER
-}
+ package io.cdap.wrangler.api.parser;
+
+ import io.cdap.wrangler.api.annotations.PublicEvolving;
+
+ import java.io.Serializable;
+
+/**
+ * The TokenType class provides the enumerated types for different types of
+ * tokens that are supported by the grammar.
+ *
+ * Each of the enumerated types specified in this class also has associated
+ * object representing it. e.g. {@code DIRECTIVE_NAME} is represented by the
+ * object {@code DirectiveName}.
+ *
+ * @see Bool
+ * @see BoolList
+ * @see ByteSize
+ * @see ColumnName
+ * @see ColumnNameList
+ * @see DirectiveName
+ * @see Numeric
+ * @see NumericList
+ * @see Properties
+ * @see Ranges
+ * @see Expression
+ * @see Text
+ * @see TextList
+ * @see TimeDuration
+ */
+@PublicEvolving
+public enum TokenType implements Serializable {
+  /**
+   * Represents the enumerated type for the object {@code DirectiveName} type.
+   * This type is associated with the token that is recognized as a directive
+   * name within the recipe.
+   */
+  DIRECTIVE_NAME,
+
+  /**
+   * Represents the enumerated type for the object of {@code ColumnName} type.
+   * This type is associated with token that represents the column as defined
+   * by the grammar as {@code :column-name}.
+   */
+  COLUMN_NAME,
+
+  /**
+   * Represents the enumerated type for the object of {@code Text} type.
+   * This type is associated with a single text value that is enclosed within
+   * a single quote (') or double quote (").
+   */
+  TEXT,
+
+  /**
+   * Represents the enumerated type for the object of {@code Numeric} type.
+   * This type is associated with a single numeric value.
+   */
+  NUMERIC,
+
+  /**
+   * Represents the enumerated type for the object of {@code Bool} type.
+   * This type is associated with a single boolean value, written as
+   * {@code true} or {@code false}.
+   */
+  BOOLEAN,
+
+  /**
+   * Represents the enumerated type for the object of type {@code ColumnNameList}
+   * type.
+   * This type is associated with the collection of column names separated by
+   * comma (,). E.g.
+   * {@code ColumnName[,ColumnName]*}
+   */
+  COLUMN_NAME_LIST,
+
+  /**
+   * Represents the enumerated type for the object of type {@code TextList} type.
+   * This type is associated with the comma separated text where each text is
+   * enclosed within a single quote (') or double quote (") and each text is
+   * separated by comma (,). E.g.
+   * {@code Text[,Text]*}
+   */
+  TEXT_LIST,
+
+  /**
+   * Represents the enumerated type for the object of type {@code NumericList}
+   * type.
+   * This type is associated with the collection of {@code Numeric} values
+   * separated by comma (,). E.g.
+   * {@code Numeric[,Numeric]*}
+   */
+  NUMERIC_LIST,
+
+  /**
+   * Represents the enumerated type for the object of type {@code BoolList} type.
+   * This type is associated with the collection of {@code Bool} values separated
+   * by comma (,). E.g.
+   * {@code Boolean[,Boolean]*}
+   */
+  BOOLEAN_LIST,
+
+  /**
+   * Represents the enumerated type for the object of type {@code Expression}
+   * type.
+   * This type is associated with code block that either represents a condition or
+   * an expression. E.g.
+   * {@code exp:{ expression }}
+   */
+  EXPRESSION,
+
+  /**
+   * Represents the enumerated type for the object of type {@code Properties}
+   * type.
+   * This type is associated with a collection of key and value pairs all
+   * separated by a comma (,). E.g.
+   * {@code prop:{ key=value[,key=value]* }}
+   */
+  PROPERTIES,
+
+  /**
+   * Represents the enumerated type for the object of type {@code Ranges} types.
+   * This type is associated with a collection of range represented in the form
+   * shown below
+   * {@code start:end=value[,start:end=value]*}
+   */
+  RANGES,
+
+  /**
+   * Represents the enumerated type for the object of type {@code String} with
+   * restrictions on characters that can be present in a string.
+   */
+  IDENTIFIER,
+
+  /**
+   * Represents the enumerated type for the object of type {@code ByteSize} type.
+   * This type is associated with token that represents a byte size value with a
+   * unit, like "10KB", "5MB", "2GB", etc.
+   *
+   * @see ByteSize
+   */
+  BYTE_SIZE,
+
+  /**
+   * Represents the enumerated type for the object of type {@code TimeDuration}
+   * type.
+   * This type is associated with token that represents a time duration value with
+   * a unit, like "5s" (seconds), "10m" (minutes), "2h" (hours), etc.
+   *
+   * @see TimeDuration
+   */
+  TIME_DURATION
+}
\ No newline at end of file
diff --git a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
index 78800b7d1..10a498278 100644
--- a/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
+++ b/wrangler-api/src/main/java/io/cdap/wrangler/api/parser/UsageDefinition.java
@@ -14,231 +14,250 @@
* the License.
*/
-package io.cdap.wrangler.api.parser;
+ package io.cdap.wrangler.api.parser;
-import io.cdap.wrangler.api.Optional;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * This class {@link UsageDefinition} provides a way for users to registers the argument for UDDs.
- *
- * {@link UsageDefinition} is a collection of {@link TokenDefinition} and the name of the directive
- * itself. Each token specification has an associated ordinal that can be used to position the argument
- * within the directive.
- *
- * Following is a example of how this class can be used.
- *
- * UsageDefinition.Builder builder = UsageDefinition.builder();
- * builder.add("col1", TypeToken.COLUMN_NAME); // By default, this field is required.
- * builder.add("col2", TypeToken.COLUMN_NAME, false); // This is a optional field.
- * builder.add("expression", TypeToken.EXPRESSION);
- * UsageDefinition definition = builder.build();
- *
- *
- * NOTE: No constraints checks are included in this implementation.
- *
- * @see TokenDefinition
- */
-public final class UsageDefinition implements Serializable {
- // transient so it doesn't show up when serialized using gson in service endpoint responses
- private final transient int optionalCnt;
- private final String directive;
- private final ListUsageDefinition
- * object is created.
- *
- * @return name of the directive.
- */
- public String getDirectiveName() {
- return directive;
- }
-
- /**
- * This method returns the list of TokenDefinition that should be
- * used for parsing the directive into Arguments.
- *
- * @return List of TokenDefinition.
- */
- public ListTokenDefinition that have been specified
- * as optional in the UsageDefinition.
- *
- * @return number of tokens in the usage that are optional.
- */
- public int getOptionalTokensCount() {
- return optionalCnt;
- }
-
- /**
- * This method converts the UsageDefinition into a usage string
- * for this directive. It inspects all the tokens to generate a standard syntax
- * for the usage of the directive.
- *
- * @return a usage representation of this object.
- */
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(directive).append(" ");
-
- int count = tokens.size();
- for (TokenDefinition token : tokens) {
- if (token.optional()) {
- sb.append(" [");
- }
-
- if (token.label() != null) {
- sb.append(token.label());
- } else {
- if (token.type().equals(TokenType.DIRECTIVE_NAME)) {
- sb.append(token.name());
- } else if (token.type().equals(TokenType.COLUMN_NAME)) {
- sb.append(":").append(token.name());
- } else if (token.type().equals(TokenType.COLUMN_NAME_LIST)) {
- sb.append(":").append(token.name()).append(" [,:").append(token.name()).append(" ]*");
- } else if (token.type().equals(TokenType.BOOLEAN)) {
- sb.append(token.name()).append(" (true/false)");
- } else if (token.type().equals(TokenType.TEXT)) {
- sb.append("'").append(token.name()).append("'");
- } else if (token.type().equals(TokenType.IDENTIFIER) || token.type().equals(TokenType.NUMERIC)) {
- sb.append(token.name());
- } else if (token.type().equals(TokenType.BOOLEAN_LIST) || token.type().equals(TokenType.NUMERIC_LIST)
- || token.type().equals(TokenType.TEXT_LIST)) {
- sb.append(token.name()).append("[,").append(token.name()).append(" ...]*");
- } else if (token.type().equals(TokenType.EXPRESSION)) {
- sb.append("exp:{<").append(token.name()).append(">}");
- } else if (token.type().equals(TokenType.PROPERTIES)) {
- sb.append("prop:{key:value,[key:value]*");
- } else if (token.type().equals(TokenType.RANGES)) {
- sb.append("start:end=[bool|text|numeric][,start:end=[bool|text|numeric]*");
- }
- }
-
- count--;
-
- if (token.optional()) {
- sb.append("]");
- } else {
- if (count > 0) {
- sb.append(" ");
- }
- }
- }
- return sb.toString();
- }
-
- /**
- * This is a static method for creating a builder for the UsageDefinition
- * object. In order to create a UsageDefinition, a builder has to created.
- *
- * This builder is provided as user API for constructing the usage specification - * for a directive.
- * - * @param directive name of the directive for which the builder is created for. - * @return AUsageDefinition.Builder object that can be used to construct
- * UsageDefinition object.
- */
- public static UsageDefinition.Builder builder(String directive) {
- return new UsageDefinition.Builder(directive);
- }
-
- /**
- * This inner builder class provides a way to create UsageDefinition
- * object. It exposes different methods that allow users to configure the TokenDefinition
- * for each token used within the usage of a directive.
- */
- public static final class Builder {
- private final String directive;
- private final ListOptional#TRUE if token is optional, else Optional#FALSE.
- */
- public void define(String name, TokenType type, boolean optional) {
- TokenDefinition spec = new TokenDefinition(name, type, null, currentOrdinal, optional);
- optionalCnt = optional ? optionalCnt + 1 : optionalCnt;
- currentOrdinal++;
- tokens.add(spec);
- }
-
- /**
- * Method allows users to specify a field as optional in combination to the
- * name of the token, the type of token and also the ability to specify a label
- * for the usage.
- *
- * @param name of the token in the definition of a directive.
- * @param type of the token to be extracted.
- * @param label label that modifies the usage for this field.
- * @param optional Optional#TRUE if token is optional, else Optional#FALSE.
- */
- public void define(String name, TokenType type, String label, boolean optional) {
- TokenDefinition spec = new TokenDefinition(name, type, label, currentOrdinal, optional);
- optionalCnt = optional ? optionalCnt + 1 : optionalCnt;
- currentOrdinal++;
- tokens.add(spec);
- }
-
- /**
- * @return a instance of UsageDefinition object.
- */
- public UsageDefinition build() {
- return new UsageDefinition(directive, optionalCnt, tokens);
- }
- }
-}
+ import io.cdap.wrangler.api.Optional;
+
+ import java.io.Serializable;
+ import java.util.ArrayList;
+ import java.util.List;
+
+ /**
+ * This class {@link UsageDefinition} provides a way for users to registers the
+ * argument for UDDs.
+ *
+ * {@link UsageDefinition} is a collection of {@link TokenDefinition} and the
+ * name of the directive
+ * itself. Each token specification has an associated ordinal that can be used
+ * to position the argument
+ * within the directive.
+ *
+ * Following is a example of how this class can be used.
+ *
+ * UsageDefinition.Builder builder = UsageDefinition.builder();
+ * builder.add("col1", TypeToken.COLUMN_NAME); // By default, this field is required.
+ * builder.add("col2", TypeToken.COLUMN_NAME, false); // This is a optional field.
+ * builder.add("expression", TypeToken.EXPRESSION);
+ * UsageDefinition definition = builder.build();
+ *
+ *
+ * NOTE: No constraints checks are included in this implementation.
+ *
+ * @see TokenDefinition
+ */
+ public final class UsageDefinition implements Serializable {
+ // transient so it doesn't show up when serialized using gson in service
+ // endpoint responses
+ private final transient int optionalCnt;
+ private final String directive;
+ private final ListUsageDefinition
+ * object is created.
+ *
+ * @return name of the directive.
+ */
+ public String getDirectiveName() {
+ return directive;
+ }
+
+ /**
+ * This method returns the list of TokenDefinition that should be
+ * used for parsing the directive into Arguments.
+ *
+ * @return List of TokenDefinition.
+ */
+ public ListTokenDefinition that have been specified
+ * as optional in the UsageDefinition.
+ *
+ * @return number of tokens in the usage that are optional.
+ */
+ public int getOptionalTokensCount() {
+ return optionalCnt;
+ }
+
+  /**
+   * This method converts the {@code UsageDefinition} into a usage string
+   * for this directive. It inspects all the tokens to generate a standard syntax
+   * for the usage of the directive; a token's label, when set, is used verbatim.
+   *
+   * @return a usage representation of this object.
+   */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append(directive).append(" ");
+    // count = tokens still to be rendered; it suppresses the separator after the last one.
+    int count = tokens.size();
+    for (TokenDefinition token : tokens) {
+      if (token.optional()) {
+        sb.append(" [");
+      }
+
+      if (token.label() != null) {
+        sb.append(token.label());
+      } else {
+        if (token.type().equals(TokenType.DIRECTIVE_NAME)) {
+          sb.append(token.name());
+        } else if (token.type().equals(TokenType.COLUMN_NAME)) {
+          sb.append(":").append(token.name());
+        } else if (token.type().equals(TokenType.COLUMN_NAME_LIST)) {
+          sb.append(":").append(token.name()).append(" [,:").append(token.name()).append(" ]*");
+        } else if (token.type().equals(TokenType.BOOLEAN)) {
+          sb.append(token.name()).append(" (true/false)");
+        } else if (token.type().equals(TokenType.TEXT)) {
+          sb.append("'").append(token.name()).append("'");
+        } else if (token.type().equals(TokenType.IDENTIFIER) || token.type().equals(TokenType.NUMERIC)) {
+          sb.append(token.name());
+        } else if (token.type().equals(TokenType.BOOLEAN_LIST) || token.type().equals(TokenType.NUMERIC_LIST)
+            || token.type().equals(TokenType.TEXT_LIST)) {
+          sb.append(token.name()).append("[,").append(token.name()).append(" ...]*");
+        } else if (token.type().equals(TokenType.EXPRESSION)) {
+          sb.append("exp:{<").append(token.name()).append(">}");
+        } else if (token.type().equals(TokenType.PROPERTIES)) {
+          sb.append("prop:{key=value[,key=value]*}"); // balanced braces; pairs are written with '='
+        } else if (token.type().equals(TokenType.RANGES)) {
+          sb.append("start:end=[bool|text|numeric][,start:end=[bool|text|numeric]]*"); // balanced brackets
+        } else if (token.type().equals(TokenType.BYTE_SIZE)) {
+          sb.append(token.name()).append(" (e.g., 10KB, 5MB)");
+        } else if (token.type().equals(TokenType.TIME_DURATION)) {
+          sb.append(token.name()).append(" (e.g., 10s, 5m, 2h)");
+        }
+      }
+
+      count--;
+
+      if (token.optional()) {
+        sb.append("]");
+      } else {
+        if (count > 0) {
+          sb.append(" ");
+        }
+      }
+    }
+    return sb.toString();
+  }
+
+ /**
+ * This is a static method for creating a builder for the
+ * UsageDefinition
+ * object. In order to create a UsageDefinition, a builder has to
+ * created.
+ *
+ * + * This builder is provided as user API for constructing the usage specification + * for a directive. + *
+ * + * @param directive name of the directive for which the builder is created for. + * @return AUsageDefinition.Builder object that can be used to
+ * construct
+ * UsageDefinition object.
+ */
+ public static UsageDefinition.Builder builder(String directive) {
+ return new UsageDefinition.Builder(directive);
+ }
+
+ /**
+ * This inner builder class provides a way to create
+ * UsageDefinition
+ * object. It exposes different methods that allow users to configure the
+ * TokenDefinition
+ * for each token used within the usage of a directive.
+ */
+ public static final class Builder {
+ private final String directive;
+ private final ListOptional#TRUE if token is optional, else
+ * Optional#FALSE.
+ */
+    public void define(String name, TokenType type, boolean optional) {
+      // An unlabelled token is simply a labelled one whose label is null, so this
+      // hands off to the four-argument overload, which builds the TokenDefinition
+      // at the current ordinal, records it and updates the optional count.
+      define(name, type, (String) null, optional);
+    }
+
+    /**
+     * Registers a token by name, type, usage label and optional flag. The token
+     * takes the next ordinal; optional tokens are also counted.
+     *
+     * @param name of the token in the definition of a directive.
+     * @param type of the token to be extracted.
+     * @param label label that modifies the usage for this field.
+     * @param optional {@code true} if the token is optional, {@code false} if it is required.
+     */
+    public void define(String name, TokenType type, String label, boolean optional) {
+      TokenDefinition definition = new TokenDefinition(name, type, label, currentOrdinal, optional);
+      tokens.add(definition);
+      currentOrdinal++;
+      if (optional) {
+        optionalCnt++;
+      }
+    }
+
+ /**
+ * @return a new {@code UsageDefinition} built from the directive name, the optional-token count and the tokens.
+ */
+ public UsageDefinition build() {
+ return new UsageDefinition(directive, optionalCnt, tokens);
+ }
+ }
+ }
\ No newline at end of file
diff --git a/wrangler-core/pom.xml b/wrangler-core/pom.xml
index e2dcb3c2b..cd0857474 100644
--- a/wrangler-core/pom.xml
+++ b/wrangler-core/pom.xml
@@ -361,4 +361,4 @@
-
+
\ No newline at end of file
diff --git a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4 b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
index 7c517ed6a..ac25f71bf 100644
--- a/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
+++ b/wrangler-core/src/main/antlr4/io/cdap/wrangler/parser/Directives.g4
@@ -64,6 +64,8 @@ directive
| stringList
| numberRanges
| properties
+ | byteSize
+ | timeDuration
)*?
;
@@ -140,7 +142,7 @@ numberRange
;
value
- : String | Number | Column | Bool
+ : String | Number | Column | Bool | BYTE_SIZE | TIME_DURATION
;
ecommand
@@ -195,6 +197,13 @@ identifierList
: Identifier (',' Identifier)*
;
+byteSize
+ : BYTE_SIZE // a single byte-size literal token, e.g. 10KB
+ ;
+
+timeDuration
+ : TIME_DURATION // a single time-duration literal token, e.g. 5s
+ ;
/*
* Following are the Lexer Rules used for tokenizing the recipe.
@@ -303,6 +312,33 @@ Space
: [ \t\r\n\u000C]+ -> skip
;
+BYTE_SIZE
+ : Digit+ BYTE_UNIT // digits only: ByteSize rejects the sign and 'L' suffix that Int permits
+ ;
+
+TIME_DURATION
+ : Digit+ TIME_UNIT // digits only: TimeDuration likewise matches (\d+) followed by a unit
+ ;
+
+fragment BYTE_UNIT
+ : [kK][bB] // kilobyte
+ | [mM][bB] // megabyte
+ | [gG][bB] // gigabyte
+ | [tT][bB] // terabyte
+ | [pP][bB] // petabyte
+ | [bB] // byte
+ ;
+
+fragment TIME_UNIT
+ : [mM][oO] // months (listed ahead of minutes so the longer unit reads first)
+ | [sS] // seconds
+ | [mM] // minutes
+ | [hH] // hours
+ | [dD] // days
+ | [wW] // weeks
+ | [yY] // years
+ ;
+
fragment Int
: '-'? [1-9] Digit* [L]*
| '0'
@@ -310,4 +346,4 @@ fragment Int
fragment Digit
: [0-9]
- ;
+ ;
\ No newline at end of file
diff --git a/wrangler-core/src/main/java/io/cdap/directives/aggregates/SizeTimeAggregator.java b/wrangler-core/src/main/java/io/cdap/directives/aggregates/SizeTimeAggregator.java
new file mode 100644
index 000000000..feb7a278d
--- /dev/null
+++ b/wrangler-core/src/main/java/io/cdap/directives/aggregates/SizeTimeAggregator.java
@@ -0,0 +1,298 @@
+/*
+ * Copyright © 2023 Cask Data, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+ package io.cdap.directives.aggregates;
+
+ import io.cdap.cdap.api.annotation.Description;
+ import io.cdap.cdap.api.annotation.Name;
+ import io.cdap.cdap.api.annotation.Plugin;
+ import io.cdap.wrangler.api.Arguments;
+ import io.cdap.wrangler.api.Directive;
+ import io.cdap.wrangler.api.DirectiveExecutionException;
+ import io.cdap.wrangler.api.DirectiveParseException;
+ import io.cdap.wrangler.api.ExecutorContext;
+ import io.cdap.wrangler.api.Optional;
+ import io.cdap.wrangler.api.Row;
+ import io.cdap.wrangler.api.TransientStore;
+ import io.cdap.wrangler.api.TransientVariableScope;
+ import io.cdap.wrangler.api.annotations.Categories;
+ import io.cdap.wrangler.api.lineage.Lineage;
+ import io.cdap.wrangler.api.lineage.Mutation;
+ import io.cdap.wrangler.api.parser.ByteSize;
+ import io.cdap.wrangler.api.parser.ColumnName;
+ import io.cdap.wrangler.api.parser.Text;
+ import io.cdap.wrangler.api.parser.TimeDuration;
+ import io.cdap.wrangler.api.parser.TokenType;
+ import io.cdap.wrangler.api.parser.UsageDefinition;
+
+ import java.util.ArrayList;
+ import java.util.List;
+
+ /**
+ * A directive for aggregating byte sizes and time durations across multiple
+ * rows.
+ *
+ * This directive processes ByteSize and TimeDuration tokens, accumulates them,
+ * and produces summary statistics such as total size and total/average time.
+ */
+ @Plugin(type = Directive.TYPE)
+ @Name(SizeTimeAggregator.NAME)
+ @Categories(categories = { "aggregator", "statistics" })
+ @Description("Aggregates byte sizes and time durations across rows, calculating totals and averages.")
+ public class SizeTimeAggregator implements Directive, Lineage {
+ // Directive name: used in the @Name annotation, in the usage definition and in
+ // the parse errors raised by initialize().
+ public static final String NAME = "aggregate-size-time";
+
+ // Store keys for the transient store
+ // NOTE(review): the code that reads/writes these keys is outside this view.
+ private static final String TOTAL_SIZE_KEY = "aggregate_total_size_bytes";
+ private static final String TOTAL_TIME_KEY = "aggregate_total_time_ms";
+ private static final String COUNT_KEY = "aggregate_count";
+
+ // Source column names (arguments "size-column" and "time-column")
+ private String sizeColumnName;
+ private String timeColumnName;
+
+ // Target column names (arguments "target-size-column" and "target-time-column")
+ private String targetSizeColumnName;
+ private String targetTimeColumnName;
+
+ // Unit settings for output (optional)
+ private String sizeUnit; // Stored upper-cased. Default: BYTES, Options: BYTES, KB, MB, GB
+ private String timeUnit; // Stored lower-cased. Default: ms, Options: ms, s, m, h
+ private boolean useAverage; // true when aggregate-type is "average" or "avg"; default false (use total)
+
+ /**
+ * Declares the arguments of this directive, in order: four column-name
+ * arguments (source size, source time, target size, target time) followed by
+ * three text arguments flagged {@code Optional.TRUE} (size unit, time unit and
+ * aggregate type).
+ *
+ * @return the usage definition built for {@link #NAME}
+ */
+ @Override
+ public UsageDefinition define() {
+ UsageDefinition.Builder usage = UsageDefinition.builder(NAME);
+ // Column arguments.
+ usage.define("size-column", TokenType.COLUMN_NAME);
+ usage.define("time-column", TokenType.COLUMN_NAME);
+ usage.define("target-size-column", TokenType.COLUMN_NAME);
+ usage.define("target-time-column", TokenType.COLUMN_NAME);
+ // Optional text arguments.
+ usage.define("size-unit", TokenType.TEXT, Optional.TRUE);
+ usage.define("time-unit", TokenType.TEXT, Optional.TRUE);
+ usage.define("aggregate-type", TokenType.TEXT, Optional.TRUE);
+ return usage.build();
+ }
+
+ /**
+ * Reads the directive arguments into fields and validates the optional ones.
+ *
+ * The size unit is stored upper-cased (default {@code BYTES}), the time unit
+ * lower-cased (default {@code ms}) and the aggregate type lower-cased (default
+ * {@code total}). An unsupported aggregate type is rejected instead of being
+ * silently treated as {@code total}, matching how the two unit arguments are
+ * handled.
+ *
+ * @param args parsed arguments of the directive
+ * @throws DirectiveParseException if the size unit, the time unit or the
+ * aggregate type is not one of the supported values
+ */
+ @Override
+ public void initialize(Arguments args) throws DirectiveParseException {
+ this.sizeColumnName = ((ColumnName) args.value("size-column")).value();
+ this.timeColumnName = ((ColumnName) args.value("time-column")).value();
+ this.targetSizeColumnName = ((ColumnName) args.value("target-size-column")).value();
+ this.targetTimeColumnName = ((ColumnName) args.value("target-time-column")).value();
+
+ // Parse optional arguments with default values.
+ // Locale.ROOT keeps the case folding independent of the JVM default locale.
+ this.sizeUnit = args.contains("size-unit")
+ ? ((Text) args.value("size-unit")).value().toUpperCase(java.util.Locale.ROOT)
+ : "BYTES";
+ this.timeUnit = args.contains("time-unit")
+ ? ((Text) args.value("time-unit")).value().toLowerCase(java.util.Locale.ROOT)
+ : "ms";
+
+ // Determine aggregation type: total or average
+ String aggregateType = args.contains("aggregate-type")
+ ? ((Text) args.value("aggregate-type")).value().toLowerCase(java.util.Locale.ROOT)
+ : "total";
+ this.useAverage = "average".equals(aggregateType) || "avg".equals(aggregateType);
+
+ // Validate size unit
+ if (!("BYTES".equals(sizeUnit) || "KB".equals(sizeUnit) ||
+ "MB".equals(sizeUnit) || "GB".equals(sizeUnit))) {
+ throw new DirectiveParseException(
+ NAME, String.format("Invalid size unit '%s'. Supported units are BYTES, KB, MB, GB", sizeUnit));
+ }
+
+ // Validate time unit
+ if (!("ms".equals(timeUnit) || "s".equals(timeUnit) ||
+ "m".equals(timeUnit) || "h".equals(timeUnit))) {
+ throw new DirectiveParseException(
+ NAME, String.format("Invalid time unit '%s'. Supported units are ms, s, m, h", timeUnit));
+ }
+
+ // Validate aggregate type last so the unit errors keep their precedence.
+ if (!(useAverage || "total".equals(aggregateType))) {
+ throw new DirectiveParseException(
+ NAME, String.format("Invalid aggregate type '%s'. Supported types are total, average, avg",
+ aggregateType));
+ }
+ }
+
+ /**
+ * Intentionally empty: the method body performs no work.
+ */
+ @Override
+ public void destroy() {
+ // no-op
+ }
+
+ @Override
+ public ListRecipeVisitor implements the visitor pattern
- * used during traversal of the AST tree. The ParserTree#Walker
- * invokes appropriate methods as call backs with information about the node.
- *
- * In order to understand what's being invoked, please look at the grammar file - * Directive.g4
. - * - *This class exposes a getTokenGroups method for retrieving the
- * RecipeSymbol after visiting. The RecipeSymbol represents
- * all the TokenGroup for all directives in a recipe. Each directive
- * will create a TokenGroup
As the ParseTree is walking through the call graph, it generates
- * one TokenGroup for each directive in the recipe. Each TokenGroup
- * contains parsed Tokens for that directive along with more information like
- * SourceInfo. A collection of TokenGroup consistutes a RecipeSymbol
- * that is returned by this function.
RecipeSymbol for the recipe being parsed. This
- * object has all the tokens that were successfully parsed along with source
- * information for each directive in the recipe.
- *
- * @return An compiled object after parsing the recipe.
- */
- public RecipeSymbol getCompiledUnit() {
- return builder.build();
- }
-
- /**
- * A Recipe is made up of Directives and Directives is made up of each individual
- * Directive. This method is invoked on every visit to a new directive in the recipe.
- */
- @Override
- public RecipeSymbol.Builder visitDirective(DirectivesParser.DirectiveContext ctx) {
- builder.createTokenGroup(getOriginalSource(ctx));
- return super.visitDirective(ctx);
- }
-
- /**
- * A Directive can include identifiers, this method extracts that token that is being
- * identified as token of type Identifier.
- */
- @Override
- public RecipeSymbol.Builder visitIdentifier(DirectivesParser.IdentifierContext ctx) {
- builder.addToken(new Identifier(ctx.Identifier().getText()));
- return super.visitIdentifier(ctx);
- }
-
- /**
- * A Directive can include properties (which are a collection of key and value pairs),
- * this method extracts that token that is being identified as token of type Properties.
- */
- @Override
- public RecipeSymbol.Builder visitPropertyList(DirectivesParser.PropertyListContext ctx) {
- MapDirectiveRegistry. These do not affect the data flow.
- *
- * E.g. #pragma load-directives test1, test2, test3; will collect the tokens
- * test1, test2 and test3 as dynamically loadable directives.
- */
- @Override
- public RecipeSymbol.Builder visitPragmaLoadDirective(DirectivesParser.PragmaLoadDirectiveContext ctx) {
- List
+ * In order to understand what's being invoked, please look at the grammar file
+ * Directive.g4
+ *
+ * This class exposes a
+ * As the
+ * E.g.
+ */
+ @Override
+ public RecipeSymbol.Builder visitPragmaLoadDirective(DirectivesParser.PragmaLoadDirectiveContext ctx) {
+ ListRanges.
- */
- @Override
- public RecipeSymbol.Builder visitNumberRanges(DirectivesParser.NumberRangesContext ctx) {
- ListColumnName.
- */
- @Override
- public RecipeSymbol.Builder visitColumn(DirectivesParser.ColumnContext ctx) {
- builder.addToken(new ColumnName(ctx.Column().getText().substring(1)));
- return builder;
- }
-
- /**
- * A Directive can consist of text field. These type of fields are enclosed within
- * a single-quote or a double-quote. This visitor method extracts the string value
- * within the quotes and creates a token type Text.
- */
- @Override
- public RecipeSymbol.Builder visitText(DirectivesParser.TextContext ctx) {
- String value = ctx.String().getText();
- builder.addToken(new Text(value.substring(1, value.length() - 1)));
- return builder;
- }
-
- /**
- * A Directive can consist of numeric field. This visitor method extracts the
- * numeric value Numeric.
- */
- @Override
- public RecipeSymbol.Builder visitNumber(DirectivesParser.NumberContext ctx) {
- LazyNumber number = new LazyNumber(ctx.Number().getText());
- builder.addToken(new Numeric(number));
- return builder;
- }
-
- /**
- * A Directive can consist of Bool field. The Bool field is represented as
- * either true or false. This visitor method extract the bool value into a
- * token type Bool.
- */
- @Override
- public RecipeSymbol.Builder visitBool(DirectivesParser.BoolContext ctx) {
- builder.addToken(new Bool(Boolean.valueOf(ctx.Bool().getText())));
- return builder;
- }
-
- /**
- * A Directive can include a expression or a condition to be evaluated. When
- * such a token type is found, the visitor extracts the expression and generates
- * a token type Expression to be added to the TokenGroup
- */
- @Override
- public RecipeSymbol.Builder visitCondition(DirectivesParser.ConditionContext ctx) {
- int childCount = ctx.getChildCount();
- StringBuilder sb = new StringBuilder();
- for (int i = 1; i < childCount - 1; ++i) {
- ParseTree child = ctx.getChild(i);
- sb.append(child.getText()).append(" ");
- }
- builder.addToken(new Expression(sb.toString()));
- return builder;
- }
-
- /**
- * A Directive has name and in the parsing context it's called a command.
- * This visitor methods extracts the command and creates a toke type DirectiveName
- */
- @Override
- public RecipeSymbol.Builder visitCommand(DirectivesParser.CommandContext ctx) {
- builder.addToken(new DirectiveName(ctx.Identifier().getText()));
- return builder;
- }
-
- /**
- * This visitor methods extracts the list of columns specified. It creates a token
- * type ColumnNameList to be added to TokenGroup.
- */
- @Override
- public RecipeSymbol.Builder visitColList(DirectivesParser.ColListContext ctx) {
- ListNumericList to be added to TokenGroup.
- */
- @Override
- public RecipeSymbol.Builder visitNumberList(DirectivesParser.NumberListContext ctx) {
- ListBoolList to be added to TokenGroup.
- */
- @Override
- public RecipeSymbol.Builder visitBoolList(DirectivesParser.BoolListContext ctx) {
- ListStringList to be added to TokenGroup.
- */
- @Override
- public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext ctx) {
- ListRecipeVisitor implements the visitor pattern
+ * used during traversal of the AST tree. The ParserTree#Walker
+ * invokes appropriate methods as call backs with information about the node.
+ *
+ * getTokenGroups method for retrieving the
+ * RecipeSymbol after visiting. The RecipeSymbol
+ * represents
+ * all the TokenGroup for all directives in a recipe. Each
+ * directive
+ * will create a TokenGroup
+ * ParseTree is walking through the call graph, it generates
+ * one TokenGroup for each directive in the recipe. Each
+ * TokenGroup
+ * contains parsed Tokens for that directive along with more
+ * information like
+ * SourceInfo. A collection of TokenGroup constitutes
+ * a RecipeSymbol
+ * that is returned by this function.
+ * RecipeSymbol for the recipe being parsed. This
+ * object has all the tokens that were successfully parsed along with source
+ * information for each directive in the recipe.
+ *
+ * @return A compiled object after parsing the recipe.
+ */
+ public RecipeSymbol getCompiledUnit() {
+ return builder.build();
+ }
+
+ /**
+ * Invoked on every visit to a directive in the recipe. Opens a new token
+ * group on the builder, passing it the value of
+ * {@code getOriginalSource(ctx)}, and then delegates to
+ * {@code super.visitDirective(ctx)}.
+ *
+ * @param ctx parse-tree context of the directive being visited
+ * @return the result of {@code super.visitDirective(ctx)}
+ */
+ @Override
+ public RecipeSymbol.Builder visitDirective(DirectivesParser.DirectiveContext ctx) {
+ builder.createTokenGroup(getOriginalSource(ctx));
+ return super.visitDirective(ctx);
+ }
+
+ /**
+ * A Directive can include identifiers, this method extracts that token that is
+ * being
+ * identified as token of type Identifier.
+ */
+ @Override
+ public RecipeSymbol.Builder visitIdentifier(DirectivesParser.IdentifierContext ctx) {
+ builder.addToken(new Identifier(ctx.Identifier().getText()));
+ return super.visitIdentifier(ctx);
+ }
+
+ /**
+ * A Directive can include properties (which are a collection of key and value
+ * pairs),
+ * this method extracts that token that is being identified as token of type
+ * Properties.
+ */
+ @Override
+ public RecipeSymbol.Builder visitPropertyList(DirectivesParser.PropertyListContext ctx) {
+ MapDirectiveRegistry. These do not affect the data flow.
+ *
+ * #pragma load-directives test1, test2, test3; will collect
+ * the tokens
+ * test1, test2 and test3 as dynamically loadable directives.
+ * Ranges.
+ */
+ @Override
+ public RecipeSymbol.Builder visitNumberRanges(DirectivesParser.NumberRangesContext ctx) {
+ ListColumnName.
+ */
+ @Override
+ public RecipeSymbol.Builder visitColumn(DirectivesParser.ColumnContext ctx) {
+ builder.addToken(new ColumnName(ctx.Column().getText().substring(1)));
+ return builder;
+ }
+
+ /**
+ * Adds a {@code Text} token for a quoted string argument. The first and the
+ * last character of the matched text (the enclosing quotes) are dropped
+ * before the token is created.
+ *
+ * @param ctx parse-tree context of the text argument
+ * @return the shared builder
+ */
+ @Override
+ public RecipeSymbol.Builder visitText(DirectivesParser.TextContext ctx) {
+ String quoted = ctx.String().getText();
+ String unquoted = quoted.substring(1, quoted.length() - 1);
+ builder.addToken(new Text(unquoted));
+ return builder;
+ }
+
+ /**
+ * Adds a {@code Numeric} token for a numeric argument. The matched text is
+ * wrapped in a {@code LazyNumber} before being handed to the token.
+ *
+ * @param ctx parse-tree context of the number
+ * @return the shared builder
+ */
+ @Override
+ public RecipeSymbol.Builder visitNumber(DirectivesParser.NumberContext ctx) {
+ builder.addToken(new Numeric(new LazyNumber(ctx.Number().getText())));
+ return builder;
+ }
+
+ /**
+ * A Directive can consist of Bool field. The Bool field is represented as
+ * either true or false. This visitor method extract the bool value into a
+ * token type Bool.
+ */
+ @Override
+ public RecipeSymbol.Builder visitBool(DirectivesParser.BoolContext ctx) {
+ builder.addToken(new Bool(Boolean.valueOf(ctx.Bool().getText())));
+ return builder;
+ }
+
+ /**
+ * A Directive can consist of a ByteSize field. The ByteSize field is
+ * represented as
+ * a number followed by a byte unit (e.g., "10KB", "5MB"). This visitor method
+ * extracts
+ * the byte size value into a token type ByteSize.
+ */
+ @Override
+ public RecipeSymbol.Builder visitByteSize(DirectivesParser.ByteSizeContext ctx) {
+ builder.addToken(new ByteSize(ctx.BYTE_SIZE().getText()));
+ return builder;
+ }
+
+ /**
+ * A Directive can consist of a TimeDuration field. The TimeDuration field is
+ * represented as
+ * a number followed by a time unit (e.g., "5s", "10m", "2h"). This visitor
+ * method extracts
+ * the time duration value into a token type TimeDuration.
+ */
+ @Override
+ public RecipeSymbol.Builder visitTimeDuration(DirectivesParser.TimeDurationContext ctx) {
+ builder.addToken(new TimeDuration(ctx.TIME_DURATION().getText()));
+ return builder;
+ }
+
+ /**
+ * Adds an {@code Expression} token for a condition. The text of every child
+ * node except the first and the last one is concatenated, each followed by a
+ * single space, and the result becomes the expression.
+ * NOTE(review): the skipped first/last children are presumably the enclosing
+ * delimiters - confirm against the grammar.
+ *
+ * @param ctx parse-tree context of the condition
+ * @return the shared builder
+ */
+ @Override
+ public RecipeSymbol.Builder visitCondition(DirectivesParser.ConditionContext ctx) {
+ StringBuilder expression = new StringBuilder();
+ int last = ctx.getChildCount() - 1;
+ int index = 1;
+ while (index < last) {
+ expression.append(ctx.getChild(index).getText()).append(" ");
+ index++;
+ }
+ builder.addToken(new Expression(expression.toString()));
+ return builder;
+ }
+
+ /**
+ * A Directive has name and in the parsing context it's called a command.
+ * This visitor methods extracts the command and creates a toke type
+ * DirectiveName
+ */
+ @Override
+ public RecipeSymbol.Builder visitCommand(DirectivesParser.CommandContext ctx) {
+ builder.addToken(new DirectiveName(ctx.Identifier().getText()));
+ return builder;
+ }
+
+ /**
+ * This visitor methods extracts the list of columns specified. It creates a
+ * token
+ * type ColumnNameList to be added to TokenGroup.
+ */
+ @Override
+ public RecipeSymbol.Builder visitColList(DirectivesParser.ColListContext ctx) {
+ ListNumericList to be added to TokenGroup.
+ */
+ @Override
+ public RecipeSymbol.Builder visitNumberList(DirectivesParser.NumberListContext ctx) {
+ ListBoolList to be added to TokenGroup.
+ */
+ @Override
+ public RecipeSymbol.Builder visitBoolList(DirectivesParser.BoolListContext ctx) {
+ ListStringList to be added to TokenGroup.
+ */
+ @Override
+ public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext ctx) {
+ List