Skip to content

Commit 72f2907

Browse files
authored
Enable custom tags (#6)
1 parent 4651ead commit 72f2907

File tree

7 files changed

+196
-220
lines changed

7 files changed

+196
-220
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
"deploy": "wrangler deploy",
1313
"start": "wrangler dev",
1414
"dev": "wrangler dev",
15-
"cf-typegen": "wrangler types",
15+
"cf-typegen": "wrangler types --strict-vars=false",
1616
"typecheck": "tsgo",
1717
"lint": "biome check .",
1818
"lint:fix": "biome check --write .",

readme.md

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,36 @@ To run GraphQL queries from a specific jurisdiction (closer to the data source),
3333

3434
```jsonc
3535
"vars": {
36-
"BATCH_SIZE": "5000",
37-
"JURISDICTION": "eu" // e.g., "eu", "fedramp"
36+
"BATCH_SIZE": 5000,
37+
"RETRY_LIMIT": 3,
38+
"RETRY_DELAY_SECONDS": 1,
39+
"JURISDICTION": "eu", // e.g., "eu", "fedramp"
40+
"DATADOG_TAGS": {}
3841
}
3942
```
4043

4144
This uses a Durable Object to proxy requests from the specified jurisdiction.
4245

46+
#### Optional: Custom Datadog Tags
47+
48+
Add custom tags to all metrics by setting the `DATADOG_TAGS` variable in `wrangler.jsonc`:
49+
50+
```jsonc
51+
"vars": {
52+
"BATCH_SIZE": 5000,
53+
"RETRY_LIMIT": 3,
54+
"RETRY_DELAY_SECONDS": 1,
55+
"JURISDICTION": "eu",
56+
"DATADOG_TAGS": {
57+
"env": "production",
58+
"team": "platform",
59+
"service": "containers"
60+
}
61+
}
62+
```
63+
64+
These tags will be added to all health and resource metrics sent to Datadog.
65+
4366
### Verify
4467

4568
```bash
@@ -123,11 +146,21 @@ See [Datadog's documentation](https://docs.datadoghq.com/dashboards/configure/#c
123146

124147
## Workflow Behavior
125148

126-
The exporter runs as a Cloudflare Workflow triggered every minute via cron. Each workflow step uses the default retry configuration:
149+
The exporter runs as a Cloudflare Workflow triggered every minute via cron. Each workflow step uses configurable retry settings:
127150

128-
- **Retries**: 3 attempts
129-
- **Delay**: 1 second initial delay
130-
- **Backoff**: Exponential (1s, 2s, 4s)
151+
- **Retries**: Configurable via `RETRY_LIMIT` (default: 3 attempts)
152+
- **Delay**: Configurable via `RETRY_DELAY_SECONDS` (default: 1 second initial delay)
153+
- **Backoff**: Exponential (e.g., 1s, 2s, 4s)
131154

132155
Steps will automatically retry on transient failures (API errors, network issues).
133156

157+
### Configuration Options
158+
159+
| Variable | Type | Default | Description |
160+
|----------|------|---------|-------------|
161+
| `BATCH_SIZE` | number | 5000 | Maximum metrics per Datadog API request |
162+
| `RETRY_LIMIT` | number | 3 | Number of retry attempts for failed workflow steps |
163+
| `RETRY_DELAY_SECONDS` | number | 1 | Initial delay in seconds before retry (exponential backoff) |
164+
| `JURISDICTION` | string | "" | Durable Object jurisdiction for GraphQL queries (e.g., "eu", "fedramp") |
165+
| `DATADOG_TAGS` | object | {} | Custom tags to add to all metrics |
166+

src/metrics.ts

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1+
import { z } from "zod/v4";
12
import type { DatadogMetric } from "./api/datadog";
23
import type { Container, MetricsGroup } from "./types";
34

4-
export interface ContainerWithMetrics {
5-
container: Container;
6-
metrics: MetricsGroup[];
7-
}
8-
95
export interface ContainerInfo {
106
id: string;
117
name: string;
128
version: number;
139
}
1410

11+
const DatadogTagsSchema = z.record(z.string(), z.string()).optional();
12+
13+
function parseCustomTags(datadogTags: unknown): string[] {
14+
const parsed = DatadogTagsSchema.safeParse(datadogTags);
15+
if (!parsed.success) {
16+
console.warn("Invalid DATADOG_TAGS format, ignoring custom tags", {
17+
error: parsed.error.message,
18+
});
19+
return [];
20+
}
21+
if (!parsed.data) {
22+
return [];
23+
}
24+
return Object.entries(parsed.data).map(([key, value]) => `${key}:${value}`);
25+
}
26+
1527
/**
1628
* Format metrics for a single container into Datadog metrics
1729
*/
@@ -20,7 +32,9 @@ export function formatMetricsForContainer(
2032
container: ContainerInfo,
2133
metricsGroups: MetricsGroup[],
2234
timestamp?: number,
35+
datadogTags?: unknown,
2336
): DatadogMetric[] {
37+
const customTags = parseCustomTags(datadogTags);
2438
const ts = timestamp ?? Math.floor(Date.now() / 1000);
2539
const metrics: DatadogMetric[] = [];
2640

@@ -32,6 +46,7 @@ export function formatMetricsForContainer(
3246
`version:${container.version}`,
3347
`instance_id:${group.dimensions.deploymentId}`,
3448
`placement_id:${group.dimensions.placementId}`,
49+
...customTags,
3550
];
3651

3752
// CPU metrics
@@ -138,36 +153,18 @@ export function formatMetricsForContainer(
138153
return metrics;
139154
}
140155

141-
/**
142-
* Format container metrics data into Datadog metrics
143-
*/
144-
export function formatContainerMetrics(
145-
accountId: string,
146-
containersWithMetrics: ContainerWithMetrics[],
147-
timestamp?: number,
148-
): DatadogMetric[] {
149-
const ts = timestamp ?? Math.floor(Date.now() / 1000);
150-
const metrics: DatadogMetric[] = [];
151-
152-
for (const { container, metrics: groups } of containersWithMetrics) {
153-
metrics.push(
154-
...formatMetricsForContainer(accountId, container, groups, ts),
155-
);
156-
}
157-
158-
return metrics;
159-
}
160-
161156
/**
162157
* Format container health data into Datadog metrics
163158
*/
164159
export function formatHealthMetrics(
165160
accountId: string,
166161
containers: Container[],
167162
timestamp?: number,
163+
datadogTags?: unknown,
168164
): DatadogMetric[] {
165+
const customTags = parseCustomTags(datadogTags);
169166
const ts = timestamp ?? Math.floor(Date.now() / 1000);
170-
const baseTags = [`account_id:${accountId}`];
167+
const baseTags = [`account_id:${accountId}`, ...customTags];
171168
const metrics: DatadogMetric[] = [];
172169

173170
const totals = {

src/workflow.ts

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,19 @@ function getMetricsTimeWindow(now: Date = new Date()): {
2727
return { start, end };
2828
}
2929

30-
const STEP_CONFIG = {
31-
retries: {
32-
limit: 3,
33-
delay: "1 second" as const,
34-
backoff: "exponential" as const,
35-
},
36-
};
37-
3830
export class MetricsExporterWorkflow extends WorkflowEntrypoint<Env> {
3931
async run(_event: WorkflowEvent<unknown>, step: WorkflowStep) {
40-
const batchSize = Number.parseInt(this.env.BATCH_SIZE || "5000", 10);
32+
const batchSize = this.env.BATCH_SIZE ?? 5000;
33+
const retryLimit = this.env.RETRY_LIMIT ?? 3;
34+
const retryDelaySeconds = this.env.RETRY_DELAY_SECONDS ?? 1;
35+
36+
const stepConfig = {
37+
retries: {
38+
limit: retryLimit,
39+
delay: `${retryDelaySeconds} seconds` as const,
40+
backoff: "exponential" as const,
41+
},
42+
};
4143

4244
// Create a fetcher that proxies requests through a Durable Object in a specific jurisdiction
4345
// This ensures GraphQL queries run close to the data source
@@ -70,14 +72,16 @@ export class MetricsExporterWorkflow extends WorkflowEntrypoint<Env> {
7072

7173
const containers = await step.do(
7274
"fetch containers",
73-
STEP_CONFIG,
75+
stepConfig,
7476
async () => {
7577
const result = await cloudflare.listContainers();
7678
console.log("Fetched containers", { count: result.length });
7779

7880
const healthMetrics = formatHealthMetrics(
7981
this.env.CLOUDFLARE_ACCOUNT_ID,
8082
result,
83+
undefined,
84+
this.env.DATADOG_TAGS,
8185
);
8286
await datadog.sendMetrics(healthMetrics);
8387

@@ -94,7 +98,7 @@ export class MetricsExporterWorkflow extends WorkflowEntrypoint<Env> {
9498
for (const container of containers) {
9599
const count = await step.do(
96100
`Download Metrics: ${container.name}`,
97-
STEP_CONFIG,
101+
stepConfig,
98102
async () => {
99103
const metricsGroups = await cloudflare.getContainerMetrics(
100104
container.id,
@@ -106,6 +110,8 @@ export class MetricsExporterWorkflow extends WorkflowEntrypoint<Env> {
106110
this.env.CLOUDFLARE_ACCOUNT_ID,
107111
container,
108112
metricsGroups,
113+
undefined,
114+
this.env.DATADOG_TAGS,
109115
);
110116

111117
const batches = chunk(metrics, batchSize);
@@ -115,7 +121,7 @@ export class MetricsExporterWorkflow extends WorkflowEntrypoint<Env> {
115121
(batch, i) => () =>
116122
step.do(
117123
`Export Metrics: ${container.name} batch ${i + 1}/${batches.length}`,
118-
STEP_CONFIG,
124+
stepConfig,
119125
async () => {
120126
await datadog.sendMetrics(batch);
121127
},

0 commit comments

Comments (0)