diff --git a/apps/dev-playground/client/src/appKitTypes.d.ts b/apps/dev-playground/client/src/appKitTypes.d.ts index 0e0ae0b0..049fae9f 100644 --- a/apps/dev-playground/client/src/appKitTypes.d.ts +++ b/apps/dev-playground/client/src/appKitTypes.d.ts @@ -14,46 +14,28 @@ declare module "@databricks/appkit-ui/react" { endDate: SQLDateMarker; }; result: Array<{ - /** @sqlType STRING */ - app_name: string; - /** @sqlType STRING */ - day_of_week: string; - /** @sqlType DECIMAL(35,2) */ - spend: number; + ; }>; }; apps_list: { name: "apps_list"; parameters: Record; result: Array<{ - /** @sqlType STRING */ - id: string; - /** @sqlType STRING */ - name: string; - /** @sqlType STRING */ - creator: string; - /** @sqlType STRING */ - tags: string; - /** @sqlType DECIMAL(38,6) */ - totalSpend: number; - /** @sqlType DATE */ - createdAt: string; + ; }>; }; cost_recommendations: { name: "cost_recommendations"; parameters: Record; result: Array<{ - /** @sqlType INT */ - dummy: number; + ; }>; }; example: { name: "example"; parameters: Record; result: Array<{ - /** @sqlType BOOLEAN */ - "(1 = 1)": boolean; + ; }>; }; spend_data: { @@ -73,12 +55,7 @@ declare module "@databricks/appkit-ui/react" { creator: SQLStringMarker; }; result: Array<{ - /** @sqlType STRING */ - group_key: string; - /** @sqlType TIMESTAMP */ - aggregation_period: string; - /** @sqlType DECIMAL(38,6) */ - cost_usd: number; + ; }>; }; spend_summary: { @@ -92,12 +69,7 @@ declare module "@databricks/appkit-ui/react" { startDate: SQLDateMarker; }; result: Array<{ - /** @sqlType DECIMAL(33,0) */ - total: number; - /** @sqlType DECIMAL(33,0) */ - average: number; - /** @sqlType DECIMAL(33,0) */ - forecasted: number; + ; }>; }; sql_helpers_test: { @@ -117,22 +89,7 @@ declare module "@databricks/appkit-ui/react" { binaryParam: SQLStringMarker; }; result: Array<{ - /** @sqlType STRING */ - string_value: string; - /** @sqlType STRING */ - number_value: string; - /** @sqlType STRING */ - boolean_value: string; - /** 
@sqlType STRING */ - date_value: string; - /** @sqlType STRING */ - timestamp_value: string; - /** @sqlType BINARY */ - binary_value: string; - /** @sqlType STRING */ - binary_hex: string; - /** @sqlType INT */ - binary_length: number; + ; }>; }; top_contributors: { @@ -146,10 +103,7 @@ declare module "@databricks/appkit-ui/react" { endDate: SQLDateMarker; }; result: Array<{ - /** @sqlType STRING */ - app_name: string; - /** @sqlType DECIMAL(38,6) */ - total_cost_usd: number; + ; }>; }; untagged_apps: { @@ -163,14 +117,7 @@ declare module "@databricks/appkit-ui/react" { endDate: SQLDateMarker; }; result: Array<{ - /** @sqlType STRING */ - app_name: string; - /** @sqlType STRING */ - creator: string; - /** @sqlType DECIMAL(38,6) */ - total_cost_usd: number; - /** @sqlType DECIMAL(38,10) */ - avg_period_cost_usd: number; + ; }>; }; } diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts index f43da821..3e514140 100644 --- a/apps/dev-playground/server/index.ts +++ b/apps/dev-playground/server/index.ts @@ -1,5 +1,5 @@ import "reflect-metadata"; -import { analytics, createApp, server } from "@databricks/appkit"; +import { analytics, createApp, files, server } from "@databricks/appkit"; import { WorkspaceClient } from "@databricks/sdk-experimental"; import { lakebaseExamples } from "./lakebase-examples-plugin"; import { reconnect } from "./reconnect-plugin"; @@ -22,6 +22,7 @@ createApp({ telemetryExamples(), analytics({}), lakebaseExamples(), + files({ defaultVolume: process.env.DATABRICKS_DEFAULT_VOLUME }), ], ...(process.env.APPKIT_E2E_TEST && { client: createMockClient() }), }).then((appkit) => { diff --git a/docs/docs/api/appkit/Class.Plugin.md b/docs/docs/api/appkit/Class.Plugin.md index 64de5830..5f7bae17 100644 --- a/docs/docs/api/appkit/Class.Plugin.md +++ b/docs/docs/api/appkit/Class.Plugin.md @@ -345,6 +345,24 @@ BasePlugin.getEndpoints *** +### getSkipBodyParsingPaths() + +```ts +getSkipBodyParsingPaths(): ReadonlySet; +``` + 
+#### Returns + +`ReadonlySet`\<`string`\> + +#### Implementation of + +```ts +BasePlugin.getSkipBodyParsingPaths +``` + +*** + ### injectRoutes() ```ts diff --git a/docs/docs/api/appkit/Function.contentTypeFromPath.md b/docs/docs/api/appkit/Function.contentTypeFromPath.md new file mode 100644 index 00000000..540a344a --- /dev/null +++ b/docs/docs/api/appkit/Function.contentTypeFromPath.md @@ -0,0 +1,24 @@ +# Function: contentTypeFromPath() + +```ts +function contentTypeFromPath( + filePath: string, + reported?: string, + customTypes?: Record): string; +``` + +Resolve the MIME content type for a file path. + +## Parameters + +| Parameter | Type | Description | +| ------ | ------ | ------ | +| `filePath` | `string` | Path to the file (only the extension is inspected). | +| `reported?` | `string` | Optional MIME type reported by the caller; used as fallback when the extension is unknown. | +| `customTypes?` | `Record`\<`string`, `string`\> | Optional map of extension → MIME type overrides (e.g. `{ ".csv": "text/csv" }`). | + +## Returns + +`string` + +The resolved MIME content type string. diff --git a/docs/docs/api/appkit/index.md b/docs/docs/api/appkit/index.md index 91f9a66e..3c58b61c 100644 --- a/docs/docs/api/appkit/index.md +++ b/docs/docs/api/appkit/index.md @@ -65,6 +65,7 @@ plugin architecture, and React integration. | Function | Description | | ------ | ------ | | [appKitTypesPlugin](Function.appKitTypesPlugin.md) | Vite plugin to generate types for AppKit queries. Calls generateFromEntryPoint under the hood. | +| [contentTypeFromPath](Function.contentTypeFromPath.md) | Resolve the MIME content type for a file path. | | [createApp](Function.createApp.md) | Bootstraps AppKit with the provided configuration. | | [createLakebasePool](Function.createLakebasePool.md) | Create a Lakebase pool with appkit's logger integration. Telemetry automatically uses appkit's OpenTelemetry configuration via global registry. 
| | [generateDatabaseCredential](Function.generateDatabaseCredential.md) | Generate OAuth credentials for Postgres database connection using the proper Postgres API. | diff --git a/docs/docs/api/appkit/typedoc-sidebar.ts b/docs/docs/api/appkit/typedoc-sidebar.ts index 3421d7ee..63b513c2 100644 --- a/docs/docs/api/appkit/typedoc-sidebar.ts +++ b/docs/docs/api/appkit/typedoc-sidebar.ts @@ -200,6 +200,11 @@ const typedocSidebar: SidebarsConfig = { id: "api/appkit/Function.appKitTypesPlugin", label: "appKitTypesPlugin" }, + { + type: "doc", + id: "api/appkit/Function.contentTypeFromPath", + label: "contentTypeFromPath" + }, { type: "doc", id: "api/appkit/Function.createApp", diff --git a/docs/docs/plugins.md b/docs/docs/plugins.md index 3bfe1067..310f16d6 100644 --- a/docs/docs/plugins.md +++ b/docs/docs/plugins.md @@ -15,6 +15,7 @@ For complete API documentation, see the [`Plugin`](api/appkit/Class.Plugin.md) c Provides HTTP server capabilities with development and production modes. **Key features:** + - Express server for REST APIs - Vite dev server with hot module reload - Static file serving for production @@ -70,10 +71,10 @@ import { createApp, server } from "@databricks/appkit"; await createApp({ plugins: [ server({ - port: 8000, // default: Number(process.env.DATABRICKS_APP_PORT) || 8000 - host: "0.0.0.0", // default: process.env.FLASK_RUN_HOST || "0.0.0.0" - autoStart: true, // default: true - staticPath: "dist", // optional: force a specific static directory + port: 8000, // default: Number(process.env.DATABRICKS_APP_PORT) || 8000 + host: "0.0.0.0", // default: process.env.FLASK_RUN_HOST || "0.0.0.0" + autoStart: true, // default: true + staticPath: "dist", // optional: force a specific static directory }), ], }); @@ -84,6 +85,7 @@ await createApp({ Enables SQL query execution against Databricks SQL Warehouses. 
**Key features:** + - File-based SQL queries with automatic type generation - Parameterized queries with type-safe [SQL helpers](api/appkit/Variable.sql.md) - JSON and Arrow format support @@ -119,6 +121,7 @@ LIMIT :limit ``` **Supported `-- @param` types** (case-insensitive): + - `STRING`, `NUMERIC`, `BOOLEAN`, `DATE`, `TIMESTAMP`, `BINARY` #### Server-injected parameters @@ -143,6 +146,152 @@ The analytics plugin exposes these endpoints (mounted under `/api/analytics`): - `format: "JSON"` (default) returns JSON rows - `format: "ARROW"` returns an Arrow "statement_id" payload over SSE, then the client fetches binary Arrow from `/api/analytics/arrow-result/:jobId` +### Files plugin + +Provides HTTP routes and a programmatic API for Databricks Unity Catalog volume file operations (list, read, download, upload, delete, preview). + +Routes are mounted at `/api/files/*`. + +#### Configuration + +| Option | Type | Default | Description | +| -------------------- | ------------------------ | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `defaultVolume` | `string` | — | Absolute volume path used to resolve relative file paths (e.g. `"/Volumes/catalog/schema/vol"`). | +| `timeout` | `number` | Per-tier | Operation timeout in milliseconds. Overrides the built-in per-tier defaults (30 s read, 600 s write). | +| `customContentTypes` | `Record` | — | Map of file extensions to MIME types that takes priority over the built-in extension map. Keys should include the leading dot (e.g. `{ ".parquet": "application/vnd.apache.parquet" }`). 
| + +#### Programmatic API + +After registration, the plugin exposes methods on the app instance via `app.files.()`: + +| Method | Signature | Description | +| ----------------- | ------------------------------------------------------------------------------------------------------------------ | ----------------------------------------------------------------------------- | +| `list` | `(path?: string) => Promise` | List entries in a directory. Defaults to the configured `defaultVolume` root. | +| `read` | `(path: string) => Promise` | Read a file and return its contents as a UTF-8 string. | +| `download` | `(path: string) => Promise` | Download a file as a readable stream. | +| `exists` | `(path: string) => Promise` | Check whether a file exists. | +| `metadata` | `(path: string) => Promise` | Retrieve metadata (size, content type, last modified) for a file. | +| `upload` | `(path: string, contents: ReadableStream \| Buffer \| string, options?: { overwrite?: boolean }) => Promise` | Upload a file to a Unity Catalog volume. | +| `createDirectory` | `(path: string) => Promise` | Create a directory in a Unity Catalog volume. | +| `delete` | `(path: string) => Promise` | Delete a file or directory from a Unity Catalog volume. | +| `preview` | `(path: string) => Promise` | Get a preview of a file including metadata and a text excerpt. | + +#### HTTP Routes + +All routes are mounted under `/api/files`. File paths are passed via the `path` query parameter. + +| Method | Path | Description | +| ------ | --------------------------- | ---------------------------------------------------- | +| `GET` | `/api/files/root` | Returns the configured `defaultVolume` path. | +| `GET` | `/api/files/list?path=` | List directory contents. | +| `GET` | `/api/files/read?path=` | Read a file as plain text. | +| `GET` | `/api/files/download?path=` | Download a file as an attachment. | +| `GET` | `/api/files/raw?path=` | Serve a file inline with its detected content type. 
| +| `GET` | `/api/files/exists?path=` | Check whether a file exists (`{ exists: boolean }`). | +| `GET` | `/api/files/metadata?path=` | Retrieve file metadata (size, type, last modified). | +| `GET` | `/api/files/preview?path=` | Get a file preview with text excerpt. | +| `POST` | `/api/files/upload?path=` | Upload a file (stream the request body). | +| `POST` | `/api/files/mkdir` | Create a directory (`{ path }` in body). | +| `POST` | `/api/files/delete` | Delete a file or directory (`{ path }` in body). | + +#### Execution defaults + +Operations use three tiers of execution settings: + +| Tier | Cache | Retry | Timeout | Operations | +| ------------ | -------- | ----------------------- | ------------------- | ------------------------------------- | +| **Read** | 60 s TTL | 3 attempts, 1 s backoff | 30 s | list, read, exists, metadata, preview | +| **Download** | Disabled | 3 attempts, 1 s backoff | 30 s (stream start) | download, raw | +| **Write** | Disabled | Disabled | 600 s | upload, mkdir, delete | + +#### Basic usage + +```ts +import { createApp, files } from "@databricks/appkit"; + +const app = await createApp({ + plugins: [files({ defaultVolume: "/Volumes/catalog/schema/vol" })], +}); +``` + +#### List files in a directory + +```ts +const entries = await app.files.list("/path/to/dir"); +for (const entry of entries) { + console.log(entry.name, entry.is_directory); +} +``` + +#### Read a file as a string + +```ts +const content = await app.files.read("data/config.json"); +const config = JSON.parse(content); +``` + +#### Download a file as a stream + +```ts +const response = await app.files.download("reports/export.csv"); +// response.contents is a ReadableStream +``` + +#### Upload a file + +```ts +await app.files.upload("uploads/report.pdf", fileBuffer, { + overwrite: true, +}); +``` + +#### Check if a file exists + +```ts +const found = await app.files.exists("data/config.json"); +if (!found) { + console.log("File not found"); +} +``` + +#### Get file 
metadata + +```ts +const meta = await app.files.metadata("data/report.csv"); +console.log(meta.contentLength, meta.contentType, meta.lastModified); +``` + +#### Preview a file + +```ts +const preview = await app.files.preview("data/readme.md"); +// { contentLength, contentType, lastModified, textPreview, isText, isImage } +``` + +#### Custom content types + +```ts +const app = await createApp({ + plugins: [ + files({ + defaultVolume: "/Volumes/catalog/schema/vol", + customContentTypes: { + ".dbx": "application/x-databricks", + ".arrow": "application/vnd.apache.arrow.stream", + }, + }), + ], +}); +``` + +#### User-scoped operations in a route handler + +```ts +// Inside a custom plugin route handler: +const userFiles = this.asUser(req); +const entries = await userFiles.list(); +``` + ### Execution context and `asUser(req)` AppKit manages Databricks authentication via two contexts: @@ -196,10 +345,7 @@ Configure plugins when creating your AppKit instance: import { createApp, server, analytics } from "@databricks/appkit"; const AppKit = await createApp({ - plugins: [ - server({ port: 8000 }), - analytics(), - ], + plugins: [server({ port: 8000 }), analytics()], }); ``` @@ -235,12 +381,12 @@ class MyPlugin extends Plugin { permission: "READ", fields: { scope: { env: "MY_SECRET_SCOPE", description: "Secret scope" }, - key: { env: "MY_API_KEY", description: "Secret key name" } - } - } + key: { env: "MY_API_KEY", description: "Secret key name" }, + }, + }, ], - optional: [] - } + optional: [], + }, }; async setup() { @@ -258,15 +404,16 @@ class MyPlugin extends Plugin { exports() { // an object with the methods from this plugin to expose return { - myCustomMethod: this.myCustomMethod - } + myCustomMethod: this.myCustomMethod, + }; } } -export const myPlugin = toPlugin, "myPlugin">( - MyPlugin, - "myPlugin", -); +export const myPlugin = toPlugin< + typeof MyPlugin, + Record, + "myPlugin" +>(MyPlugin, "myPlugin"); ``` ### Config-dependent resources @@ -289,13 +436,30 @@ 
class MyPlugin extends Plugin { description: "A plugin with optional caching", resources: { required: [ - { type: "sql_warehouse", alias: "warehouse", resourceKey: "sqlWarehouse", description: "Query execution", permission: "CAN_USE", fields: { id: { env: "DATABRICKS_WAREHOUSE_ID" } } } + { + type: "sql_warehouse", + alias: "warehouse", + resourceKey: "sqlWarehouse", + description: "Query execution", + permission: "CAN_USE", + fields: { id: { env: "DATABRICKS_WAREHOUSE_ID" } }, + }, ], optional: [ // Listed as optional in manifest for static analysis - { type: "database", alias: "cache", resourceKey: "cache", description: "Query result caching (if enabled)", permission: "CAN_CONNECT_AND_CREATE", fields: { instance_name: { env: "DATABRICKS_CACHE_INSTANCE" }, database_name: { env: "DATABRICKS_CACHE_DB" } } } - ] - } + { + type: "database", + alias: "cache", + resourceKey: "cache", + description: "Query result caching (if enabled)", + permission: "CAN_CONNECT_AND_CREATE", + fields: { + instance_name: { env: "DATABRICKS_CACHE_INSTANCE" }, + database_name: { env: "DATABRICKS_CACHE_DB" }, + }, + }, + ], + }, }; // Runtime: Convert optional resources to required based on config @@ -313,7 +477,7 @@ class MyPlugin extends Plugin { instance_name: { env: "DATABRICKS_CACHE_INSTANCE" }, database_name: { env: "DATABRICKS_CACHE_DB" }, }, - required: true // Mark as required at runtime + required: true, // Mark as required at runtime }); } return resources; @@ -322,6 +486,7 @@ class MyPlugin extends Plugin { ``` This pattern allows: + - **Static tools** (CLI, docs) to show all possible resources - **Runtime validation** to enforce resources based on actual configuration @@ -341,11 +506,7 @@ To do that, your plugin needs to implement the `exports` method, returning an ob ```ts const AppKit = await createApp({ - plugins: [ - server({ port: 8000 }), - analytics(), - myPlugin(), - ], + plugins: [server({ port: 8000 }), analytics(), myPlugin()], }); AppKit.myPlugin.myCustomMethod(); @@ 
-364,7 +525,7 @@ await createApp({ plugins: [server(), analytics({})], cache: { enabled: true, - ttl: 3600, // seconds + ttl: 3600, // seconds strictPersistence: false, }, }); diff --git a/packages/appkit/src/connectors/files/client.ts b/packages/appkit/src/connectors/files/client.ts new file mode 100644 index 00000000..72631e10 --- /dev/null +++ b/packages/appkit/src/connectors/files/client.ts @@ -0,0 +1,348 @@ +import { ApiError, type WorkspaceClient } from "@databricks/sdk-experimental"; +import type { TelemetryOptions } from "shared"; +import { createLogger } from "../../logging/logger"; +import type { + DirectoryEntry, + DownloadResponse, + FileMetadata, + FilePreview, +} from "../../plugins/files/types"; +import type { TelemetryProvider } from "../../telemetry"; +import { + type Counter, + type Histogram, + type Span, + SpanKind, + SpanStatusCode, + TelemetryManager, +} from "../../telemetry"; +import { contentTypeFromPath, isTextContentType } from "./defaults"; + +const logger = createLogger("connectors:files"); + +export interface FilesConnectorConfig { + defaultVolume?: string; + timeout?: number; + telemetry?: TelemetryOptions; + customContentTypes?: Record; +} + +export class FilesConnector { + private readonly name = "files"; + private defaultVolume: string | undefined; + private readonly customContentTypes: Record | undefined; + + private readonly telemetry: TelemetryProvider; + private readonly telemetryMetrics: { + operationCount: Counter; + operationDuration: Histogram; + }; + + constructor(config: FilesConnectorConfig) { + this.defaultVolume = config.defaultVolume; + this.customContentTypes = config.customContentTypes; + + this.telemetry = TelemetryManager.getProvider(this.name, config.telemetry); + this.telemetryMetrics = { + operationCount: this.telemetry + .getMeter() + .createCounter("files.operation.count", { + description: "Total number of file operations", + unit: "1", + }), + operationDuration: this.telemetry + .getMeter() + 
.createHistogram("files.operation.duration", { + description: "Duration of file operations", + unit: "ms", + }), + }; + } + + resolvePath(filePath: string): string { + if (filePath.includes("..")) { + throw new Error('Path traversal ("../") is not allowed.'); + } + if (filePath.startsWith("/")) { + return filePath; + } + if (!this.defaultVolume) { + throw new Error( + "Cannot resolve relative path: no default volume set. Use an absolute path or set a default volume.", + ); + } + return `${this.defaultVolume}/${filePath}`; + } + + volume(volumePath: string): FilesConnector { + return new FilesConnector({ + defaultVolume: volumePath, + telemetry: false, + customContentTypes: this.customContentTypes, + }); + } + + private async traced( + operation: string, + attributes: Record, + fn: (span: Span) => Promise, + ): Promise { + const startTime = Date.now(); + let success = false; + + return this.telemetry.startActiveSpan( + `files.${operation}`, + { + kind: SpanKind.CLIENT, + attributes: { + "files.operation": operation, + ...attributes, + }, + }, + async (span: Span) => { + try { + const result = await fn(span); + success = true; + span.setStatus({ code: SpanStatusCode.OK }); + return result; + } catch (error) { + span.recordException(error as Error); + span.setStatus({ + code: SpanStatusCode.ERROR, + message: error instanceof Error ? error.message : String(error), + }); + throw error; + } finally { + span.end(); + const duration = Date.now() - startTime; + const metricAttrs = { + "files.operation": operation, + success: String(success), + }; + this.telemetryMetrics.operationCount.add(1, metricAttrs); + this.telemetryMetrics.operationDuration.record(duration, metricAttrs); + } + }, + { name: this.name, includePrefix: true }, + ); + } + + async list( + client: WorkspaceClient, + directoryPath?: string, + ): Promise { + const resolvedPath = directoryPath + ? 
this.resolvePath(directoryPath) + : this.defaultVolume; + if (!resolvedPath) { + throw new Error("No directory path provided and no default volume set."); + } + + return this.traced("list", { "files.path": resolvedPath }, async () => { + const entries: DirectoryEntry[] = []; + for await (const entry of client.files.listDirectoryContents({ + directory_path: resolvedPath, + })) { + entries.push(entry); + } + return entries; + }); + } + + async read(client: WorkspaceClient, filePath: string): Promise { + return this.traced( + "read", + { "files.path": this.resolvePath(filePath) }, + async () => { + const response = await this.download(client, filePath); + if (!response.contents) { + return ""; + } + const reader = response.contents.getReader(); + const decoder = new TextDecoder(); + let result = ""; + while (true) { + const { done, value } = await reader.read(); + if (done) break; + result += decoder.decode(value, { stream: true }); + } + result += decoder.decode(); + return result; + }, + ); + } + + async download( + client: WorkspaceClient, + filePath: string, + ): Promise { + return this.traced( + "download", + { "files.path": this.resolvePath(filePath) }, + async () => { + return client.files.download({ + file_path: this.resolvePath(filePath), + }); + }, + ); + } + + async exists(client: WorkspaceClient, filePath: string): Promise { + return this.traced( + "exists", + { "files.path": this.resolvePath(filePath) }, + async () => { + try { + await this.metadata(client, filePath); + return true; + } catch (error) { + if (error instanceof ApiError && error.statusCode === 404) { + return false; + } + throw error; + } + }, + ); + } + + async metadata( + client: WorkspaceClient, + filePath: string, + ): Promise { + return this.traced( + "metadata", + { "files.path": this.resolvePath(filePath) }, + async () => { + const response = await client.files.getMetadata({ + file_path: this.resolvePath(filePath), + }); + return { + contentLength: response["content-length"], + 
contentType: contentTypeFromPath( + filePath, + response["content-type"], + this.customContentTypes, + ), + lastModified: response["last-modified"], + }; + }, + ); + } + + async upload( + client: WorkspaceClient, + filePath: string, + contents: ReadableStream | Buffer | string, + options?: { overwrite?: boolean }, + ): Promise { + const resolvedPath = this.resolvePath(filePath); + + return this.traced("upload", { "files.path": resolvedPath }, async () => { + const body = contents; + const overwrite = options?.overwrite ?? true; + + // Workaround: The SDK's files.upload() has two bugs: + // 1. It ignores the `contents` field (sets body to undefined) + // 2. apiClient.request() checks `instanceof` against its own ReadableStream + // subclass, so standard ReadableStream instances get JSON.stringified to "{}" + // Bypass both by calling the REST API directly with SDK-provided auth. + const url = new URL( + `/api/2.0/fs/files${resolvedPath}`, + client.config.host, + ); + url.searchParams.set("overwrite", String(overwrite)); + + const headers = new Headers({ + "Content-Type": "application/octet-stream", + }); + const fetchOptions: RequestInit = { method: "PUT", headers, body }; + + if (body instanceof ReadableStream) { + fetchOptions.duplex = "half"; + } + + await client.config.authenticate(headers); + + const res = await fetch(url.toString(), fetchOptions); + + if (!res.ok) { + const text = await res.text(); + logger.error(`Upload failed (${res.status}): ${text}`); + throw new Error(`Upload failed (${res.status}): ${text}`); + } + }); + } + + async createDirectory( + client: WorkspaceClient, + directoryPath: string, + ): Promise { + return this.traced( + "createDirectory", + { "files.path": this.resolvePath(directoryPath) }, + async () => { + await client.files.createDirectory({ + directory_path: this.resolvePath(directoryPath), + }); + }, + ); + } + + async delete(client: WorkspaceClient, filePath: string): Promise { + return this.traced( + "delete", + { "files.path": 
this.resolvePath(filePath) }, + async () => { + await client.files.delete({ + file_path: this.resolvePath(filePath), + }); + }, + ); + } + + async preview( + client: WorkspaceClient, + filePath: string, + options?: { maxChars?: number }, + ): Promise { + return this.traced( + "preview", + { "files.path": this.resolvePath(filePath) }, + async () => { + const meta = await this.metadata(client, filePath); + const isText = isTextContentType(meta.contentType); + const isImage = meta.contentType?.startsWith("image/") || false; + + if (!isText) { + return { ...meta, textPreview: null, isText: false, isImage }; + } + + const response = await client.files.download({ + file_path: this.resolvePath(filePath), + }); + if (!response.contents) { + return { ...meta, textPreview: "", isText: true, isImage: false }; + } + + const reader = response.contents.getReader(); + const decoder = new TextDecoder(); + let preview = ""; + const maxChars = options?.maxChars ?? 1024; + + while (preview.length < maxChars) { + const { done, value } = await reader.read(); + if (done) break; + preview += decoder.decode(value, { stream: true }); + } + preview += decoder.decode(); + await reader.cancel(); + + if (preview.length > maxChars) { + preview = preview.slice(0, maxChars); + } + + return { ...meta, textPreview: preview, isText: true, isImage: false }; + }, + ); + } +} diff --git a/packages/appkit/src/connectors/files/defaults.ts b/packages/appkit/src/connectors/files/defaults.ts new file mode 100644 index 00000000..7791de6d --- /dev/null +++ b/packages/appkit/src/connectors/files/defaults.ts @@ -0,0 +1,106 @@ +export const EXTENSION_CONTENT_TYPES: Record = Object.freeze({ + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".webp": "image/webp", + ".svg": "image/svg+xml", + ".bmp": "image/bmp", + ".ico": "image/vnd.microsoft.icon", + ".html": "text/html", + ".css": "text/css", + ".js": "text/javascript", + ".ts": "text/typescript", + ".py": 
"text/x-python", + ".txt": "text/plain", + ".md": "text/markdown", + ".csv": "text/csv", + ".json": "application/json", + ".jsonl": "application/x-ndjson", + ".xml": "application/xml", + ".yaml": "application/x-yaml", + ".yml": "application/x-yaml", + ".sql": "application/sql", + ".pdf": "application/pdf", + ".ipynb": "application/x-ipynb+json", + ".parquet": "application/vnd.apache.parquet", + ".zip": "application/zip", + ".gz": "application/gzip", +}); + +const TEXT_KEYWORDS = ["json", "xml", "yaml", "sql", "javascript"] as const; + +/** + * Determine whether a content type represents text. + * + * Returns `true` for any `text/*` type and for known structured-text types + * such as JSON, XML, YAML, SQL, and JavaScript. + * + * @param contentType - MIME content type string to check. + * @returns `true` if the content type is text-based. + */ +export function isTextContentType(contentType: string | undefined): boolean { + if (!contentType) return false; + if (contentType.startsWith("text/")) return true; + return TEXT_KEYWORDS.some((kw) => contentType.includes(kw)); +} + +/** + * MIME types that are safe to serve inline (i.e. browsers cannot execute + * scripts from them). Any type **not** in this set should be forced to + * download via `Content-Disposition: attachment` when served by the `/raw` + * endpoint to prevent stored-XSS attacks. + */ +export const SAFE_INLINE_CONTENT_TYPES: ReadonlySet = new Set([ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "image/bmp", + "image/vnd.microsoft.icon", + "text/plain", + "text/csv", + "text/markdown", + "application/json", + "application/pdf", +]); + +/** + * Check whether a content type is safe to serve inline. + * + * @param contentType - MIME content type string. + * @returns `true` if the type is in the safe-inline allowlist. 
+ */ +export function isSafeInlineContentType(contentType: string): boolean { + return SAFE_INLINE_CONTENT_TYPES.has(contentType); +} + +/** + * Resolve the MIME content type for a file path. + * + * @param filePath - Path to the file (only the extension is inspected). + * @param reported - Optional MIME type reported by the caller; used as fallback when the extension is unknown. + * @param customTypes - Optional map of extension → MIME type overrides (e.g. `{ ".csv": "text/csv" }`). + * @returns The resolved MIME content type string. + */ +export function contentTypeFromPath( + filePath: string, + reported?: string, + customTypes?: Record, +): string { + const dotIndex = filePath.lastIndexOf("."); + const ext = dotIndex > 0 ? filePath.slice(dotIndex).toLowerCase() : ""; + const fromCustom = customTypes?.[ext]; + + if (fromCustom) { + return fromCustom; + } + + const fromExt = EXTENSION_CONTENT_TYPES[ext]; + + if (fromExt) { + return fromExt; + } + + return reported ?? "application/octet-stream"; +} diff --git a/packages/appkit/src/connectors/files/index.ts b/packages/appkit/src/connectors/files/index.ts new file mode 100644 index 00000000..6f0217a1 --- /dev/null +++ b/packages/appkit/src/connectors/files/index.ts @@ -0,0 +1,2 @@ +export * from "./client"; +export * from "./defaults"; diff --git a/packages/appkit/src/connectors/files/tests/client.test.ts b/packages/appkit/src/connectors/files/tests/client.test.ts new file mode 100644 index 00000000..96e4693a --- /dev/null +++ b/packages/appkit/src/connectors/files/tests/client.test.ts @@ -0,0 +1,804 @@ +import type { WorkspaceClient } from "@databricks/sdk-experimental"; +import { createMockTelemetry } from "@tools/test-helpers"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { FilesConnector } from "../client"; + +function streamFromString(text: string): ReadableStream { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + 
controller.enqueue(encoder.encode(text)); + controller.close(); + }, + }); +} + +function streamFromChunks(chunks: string[]): ReadableStream { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} + +const { mockFilesApi, mockConfig, mockClient, MockApiError } = vi.hoisted( + () => { + const mockFilesApi = { + listDirectoryContents: vi.fn(), + download: vi.fn(), + getMetadata: vi.fn(), + upload: vi.fn(), + createDirectory: vi.fn(), + delete: vi.fn(), + }; + + const mockConfig = { + host: "https://test.databricks.com", + authenticate: vi.fn(), + }; + + const mockClient = { + files: mockFilesApi, + config: mockConfig, + } as unknown as WorkspaceClient; + + class MockApiError extends Error { + statusCode: number; + constructor(message: string, statusCode: number) { + super(message); + this.name = "ApiError"; + this.statusCode = statusCode; + } + } + + return { mockFilesApi, mockConfig, mockClient, MockApiError }; + }, +); + +vi.mock("@databricks/sdk-experimental", () => ({ + WorkspaceClient: vi.fn(() => mockClient), + ApiError: MockApiError, +})); + +const mockTelemetry = createMockTelemetry(); + +vi.mock("../../../telemetry", () => ({ + TelemetryManager: { + getProvider: vi.fn(() => mockTelemetry), + }, + SpanKind: { CLIENT: 2 }, + SpanStatusCode: { OK: 1, ERROR: 2 }, +})); + +describe("FilesConnector", () => { + describe("Path Resolution", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + test("absolute paths are returned as-is", () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + + mockFilesApi.download.mockResolvedValue({ contents: null }); + connector.download(mockClient, "/Volumes/other/path/file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/other/path/file.txt", + }); + }); + + test("relative paths prepend 
defaultVolume", () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + + mockFilesApi.download.mockResolvedValue({ contents: null }); + connector.download(mockClient, "subdir/file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol/subdir/file.txt", + }); + }); + + test("relative path without defaultVolume throws error", async () => { + const connector = new FilesConnector({}); + + await expect(connector.download(mockClient, "file.txt")).rejects.toThrow( + "Cannot resolve relative path: no default volume set.", + ); + }); + + test("volume() creates new connector scoped to a different volume", () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol1", + }); + + const scoped = connector.volume("/Volumes/catalog/schema/vol2"); + + mockFilesApi.download.mockResolvedValue({ contents: null }); + scoped.download(mockClient, "file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol2/file.txt", + }); + }); + + test("volume() does not affect the original connector", () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol1", + }); + + connector.volume("/Volumes/catalog/schema/vol2"); + + mockFilesApi.download.mockResolvedValue({ contents: null }); + connector.download(mockClient, "file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol1/file.txt", + }); + }); + + test("paths containing '..' are rejected", async () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + + await expect( + connector.download(mockClient, "../../../etc/passwd"), + ).rejects.toThrow('Path traversal ("../") is not allowed.'); + }); + + test("absolute paths containing '..' 
are rejected", async () => { + const connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + + await expect( + connector.download(mockClient, "/Volumes/catalog/../other/file.txt"), + ).rejects.toThrow('Path traversal ("../") is not allowed.'); + }); + + test("constructor without defaultVolume omits it", async () => { + const connector = new FilesConnector({}); + + await expect(connector.list(mockClient)).rejects.toThrow( + "No directory path provided and no default volume set.", + ); + }); + }); + + describe("list()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("collects async iterator entries", async () => { + const entries = [ + { + name: "file1.txt", + path: "/Volumes/catalog/schema/vol/file1.txt", + is_directory: false, + }, + { + name: "subdir", + path: "/Volumes/catalog/schema/vol/subdir", + is_directory: true, + }, + ]; + + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + for (const entry of entries) { + yield entry; + } + })(), + ); + + const result = await connector.list(mockClient); + + expect(result).toEqual(entries); + expect(mockFilesApi.listDirectoryContents).toHaveBeenCalledWith({ + directory_path: "/Volumes/catalog/schema/vol", + }); + }); + + test("uses defaultVolume when no path provided", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () {})(), + ); + + await connector.list(mockClient); + + expect(mockFilesApi.listDirectoryContents).toHaveBeenCalledWith({ + directory_path: "/Volumes/catalog/schema/vol", + }); + }); + + test("throws when no path and no defaultVolume", async () => { + const noVolumeConnector = new FilesConnector({}); + + await expect(noVolumeConnector.list(mockClient)).rejects.toThrow( + "No directory path provided and no default volume set.", + ); + }); + + test("uses provided absolute 
path", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () {})(), + ); + + await connector.list(mockClient, "/Volumes/other/path"); + + expect(mockFilesApi.listDirectoryContents).toHaveBeenCalledWith({ + directory_path: "/Volumes/other/path", + }); + }); + + test("resolves relative path with defaultVolume", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () {})(), + ); + + await connector.list(mockClient, "subdir"); + + expect(mockFilesApi.listDirectoryContents).toHaveBeenCalledWith({ + directory_path: "/Volumes/catalog/schema/vol/subdir", + }); + }); + + test("returns empty array for empty directory", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () {})(), + ); + + const result = await connector.list(mockClient); + + expect(result).toEqual([]); + }); + }); + + describe("read()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("decodes ReadableStream to UTF-8 string", async () => { + const content = "Hello, world!"; + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(content), + }); + + const result = await connector.read(mockClient, "/file.txt"); + + expect(result).toBe(content); + }); + + test("returns empty string when contents is null", async () => { + mockFilesApi.download.mockResolvedValue({ contents: null }); + + const result = await connector.read(mockClient, "/empty.txt"); + + expect(result).toBe(""); + }); + + test("concatenates multiple chunks correctly", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromChunks(["Hello, ", "world", "!"]), + }); + + const result = await connector.read(mockClient, "/chunked.txt"); + + expect(result).toBe("Hello, world!"); + }); + + test("handles multi-byte UTF-8 characters", async () => { + const content = "Héllo 
wörld 🌍"; + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(content), + }); + + const result = await connector.read(mockClient, "/unicode.txt"); + + expect(result).toBe(content); + }); + }); + + describe("download()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("calls client.files.download with resolved path", async () => { + const response = { contents: streamFromString("data") }; + mockFilesApi.download.mockResolvedValue(response); + + const result = await connector.download(mockClient, "file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol/file.txt", + }); + expect(result).toBe(response); + }); + + test("passes absolute path directly", async () => { + const response = { contents: null }; + mockFilesApi.download.mockResolvedValue(response); + + await connector.download(mockClient, "/Volumes/other/file.txt"); + + expect(mockFilesApi.download).toHaveBeenCalledWith({ + file_path: "/Volumes/other/file.txt", + }); + }); + }); + + describe("exists()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("returns true when metadata succeeds", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 100, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + + const result = await connector.exists(mockClient, "/file.txt"); + + expect(result).toBe(true); + }); + + test("returns false on 404 ApiError", async () => { + mockFilesApi.getMetadata.mockRejectedValue( + new MockApiError("Not found", 404), + ); + + const result = await connector.exists(mockClient, "/missing.txt"); + + expect(result).toBe(false); + }); + + test("rethrows non-404 ApiError", async () => { + 
mockFilesApi.getMetadata.mockRejectedValue( + new MockApiError("Server error", 500), + ); + + await expect(connector.exists(mockClient, "/file.txt")).rejects.toThrow( + "Server error", + ); + }); + + test("rethrows generic errors", async () => { + mockFilesApi.getMetadata.mockRejectedValue(new Error("Network failure")); + + await expect(connector.exists(mockClient, "/file.txt")).rejects.toThrow( + "Network failure", + ); + }); + }); + + describe("metadata()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("maps SDK response to FileMetadata", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 1234, + "content-type": "application/json", + "last-modified": "2025-06-15T10:00:00Z", + }); + + const result = await connector.metadata(mockClient, "/data.json"); + + expect(result).toEqual({ + contentLength: 1234, + contentType: "application/json", + lastModified: "2025-06-15T10:00:00Z", + }); + }); + + test("uses contentTypeFromPath to resolve octet-stream", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 500, + "content-type": "application/octet-stream", + "last-modified": "2025-01-01", + }); + + const result = await connector.metadata(mockClient, "/image.png"); + + expect(result.contentType).toBe("image/png"); + }); + + test("handles undefined content-type from SDK", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 100, + "content-type": undefined, + "last-modified": "2025-01-01", + }); + + const result = await connector.metadata(mockClient, "/data.csv"); + + expect(result.contentType).toBe("text/csv"); + }); + + test("resolves relative path via defaultVolume", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 0, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + + await 
connector.metadata(mockClient, "notes.txt"); + + expect(mockFilesApi.getMetadata).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol/notes.txt", + }); + }); + }); + + describe("upload()", () => { + let connector: FilesConnector; + let fetchSpy: ReturnType<typeof vi.fn>; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + mockConfig.authenticate.mockResolvedValue(undefined); + fetchSpy = vi.fn().mockResolvedValue({ ok: true }); + vi.stubGlobal("fetch", fetchSpy); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + }); + + test("handles string input", async () => { + await connector.upload(mockClient, "file.txt", "hello world"); + + expect(fetchSpy).toHaveBeenCalledWith( + expect.stringContaining( + "/api/2.0/fs/files/Volumes/catalog/schema/vol/file.txt", + ), + expect.objectContaining({ + method: "PUT", + body: "hello world", + }), + ); + }); + + test("handles Buffer input", async () => { + const buf = Buffer.from("buffer data"); + await connector.upload(mockClient, "file.bin", buf); + + expect(fetchSpy).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + method: "PUT", + body: buf, + }), + ); + }); + + test("handles ReadableStream input (streams directly)", async () => { + const stream = streamFromString("stream data"); + await connector.upload(mockClient, "file.txt", stream); + + expect(fetchSpy).toHaveBeenCalledWith( + expect.any(String), + expect.objectContaining({ + method: "PUT", + body: expect.any(ReadableStream), + duplex: "half", + }), + ); + }); + + test("defaults overwrite to true", async () => { + await connector.upload(mockClient, "file.txt", "data"); + + const url = fetchSpy.mock.calls[0][0] as string; + expect(url).toContain("overwrite=true"); + }); + + test("sets overwrite=false when specified", async () => { + await connector.upload(mockClient, "file.txt", "data", { + overwrite: false, + }); + + const url = fetchSpy.mock.calls[0][0] as 
string; + expect(url).toContain("overwrite=false"); + }); + + test("calls config.authenticate on the headers", async () => { + await connector.upload(mockClient, "file.txt", "data"); + + expect(mockConfig.authenticate).toHaveBeenCalledWith(expect.any(Headers)); + }); + + test("builds URL from client.config.host", async () => { + await connector.upload(mockClient, "file.txt", "data"); + + const url = fetchSpy.mock.calls[0][0] as string; + expect(url).toMatch( + /^https:\/\/test\.databricks\.com\/api\/2\.0\/fs\/files/, + ); + }); + + test("throws on non-ok response", async () => { + fetchSpy.mockResolvedValue({ + ok: false, + status: 403, + text: () => Promise.resolve("Forbidden"), + }); + + await expect( + connector.upload(mockClient, "file.txt", "data"), + ).rejects.toThrow("Upload failed (403): Forbidden"); + }); + + test("resolves absolute paths directly", async () => { + await connector.upload(mockClient, "/Volumes/other/vol/file.txt", "data"); + + const url = fetchSpy.mock.calls[0][0] as string; + expect(url).toContain("/api/2.0/fs/files/Volumes/other/vol/file.txt"); + }); + }); + + describe("createDirectory()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("calls client.files.createDirectory with resolved path", async () => { + mockFilesApi.createDirectory.mockResolvedValue(undefined); + + await connector.createDirectory(mockClient, "new-dir"); + + expect(mockFilesApi.createDirectory).toHaveBeenCalledWith({ + directory_path: "/Volumes/catalog/schema/vol/new-dir", + }); + }); + + test("uses absolute path when provided", async () => { + mockFilesApi.createDirectory.mockResolvedValue(undefined); + + await connector.createDirectory( + mockClient, + "/Volumes/other/path/new-dir", + ); + + expect(mockFilesApi.createDirectory).toHaveBeenCalledWith({ + directory_path: "/Volumes/other/path/new-dir", + }); + }); + }); + + 
describe("delete()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("calls client.files.delete with resolved path", async () => { + mockFilesApi.delete.mockResolvedValue(undefined); + + await connector.delete(mockClient, "file.txt"); + + expect(mockFilesApi.delete).toHaveBeenCalledWith({ + file_path: "/Volumes/catalog/schema/vol/file.txt", + }); + }); + + test("uses absolute path when provided", async () => { + mockFilesApi.delete.mockResolvedValue(undefined); + + await connector.delete(mockClient, "/Volumes/other/file.txt"); + + expect(mockFilesApi.delete).toHaveBeenCalledWith({ + file_path: "/Volumes/other/file.txt", + }); + }); + }); + + describe("preview()", () => { + let connector: FilesConnector; + + beforeEach(() => { + vi.clearAllMocks(); + connector = new FilesConnector({ + defaultVolume: "/Volumes/catalog/schema/vol", + }); + }); + + test("text files return truncated preview (max 1024 chars)", async () => { + const longText = "A".repeat(2000); + + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 2000, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(longText), + }); + + const result = await connector.preview(mockClient, "/file.txt"); + + expect(result.isText).toBe(true); + expect(result.isImage).toBe(false); + expect(result.textPreview).not.toBeNull(); + expect(result.textPreview?.length).toBeLessThanOrEqual(1024); + }); + + test("text/html files are treated as text", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 30, + "content-type": "text/html", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("
<html><body>Hello</body></html>"), + }); + + const result = await connector.preview(mockClient, "/page.html"); + + expect(result.isText).toBe(true); + expect(result.textPreview).toBe("<html><body>Hello</body></html>"); + }); + + test("application/json files are treated as text", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 20, + "content-type": "application/json", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString('{"key":"value"}'), + }); + + const result = await connector.preview(mockClient, "/data.json"); + + expect(result.isText).toBe(true); + expect(result.textPreview).toBe('{"key":"value"}'); + }); + + test("application/xml files are treated as text", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 30, + "content-type": "application/xml", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("<root><value>1</value></root>"), + }); + + const result = await connector.preview(mockClient, "/data.xml"); + + expect(result.isText).toBe(true); + expect(result.textPreview).toBe("<root><value>1</value></root>"); + }); + + test("image files return isImage: true, textPreview: null", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 5000, + "content-type": "image/png", + "last-modified": "2025-01-01", + }); + + const result = await connector.preview(mockClient, "/image.png"); + + expect(result.isImage).toBe(true); + expect(result.isText).toBe(false); + expect(result.textPreview).toBeNull(); + }); + + test("other files return isText: false, isImage: false, textPreview: null", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 1000, + "content-type": "application/pdf", + "last-modified": "2025-01-01", + }); + + const result = await connector.preview(mockClient, "/doc.pdf"); + + expect(result.isText).toBe(false); + expect(result.isImage).toBe(false); + expect(result.textPreview).toBeNull(); + }); + + test("empty file contents return empty string preview", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 0, + "content-type": "text/plain", + "last-modified": 
"2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: null, + }); + + const result = await connector.preview(mockClient, "/empty.txt"); + + expect(result.isText).toBe(true); + expect(result.isImage).toBe(false); + expect(result.textPreview).toBe(""); + }); + + test("preview spreads metadata into result", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 42, + "content-type": "text/plain", + "last-modified": "2025-06-15T10:00:00Z", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("hello"), + }); + + const result = await connector.preview(mockClient, "/notes.txt"); + + expect(result.contentLength).toBe(42); + expect(result.contentType).toBe("text/plain"); + expect(result.lastModified).toBe("2025-06-15T10:00:00Z"); + expect(result.textPreview).toBe("hello"); + }); + + test("short text file returns full content", async () => { + const content = "Short file."; + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": content.length, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(content), + }); + + const result = await connector.preview(mockClient, "/short.txt"); + + expect(result.textPreview).toBe(content); + }); + }); +}); diff --git a/packages/appkit/src/connectors/files/tests/defaults.test.ts b/packages/appkit/src/connectors/files/tests/defaults.test.ts new file mode 100644 index 00000000..578eb91c --- /dev/null +++ b/packages/appkit/src/connectors/files/tests/defaults.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, test } from "vitest"; +import { + contentTypeFromPath, + isSafeInlineContentType, + SAFE_INLINE_CONTENT_TYPES, +} from "../defaults"; + +describe("isSafeInlineContentType", () => { + const safeTypes = [ + "image/png", + "image/jpeg", + "image/gif", + "image/webp", + "image/bmp", + "image/vnd.microsoft.icon", + "text/plain", + "text/csv", + "text/markdown", + 
"application/json", + "application/pdf", + ]; + + for (const type of safeTypes) { + test(`returns true for safe type: ${type}`, () => { + expect(isSafeInlineContentType(type)).toBe(true); + }); + } + + const dangerousTypes = [ + "text/html", + "text/javascript", + "image/svg+xml", + "text/css", + "application/xml", + ]; + + for (const type of dangerousTypes) { + test(`returns false for dangerous type: ${type}`, () => { + expect(isSafeInlineContentType(type)).toBe(false); + }); + } + + test("returns false for unknown types", () => { + expect(isSafeInlineContentType("application/octet-stream")).toBe(false); + expect(isSafeInlineContentType("application/x-yaml")).toBe(false); + expect(isSafeInlineContentType("video/mp4")).toBe(false); + }); + + test("SAFE_INLINE_CONTENT_TYPES is frozen (ReadonlySet)", () => { + expect(SAFE_INLINE_CONTENT_TYPES.size).toBe(safeTypes.length); + for (const type of safeTypes) { + expect(SAFE_INLINE_CONTENT_TYPES.has(type)).toBe(true); + } + }); +}); + +describe("contentTypeFromPath", () => { + test("returns octet-stream for files without an extension", () => { + expect(contentTypeFromPath("Makefile")).toBe("application/octet-stream"); + expect(contentTypeFromPath("/path/to/Makefile")).toBe( + "application/octet-stream", + ); + }); + + test("falls back to reported type for files without an extension", () => { + expect(contentTypeFromPath("LICENSE", "text/plain")).toBe("text/plain"); + }); + + test("returns octet-stream for dotfiles without a real extension", () => { + expect(contentTypeFromPath(".gitignore")).toBe("application/octet-stream"); + expect(contentTypeFromPath(".env")).toBe("application/octet-stream"); + }); + + test("resolves dotfiles that have an extension", () => { + expect(contentTypeFromPath(".eslintrc.json")).toBe("application/json"); + expect(contentTypeFromPath(".config.yaml")).toBe("application/x-yaml"); + }); + + test("returns octet-stream for empty string", () => { + 
expect(contentTypeFromPath("")).toBe("application/octet-stream"); + }); +}); diff --git a/packages/appkit/src/connectors/index.ts b/packages/appkit/src/connectors/index.ts index fdb1cc69..19181323 100644 --- a/packages/appkit/src/connectors/index.ts +++ b/packages/appkit/src/connectors/index.ts @@ -1,3 +1,4 @@ +export * from "./files"; export * from "./lakebase"; export * from "./lakebase-v1"; export * from "./sql-warehouse"; diff --git a/packages/appkit/src/index.ts b/packages/appkit/src/index.ts index 8ba528ef..67a93cf7 100644 --- a/packages/appkit/src/index.ts +++ b/packages/appkit/src/index.ts @@ -46,7 +46,8 @@ export { } from "./errors"; // Plugin authoring export { Plugin, toPlugin } from "./plugin"; -export { analytics, server } from "./plugins"; +export { analytics, files, server } from "./plugins"; +export { contentTypeFromPath } from "./plugins/files/helpers"; // Registry types and utilities for plugin manifests export type { ConfigSchema, @@ -73,5 +74,6 @@ export { SpanStatusCode, type TelemetryConfig, } from "./telemetry"; + // Vite plugin and type generation export { appKitTypesPlugin } from "./type-generator/vite-plugin"; diff --git a/packages/appkit/src/plugin/plugin.ts b/packages/appkit/src/plugin/plugin.ts index 4f60f195..374e2f77 100644 --- a/packages/appkit/src/plugin/plugin.ts +++ b/packages/appkit/src/plugin/plugin.ts @@ -51,6 +51,7 @@ const EXCLUDED_FROM_PROXY = new Set([ "shutdown", "injectRoutes", "getEndpoints", + "getSkipBodyParsingPaths", "abortActiveOperations", // asUser itself - prevent chaining like .asUser().asUser() "asUser", @@ -157,6 +158,9 @@ export abstract class Plugin< /** Registered endpoints for this plugin */ private registeredEndpoints: PluginEndpointMap = {}; + /** Paths that opt out of JSON body parsing (e.g. file upload routes) */ + private skipBodyParsingPaths: Set<string> = new Set(); + + /** + * Plugin initialization phase. 
* - 'core': Initialized first (e.g., config plugins) @@ -191,6 +195,10 @@ export abstract class Plugin< return this.registeredEndpoints; } + getSkipBodyParsingPaths(): ReadonlySet<string> { + return this.skipBodyParsingPaths; + } + abortActiveOperations(): void { this.streamManager.abortAll(); } @@ -372,7 +380,12 @@ export abstract class Plugin< router[method](path, handler); - this.registerEndpoint(name, `/api/${this.name}${path}`); + const fullPath = `/api/${this.name}${path}`; + this.registerEndpoint(name, fullPath); + + if (config.skipBodyParsing) { + this.skipBodyParsingPaths.add(fullPath); + } } // build execution options by merging defaults, plugin config, and user overrides diff --git a/packages/appkit/src/plugin/tests/plugin.test.ts b/packages/appkit/src/plugin/tests/plugin.test.ts index 51f677a8..6c0732d8 100644 --- a/packages/appkit/src/plugin/tests/plugin.test.ts +++ b/packages/appkit/src/plugin/tests/plugin.test.ts @@ -552,6 +552,50 @@ describe("Plugin", () => { }); }); + describe("getSkipBodyParsingPaths", () => { + test("should return empty set by default", () => { + const plugin = new TestPlugin(config); + + expect(plugin.getSkipBodyParsingPaths().size).toBe(0); + }); + + test("should include paths from routes with skipBodyParsing: true", () => { + const plugin = new TestPlugin({ ...config, name: "test" }); + const mockRouter = { + post: vi.fn(), + } as any; + + (plugin as any).route(mockRouter, { + name: "upload", + method: "post", + path: "/upload", + skipBodyParsing: true, + handler: vi.fn(), + }); + + const paths = plugin.getSkipBodyParsingPaths(); + expect(paths.has("/api/test/upload")).toBe(true); + expect(paths.size).toBe(1); + }); + + test("should not include paths from routes without skipBodyParsing", () => { + const plugin = new TestPlugin({ ...config, name: "test" }); + const mockRouter = { + post: vi.fn(), + } as any; + + (plugin as any).route(mockRouter, { + name: "create", + method: "post", + path: "/create", + handler: vi.fn(), + }); + + 
const paths = plugin.getSkipBodyParsingPaths(); + expect(paths.size).toBe(0); + }); + }); + describe("static properties", () => { test("should have default phase of 'normal'", () => { expect(Plugin.phase).toBe("normal"); diff --git a/packages/appkit/src/plugins/files/defaults.ts b/packages/appkit/src/plugins/files/defaults.ts new file mode 100644 index 00000000..e0284808 --- /dev/null +++ b/packages/appkit/src/plugins/files/defaults.ts @@ -0,0 +1,59 @@ +import type { PluginExecuteConfig } from "shared"; + +/** + * Execution defaults for read-tier operations (list, read, exists, metadata, preview). + * Cache 60s + * Retry 3x with 1s backoff + * Timeout 30s + **/ +export const filesReadDefaults: PluginExecuteConfig = { + cache: { + enabled: true, + ttl: 60_000, + }, + retry: { + enabled: true, + initialDelay: 1000, + attempts: 3, + }, + timeout: 30_000, +}; + +/** + * Execution defaults for download-tier operations (download, raw). + * No cache + * Retry 3x with 1s backoff + * Timeout 30s (stream start only) + **/ +export const filesDownloadDefaults: PluginExecuteConfig = { + cache: { + enabled: false, + }, + retry: { + enabled: true, + initialDelay: 1000, + attempts: 3, + }, + /** + * @info this timeout is for the stream to start, not for the full download. + */ + timeout: 30_000, +}; + +/** + * Execution defaults for write-tier operations (upload, mkdir, delete). + * No cache + * No retry + * Timeout 600s. 
+ **/ +export const filesWriteDefaults: PluginExecuteConfig = { + cache: { + enabled: false, + }, + retry: { + enabled: false, + }, + timeout: 600_000, +}; + +export { EXTENSION_CONTENT_TYPES } from "../../connectors/files/defaults"; diff --git a/packages/appkit/src/plugins/files/helpers.ts b/packages/appkit/src/plugins/files/helpers.ts new file mode 100644 index 00000000..e30115a8 --- /dev/null +++ b/packages/appkit/src/plugins/files/helpers.ts @@ -0,0 +1,31 @@ +export { + contentTypeFromPath, + isTextContentType, +} from "../../connectors/files/defaults"; + +/** + * Extract the parent directory from a file or directory path. + * + * Handles edge cases such as root-level files (`"/file.txt"` → `"/"`), + * paths without slashes (`"file.txt"` → `""`), and trailing slashes. + */ +export function parentDirectory(path: string): string { + const normalized = + path.length > 1 && path.endsWith("/") ? path.slice(0, -1) : path; + const lastSlash = normalized.lastIndexOf("/"); + + if (lastSlash > 0) return normalized.substring(0, lastSlash); + if (normalized.startsWith("/")) return "/"; + return ""; +} + +/** + * Sanitize a filename for use in a `Content-Disposition` HTTP header. + * + * Redundancy check – Unity Catalog is unlikely to allow filenames with + * quotes or control characters, but we sanitize defensively to prevent + * HTTP header injection if upstream constraints ever change. 
+ */ +export function sanitizeFilename(raw: string): string { + return raw.replace(/["\\]/g, "\\$&").replace(/[\r\n]/g, ""); +} diff --git a/packages/appkit/src/plugins/files/index.ts b/packages/appkit/src/plugins/files/index.ts new file mode 100644 index 00000000..7da6ec4d --- /dev/null +++ b/packages/appkit/src/plugins/files/index.ts @@ -0,0 +1,4 @@ +export * from "./defaults"; +export * from "./manifest"; +export * from "./plugin"; +export * from "./types"; diff --git a/packages/appkit/src/plugins/files/manifest.json b/packages/appkit/src/plugins/files/manifest.json new file mode 100644 index 00000000..db1726e0 --- /dev/null +++ b/packages/appkit/src/plugins/files/manifest.json @@ -0,0 +1,36 @@ +{ + "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json", + "name": "files", + "displayName": "Files Plugin", + "description": "File operations against Databricks Volumes and Unity Catalog", + "resources": { + "required": [ + { + "type": "volume", + "alias": "volume", + "resourceKey": "volume", + "description": "Unity Catalog Volume for file storage", + "permission": "WRITE_VOLUME", + "fields": { + "path": { + "env": "DATABRICKS_DEFAULT_VOLUME", + "description": "Volume path (e.g. 
/Volumes/catalog/schema/volume_name)" + } + } + } + ], + "optional": [] + }, + "config": { + "schema": { + "type": "object", + "properties": { + "timeout": { + "type": "number", + "default": 30000, + "description": "File operation timeout in milliseconds" + } + } + } + } +} diff --git a/packages/appkit/src/plugins/files/manifest.ts b/packages/appkit/src/plugins/files/manifest.ts new file mode 100644 index 00000000..59ce59ce --- /dev/null +++ b/packages/appkit/src/plugins/files/manifest.ts @@ -0,0 +1,17 @@ +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import type { PluginManifest } from "../../registry"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +/** + * Files plugin manifest. + * + * @remarks + * The source of truth for this manifest is `manifest.json` in the same directory. + * This file loads the JSON and exports it with proper TypeScript typing. + */ +export const filesManifest: PluginManifest = JSON.parse( + readFileSync(join(__dirname, "manifest.json"), "utf-8"), +) as PluginManifest; diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts new file mode 100644 index 00000000..3460c2b0 --- /dev/null +++ b/packages/appkit/src/plugins/files/plugin.ts @@ -0,0 +1,633 @@ +import { Readable } from "node:stream"; +import type express from "express"; +import type { IAppRouter, PluginExecutionSettings } from "shared"; +import { + contentTypeFromPath, + FilesConnector, + isSafeInlineContentType, +} from "../../connectors/files"; +import { getCurrentUserId, getWorkspaceClient } from "../../context"; +import { createLogger } from "../../logging/logger"; +import { Plugin, toPlugin } from "../../plugin"; +import { + filesDownloadDefaults, + filesReadDefaults, + filesWriteDefaults, +} from "./defaults"; +import { parentDirectory, sanitizeFilename } from "./helpers"; +import { filesManifest } from "./manifest"; +import type { 
DownloadResponse, IFilesConfig } from "./types"; + +const logger = createLogger("files"); + +export class FilesPlugin extends Plugin<IFilesConfig> { + name = "files"; + + /** Plugin manifest declaring metadata and resource requirements. */ + static manifest = filesManifest; + protected static description = "Files plugin for Databricks file operations"; + protected declare config: IFilesConfig; + + private filesConnector: FilesConnector; + + constructor(config: IFilesConfig) { + super(config); + this.config = config; + this.filesConnector = new FilesConnector({ + defaultVolume: config.defaultVolume, + timeout: config.timeout, + telemetry: config.telemetry, + customContentTypes: config.customContentTypes, + }); + } + + /** + * List entries in a directory. + * + * @param directoryPath - Absolute or relative path. Defaults to the configured `defaultVolume` root. + * @returns Array of directory entries. + */ + async list(directoryPath?: string) { + return this.filesConnector.list(getWorkspaceClient(), directoryPath); + } + + /** + * Read a file and return its contents as a string. + * + * @param filePath - Absolute or relative path to the file. + * @returns The file contents as a UTF-8 string. + */ + async read(filePath: string) { + return this.filesConnector.read(getWorkspaceClient(), filePath); + } + + /** + * Download a file as a readable stream. + * + * @param filePath - Absolute or relative path to the file. + * @returns A response containing a readable stream of the file contents. + */ + async download(filePath: string): Promise<DownloadResponse> { + return this.filesConnector.download(getWorkspaceClient(), filePath); + } + + /** + * Check whether a file exists. + * + * @param filePath - Absolute or relative path to the file. + * @returns `true` if the file exists, `false` otherwise. + */ + async exists(filePath: string) { + return this.filesConnector.exists(getWorkspaceClient(), filePath); + } + + /** + * Retrieve metadata (size, content type, last modified) for a file. 
+ * + * @param filePath - Absolute or relative path to the file. + * @returns File metadata including content length, type, and last modified date. + */ + async metadata(filePath: string) { + return this.filesConnector.metadata(getWorkspaceClient(), filePath); + } + + /** + * Upload a file to a Unity Catalog volume. + * + * @param filePath - Absolute or relative destination path. + * @param contents - File body as a readable stream, Buffer, or string. + * @param options - Upload options. + * @param options.overwrite - When `true`, overwrite an existing file at the same path. + */ + async upload( + filePath: string, + contents: ReadableStream | Buffer | string, + options?: { overwrite?: boolean }, + ) { + return this.filesConnector.upload( + getWorkspaceClient(), + filePath, + contents, + options, + ); + } + + /** + * Create a directory in a Unity Catalog volume. + * + * @param directoryPath - Absolute or relative path for the new directory. + */ + async createDirectory(directoryPath: string) { + return this.filesConnector.createDirectory( + getWorkspaceClient(), + directoryPath, + ); + } + + /** + * Delete a file or directory from a Unity Catalog volume. + * + * @param filePath - Absolute or relative path to the file or directory. + */ + async delete(filePath: string) { + return this.filesConnector.delete(getWorkspaceClient(), filePath); + } + + /** + * Get a preview of a file including metadata and a text excerpt. + * + * @param filePath - Absolute or relative path to the file. + * @returns Preview with metadata, text content hint, and format flags. + */ + async preview(filePath: string) { + return this.filesConnector.preview(getWorkspaceClient(), filePath); + } + + injectRoutes(router: IAppRouter) { + this.route(router, { + name: "root", + method: "get", + path: "/root", + handler: async (_req: express.Request, res: express.Response) => { + res.json({ root: this.config.defaultVolume ?? 
null }); + }, + }); + + this.route(router, { + name: "list", + method: "get", + path: "/list", + handler: async (req: express.Request, res: express.Response) => { + await this._handleList(req, res); + }, + }); + + this.route(router, { + name: "read", + method: "get", + path: "/read", + handler: async (req: express.Request, res: express.Response) => { + await this._handleRead(req, res); + }, + }); + + this.route(router, { + name: "download", + method: "get", + path: "/download", + handler: async (req: express.Request, res: express.Response) => { + await this._handleDownload(req, res); + }, + }); + + this.route(router, { + name: "raw", + method: "get", + path: "/raw", + handler: async (req: express.Request, res: express.Response) => { + await this._handleRaw(req, res); + }, + }); + + this.route(router, { + name: "exists", + method: "get", + path: "/exists", + handler: async (req: express.Request, res: express.Response) => { + await this._handleExists(req, res); + }, + }); + + this.route(router, { + name: "metadata", + method: "get", + path: "/metadata", + handler: async (req: express.Request, res: express.Response) => { + await this._handleMetadata(req, res); + }, + }); + + this.route(router, { + name: "preview", + method: "get", + path: "/preview", + handler: async (req: express.Request, res: express.Response) => { + await this._handlePreview(req, res); + }, + }); + + this.route(router, { + name: "upload", + method: "post", + path: "/upload", + skipBodyParsing: true, + handler: async (req: express.Request, res: express.Response) => { + await this._handleUpload(req, res); + }, + }); + + this.route(router, { + name: "mkdir", + method: "post", + path: "/mkdir", + handler: async (req: express.Request, res: express.Response) => { + await this._handleMkdir(req, res); + }, + }); + + this.route(router, { + name: "delete", + method: "post", + path: "/delete", + handler: async (req: express.Request, res: express.Response) => { + await this._handleDelete(req, res); + }, + }); 
+ } + + private _readSettings( + cacheKey: (string | number | object)[], + ): PluginExecutionSettings { + return { + default: { + ...filesReadDefaults, + cache: { ...filesReadDefaults.cache, cacheKey }, + }, + }; + } + + private _resolvePath(path: string): string { + return this.filesConnector.resolvePath(path); + } + + /** + * Invalidate cached list entries for a directory after a write operation. + */ + private _invalidateListCache(directoryPath: string): void { + const userKey = getCurrentUserId(); + const listKey = this.cache.generateKey( + ["files:list", directoryPath], + userKey, + ); + this.cache.delete(listKey); + } + + private async _handleList( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string | undefined; + const executor = this.asUser(req); + + const result = await executor.execute( + async () => executor.list(path), + this._readSettings([ + "files:list", + path ? this._resolvePath(path) : "__root__", + ]), + ); + + if (result === undefined) { + res.status(500).json({ error: "List failed", plugin: this.name }); + return; + } + res.json(result); + } + + private async _handleRead( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const result = await executor.execute( + async () => executor.read(path), + this._readSettings(["files:read", this._resolvePath(path)]), + ); + + if (result === undefined) { + res.status(500).json({ error: "Read failed", plugin: this.name }); + return; + } + res.type("text/plain").send(result); + } + + private async _handleDownload( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + 
const settings: PluginExecutionSettings = { + default: filesDownloadDefaults, + }; + const response = await executor.execute( + async () => executor.download(path), + settings, + ); + + if (response === undefined) { + res.status(500).json({ error: "Download failed", plugin: this.name }); + return; + } + + const fileName = sanitizeFilename(path.split("/").pop() ?? "download"); + res.setHeader("Content-Disposition", `attachment; filename="${fileName}"`); + res.setHeader( + "Content-Type", + contentTypeFromPath(path, undefined, this.config.customContentTypes), + ); + res.setHeader("X-Content-Type-Options", "nosniff"); + if (response.contents) { + const nodeStream = Readable.fromWeb( + response.contents as import("node:stream/web").ReadableStream, + ); + nodeStream.pipe(res); + } else { + res.end(); + } + } + + private async _handleRaw( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const settings: PluginExecutionSettings = { + default: filesDownloadDefaults, + }; + const response = await executor.execute( + async () => executor.download(path), + settings, + ); + + if (response === undefined) { + res.status(500).json({ error: "Raw fetch failed", plugin: this.name }); + return; + } + + const resolvedType = contentTypeFromPath( + path, + undefined, + this.config.customContentTypes, + ); + + res.setHeader("Content-Type", resolvedType); + res.setHeader("X-Content-Type-Options", "nosniff"); + res.setHeader("Content-Security-Policy", "sandbox"); + + if (!isSafeInlineContentType(resolvedType)) { + const fileName = sanitizeFilename(path.split("/").pop() ?? 
"download"); + res.setHeader( + "Content-Disposition", + `attachment; filename="${fileName}"`, + ); + } + + if (response.contents) { + const nodeStream = Readable.fromWeb( + response.contents as import("node:stream/web").ReadableStream, + ); + nodeStream.pipe(res); + } else { + res.end(); + } + } + + private async _handleExists( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const result = await executor.execute( + async () => executor.exists(path), + this._readSettings(["files:exists", this._resolvePath(path)]), + ); + + if (result === undefined) { + res.status(500).json({ error: "Exists check failed", plugin: this.name }); + return; + } + res.json({ exists: result }); + } + + private async _handleMetadata( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const result = await executor.execute( + async () => executor.metadata(path), + this._readSettings(["files:metadata", this._resolvePath(path)]), + ); + + if (result === undefined) { + res + .status(500) + .json({ error: "Metadata fetch failed", plugin: this.name }); + return; + } + res.json(result); + } + + private async _handlePreview( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const result = await executor.execute( + async () => executor.preview(path), + this._readSettings(["files:preview", this._resolvePath(path)]), + ); + + if (result === undefined) { + res.status(500).json({ error: "Preview failed", plugin: 
this.name }); + return; + } + res.json(result); + } + + private async _handleUpload( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.query.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + logger.debug(req, "Upload started: path=%s", path); + + const webStream: ReadableStream = Readable.toWeb(req); + + logger.debug( + req, + "Upload body received: path=%s, size=%d bytes", + path, + req.headers["content-length"] + ? parseInt(req.headers["content-length"], 10) + : 0, + ); + + const executor = this.asUser(req); + const settings: PluginExecutionSettings = { + default: filesWriteDefaults, + }; + const result = await executor.execute(async () => { + await executor.upload(path, webStream); + return { success: true as const }; + }, settings); + + if (result === undefined) { + logger.error( + req, + "Upload failed: path=%s, size=%d bytes", + path, + req.headers["content-length"] + ? parseInt(req.headers["content-length"], 10) + : 0, + ); + res.status(500).json({ error: "Upload failed", plugin: this.name }); + return; + } + + this._invalidateListCache(this._resolvePath(parentDirectory(path))); + + logger.debug(req, "Upload complete: path=%s", path); + res.json(result); + } + + private async _handleMkdir( + req: express.Request, + res: express.Response, + ): Promise { + const dirPath = req.body?.path as string; + if (!dirPath) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const settings: PluginExecutionSettings = { + default: filesWriteDefaults, + }; + const result = await executor.execute(async () => { + await executor.createDirectory(dirPath); + return { success: true as const }; + }, settings); + + if (result === undefined) { + res + .status(500) + .json({ error: "Create directory failed", plugin: this.name }); + return; + } + + 
this._invalidateListCache(this._resolvePath(parentDirectory(dirPath))); + + res.json(result); + } + + private async _handleDelete( + req: express.Request, + res: express.Response, + ): Promise { + const path = req.body?.path as string; + if (!path) { + res.status(400).json({ error: "path is required", plugin: this.name }); + return; + } + + const executor = this.asUser(req); + const settings: PluginExecutionSettings = { + default: filesWriteDefaults, + }; + const result = await executor.execute(async () => { + await executor.delete(path); + return { success: true as const }; + }, settings); + + if (result === undefined) { + res.status(500).json({ error: "Delete failed", plugin: this.name }); + return; + } + + this._invalidateListCache(this._resolvePath(parentDirectory(path))); + + res.json(result); + } + + async shutdown(): Promise { + this.streamManager.abortAll(); + } + + /** + * Returns the programmatic API for the Files plugin. + * Note: `asUser()` is automatically added by AppKit. + */ + exports() { + return { + /** List entries in a directory. */ + list: this.list, + /** Read a file as a string. */ + read: this.read, + /** Download a file as a readable stream. */ + download: this.download, + /** Check whether a file exists. */ + exists: this.exists, + /** Retrieve file metadata. */ + metadata: this.metadata, + /** Upload a file. */ + upload: this.upload, + /** Create a directory. */ + createDirectory: this.createDirectory, + /** Delete a file or directory. */ + delete: this.delete, + /** Get a file preview with text excerpt. 
*/ + preview: this.preview, + }; + } +} + +/** + * @internal + */ +export const files = toPlugin( + FilesPlugin, + "files", +); diff --git a/packages/appkit/src/plugins/files/tests/helpers.test.ts b/packages/appkit/src/plugins/files/tests/helpers.test.ts new file mode 100644 index 00000000..48ae4218 --- /dev/null +++ b/packages/appkit/src/plugins/files/tests/helpers.test.ts @@ -0,0 +1,183 @@ +import { describe, expect, test } from "vitest"; +import { + contentTypeFromPath, + isTextContentType, + parentDirectory, + sanitizeFilename, +} from "../helpers"; + +describe("contentTypeFromPath", () => { + test("works without reported type", () => { + expect(contentTypeFromPath("/data.json")).toBe("application/json"); + }); + + test("returns application/octet-stream for unknown extensions with no reported type", () => { + expect(contentTypeFromPath("/file.xyz")).toBe("application/octet-stream"); + }); + + test("handles case-insensitive extensions", () => { + expect(contentTypeFromPath("/image.PNG")).toBe("image/png"); + expect(contentTypeFromPath("/data.Json")).toBe("application/json"); + }); + + test("uses extension when reported is undefined", () => { + expect(contentTypeFromPath("/style.css", undefined)).toBe("text/css"); + }); + + test("prefers extension type over reported type for known extensions", () => { + // Extension takes priority to prevent MIME type mismatch attacks + expect(contentTypeFromPath("/file.json", "text/html")).toBe( + "application/json", + ); + }); + + test("falls back to reported type for unknown extensions", () => { + expect(contentTypeFromPath("/file.xyz", "text/plain")).toBe("text/plain"); + }); + + test("handles paths with multiple dots", () => { + expect(contentTypeFromPath("/archive.tar.gz")).toBe("application/gzip"); + expect(contentTypeFromPath("/data.backup.json")).toBe("application/json"); + }); + + test("resolves .ico to IANA standard type", () => { + expect(contentTypeFromPath("/favicon.ico")).toBe( + "image/vnd.microsoft.icon", + ); + 
}); + + test("resolves Databricks-relevant file types", () => { + expect(contentTypeFromPath("/config.yaml")).toBe("application/x-yaml"); + expect(contentTypeFromPath("/config.yml")).toBe("application/x-yaml"); + expect(contentTypeFromPath("/query.sql")).toBe("application/sql"); + expect(contentTypeFromPath("/data.parquet")).toBe( + "application/vnd.apache.parquet", + ); + expect(contentTypeFromPath("/events.jsonl")).toBe("application/x-ndjson"); + expect(contentTypeFromPath("/notebook.ipynb")).toBe( + "application/x-ipynb+json", + ); + expect(contentTypeFromPath("/script.py")).toBe("text/x-python"); + expect(contentTypeFromPath("/archive.zip")).toBe("application/zip"); + expect(contentTypeFromPath("/data.gz")).toBe("application/gzip"); + expect(contentTypeFromPath("/app.ts")).toBe("text/typescript"); + }); + + test("custom types override defaults", () => { + const custom = { ".json": "text/json", ".custom": "application/custom" }; + expect(contentTypeFromPath("/data.json", undefined, custom)).toBe( + "text/json", + ); + expect(contentTypeFromPath("/file.custom", undefined, custom)).toBe( + "application/custom", + ); + }); + + test("falls back to defaults when custom types don't match", () => { + const custom = { ".custom": "application/custom" }; + expect(contentTypeFromPath("/data.json", undefined, custom)).toBe( + "application/json", + ); + }); + + test("custom types take priority over reported type", () => { + const custom = { ".xyz": "application/xyz" }; + expect(contentTypeFromPath("/file.xyz", "text/plain", custom)).toBe( + "application/xyz", + ); + }); +}); + +describe("isTextContentType", () => { + test("returns false for undefined", () => { + expect(isTextContentType(undefined)).toBe(false); + }); + + test("returns true for text/* types", () => { + expect(isTextContentType("text/plain")).toBe(true); + expect(isTextContentType("text/html")).toBe(true); + expect(isTextContentType("text/markdown")).toBe(true); + 
expect(isTextContentType("text/x-python")).toBe(true); + expect(isTextContentType("text/typescript")).toBe(true); + }); + + test("returns true for text-based application/ types", () => { + expect(isTextContentType("application/json")).toBe(true); + expect(isTextContentType("application/xml")).toBe(true); + expect(isTextContentType("application/sql")).toBe(true); + expect(isTextContentType("application/javascript")).toBe(true); + expect(isTextContentType("application/x-yaml")).toBe(true); + expect(isTextContentType("application/x-ndjson")).toBe(true); + expect(isTextContentType("application/x-ipynb+json")).toBe(true); + }); + + test("returns false for binary application/ types", () => { + expect(isTextContentType("application/pdf")).toBe(false); + expect(isTextContentType("application/zip")).toBe(false); + expect(isTextContentType("application/gzip")).toBe(false); + expect(isTextContentType("application/octet-stream")).toBe(false); + expect(isTextContentType("application/vnd.apache.parquet")).toBe(false); + }); + + test("returns false for image types", () => { + expect(isTextContentType("image/png")).toBe(false); + expect(isTextContentType("image/jpeg")).toBe(false); + }); +}); + +describe("parentDirectory", () => { + test("extracts parent from nested path", () => { + expect(parentDirectory("/Volumes/catalog/schema/vol/file.txt")).toBe( + "/Volumes/catalog/schema/vol", + ); + }); + + test("extracts parent from two-segment path", () => { + expect(parentDirectory("/dir/file.txt")).toBe("/dir"); + }); + + test("returns root for root-level file", () => { + expect(parentDirectory("/file.txt")).toBe("/"); + }); + + test("returns empty string for relative path without slash", () => { + expect(parentDirectory("file.txt")).toBe(""); + }); + + test("strips trailing slash before computing parent", () => { + expect(parentDirectory("/dir/subdir/")).toBe("/dir"); + }); + + test("handles root path with trailing slash", () => { + expect(parentDirectory("/")).toBe("/"); + }); + + 
test("handles relative nested path", () => { + expect(parentDirectory("subdir/file.txt")).toBe("subdir"); + }); +}); + +describe("sanitizeFilename", () => { + test("passes through clean filenames unchanged", () => { + expect(sanitizeFilename("report.pdf")).toBe("report.pdf"); + expect(sanitizeFilename("my-file_v2.txt")).toBe("my-file_v2.txt"); + }); + + test("escapes double quotes", () => { + expect(sanitizeFilename('file"name.txt')).toBe('file\\"name.txt'); + }); + + test("escapes backslashes", () => { + expect(sanitizeFilename("file\\name.txt")).toBe("file\\\\name.txt"); + }); + + test("strips carriage returns and newlines", () => { + expect(sanitizeFilename("file\r\nname.txt")).toBe("filename.txt"); + expect(sanitizeFilename("file\rname.txt")).toBe("filename.txt"); + expect(sanitizeFilename("file\nname.txt")).toBe("filename.txt"); + }); + + test("handles combined special characters", () => { + expect(sanitizeFilename('a"b\\c\r\nd.txt')).toBe('a\\"b\\\\cd.txt'); + }); +}); diff --git a/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts new file mode 100644 index 00000000..fb7ec39a --- /dev/null +++ b/packages/appkit/src/plugins/files/tests/plugin.integration.test.ts @@ -0,0 +1,447 @@ +import type { Server } from "node:http"; +import { mockServiceContext, setupDatabricksEnv } from "@tools/test-helpers"; +import { + afterAll, + beforeAll, + beforeEach, + describe, + expect, + test, + vi, +} from "vitest"; +import { ServiceContext } from "../../../context/service-context"; +import { createApp } from "../../../core"; +import { server as serverPlugin } from "../../server"; +import { files } from "../index"; +import { streamFromString } from "./utils"; + +const { mockFilesApi, mockSdkClient, MockApiError } = vi.hoisted(() => { + const mockFilesApi = { + listDirectoryContents: vi.fn(), + download: vi.fn(), + getMetadata: vi.fn(), + upload: vi.fn(), + createDirectory: vi.fn(), + delete: 
vi.fn(), + }; + + const mockSdkClient = { + files: mockFilesApi, + config: { + host: "https://test.databricks.com", + authenticate: vi.fn(), + }, + currentUser: { + me: vi.fn().mockResolvedValue({ id: "test-user" }), + }, + }; + + class MockApiError extends Error { + statusCode: number; + constructor(message: string, statusCode: number) { + super(message); + this.name = "ApiError"; + this.statusCode = statusCode; + } + } + + return { mockFilesApi, mockSdkClient, MockApiError }; +}); + +vi.mock("@databricks/sdk-experimental", async (importOriginal) => { + const actual = + await importOriginal(); + return { + ...actual, + ApiError: MockApiError, + }; +}); + +const authHeaders = { + "x-forwarded-access-token": "test-token", + "x-forwarded-user": "test-user", +}; + +describe("Files Plugin Integration", () => { + let server: Server; + let baseUrl: string; + let serviceContextMock: Awaited>; + const TEST_PORT = 9880; + + beforeAll(async () => { + setupDatabricksEnv({ + DATABRICKS_DEFAULT_VOLUME: "/Volumes/catalog/schema/vol", + }); + ServiceContext.reset(); + + serviceContextMock = await mockServiceContext({ + serviceDatabricksClient: mockSdkClient, + userDatabricksClient: mockSdkClient, + }); + + const appkit = await createApp({ + plugins: [ + serverPlugin({ + port: TEST_PORT, + host: "127.0.0.1", + autoStart: false, + }), + files({ defaultVolume: "/Volumes/catalog/schema/vol" }), + ], + }); + + await appkit.server.start(); + server = appkit.server.getServer(); + baseUrl = `http://127.0.0.1:${TEST_PORT}`; + }); + + afterAll(async () => { + delete process.env.DATABRICKS_DEFAULT_VOLUME; + serviceContextMock?.restore(); + if (server) { + await new Promise((resolve, reject) => { + server.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); + } + }); + + beforeEach(() => { + mockFilesApi.listDirectoryContents.mockReset(); + mockFilesApi.download.mockReset(); + mockFilesApi.getMetadata.mockReset(); + mockFilesApi.upload.mockReset(); + 
mockFilesApi.createDirectory.mockReset(); + mockFilesApi.delete.mockReset(); + }); + + describe("List Directory", () => { + test("GET /api/files/list returns directory entries", async () => { + const entries = [ + { + name: "file1.txt", + path: "/Volumes/catalog/schema/vol/file1.txt", + is_directory: false, + }, + { + name: "subdir", + path: "/Volumes/catalog/schema/vol/subdir", + is_directory: true, + }, + ]; + + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () { + for (const entry of entries) { + yield entry; + } + })(), + ); + + const response = await fetch(`${baseUrl}/api/files/list`, { + headers: authHeaders, + }); + + expect(response.status).toBe(200); + const data = await response.json(); + expect(data).toEqual(entries); + }); + + test("GET /api/files/list?path=/abs/path uses provided path", async () => { + mockFilesApi.listDirectoryContents.mockReturnValue( + (async function* () {})(), + ); + + const response = await fetch( + `${baseUrl}/api/files/list?path=/Volumes/other/path`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + expect(mockFilesApi.listDirectoryContents).toHaveBeenCalledWith({ + directory_path: "/Volumes/other/path", + }); + }); + }); + + describe("Read File", () => { + test("GET /api/files/read?path=/file.txt returns text content", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("file content here"), + }); + + const response = await fetch( + `${baseUrl}/api/files/read?path=/Volumes/catalog/schema/vol/file.txt`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const text = await response.text(); + expect(text).toBe("file content here"); + }); + + test("GET /api/files/read without path returns 400", async () => { + const response = await fetch(`${baseUrl}/api/files/read`, { + headers: authHeaders, + }); + + expect(response.status).toBe(400); + const data = await response.json(); + expect(data).toEqual({ error: "path is required", 
plugin: "files" }); + }); + }); + + describe("Exists", () => { + test("GET /api/files/exists returns { exists: true }", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 100, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + + const response = await fetch( + `${baseUrl}/api/files/exists?path=/Volumes/catalog/schema/vol/file.txt`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const data = await response.json(); + expect(data).toEqual({ exists: true }); + }); + + test("GET /api/files/exists returns { exists: false } on 404", async () => { + mockFilesApi.getMetadata.mockRejectedValue( + new MockApiError("Not found", 404), + ); + + const response = await fetch( + `${baseUrl}/api/files/exists?path=/Volumes/missing.txt`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const data = await response.json(); + expect(data).toEqual({ exists: false }); + }); + }); + + describe("Metadata", () => { + test("GET /api/files/metadata returns correct metadata", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 256, + "content-type": "application/json", + "last-modified": "2025-06-15T10:00:00Z", + }); + + const response = await fetch( + `${baseUrl}/api/files/metadata?path=/Volumes/catalog/schema/vol/file.json`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const data = await response.json(); + expect(data).toEqual({ + contentLength: 256, + contentType: "application/json", + lastModified: "2025-06-15T10:00:00Z", + }); + }); + }); + + describe("Preview", () => { + test("GET /api/files/preview returns text preview", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 20, + "content-type": "text/plain", + "last-modified": "2025-01-01", + }); + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("Hello preview!"), + }); + + const response = await fetch( + 
`${baseUrl}/api/files/preview?path=/Volumes/catalog/schema/vol/file.txt`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const data = (await response.json()) as { + isText: boolean; + isImage: boolean; + textPreview: string | null; + }; + expect(data.isText).toBe(true); + expect(data.isImage).toBe(false); + expect(data.textPreview).toBe("Hello preview!"); + }); + + test("GET /api/files/preview returns image metadata", async () => { + mockFilesApi.getMetadata.mockResolvedValue({ + "content-length": 5000, + "content-type": "image/png", + "last-modified": "2025-01-01", + }); + + const response = await fetch( + `${baseUrl}/api/files/preview?path=/Volumes/catalog/schema/vol/image.png`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(200); + const data = (await response.json()) as { + isText: boolean; + isImage: boolean; + textPreview: string | null; + }; + expect(data.isImage).toBe(true); + expect(data.isText).toBe(false); + expect(data.textPreview).toBeNull(); + }); + }); + + describe("Raw Endpoint Security Headers", () => { + test("safe type (image/png) sets security headers without Content-Disposition", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("PNG data"), + }); + + const response = await fetch( + `${baseUrl}/api/files/raw?path=/Volumes/catalog/schema/vol/image.png`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe("image/png"); + expect(response.headers.get("x-content-type-options")).toBe("nosniff"); + expect(response.headers.get("content-security-policy")).toBe("sandbox"); + expect(response.headers.get("content-disposition")).toBeNull(); + }); + + test("dangerous type (text/html) forces download via Content-Disposition", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(""), + }); + + const response = await fetch( + 
`${baseUrl}/api/files/raw?path=/Volumes/catalog/schema/vol/malicious.html`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe("text/html"); + expect(response.headers.get("x-content-type-options")).toBe("nosniff"); + expect(response.headers.get("content-security-policy")).toBe("sandbox"); + expect(response.headers.get("content-disposition")).toBe( + 'attachment; filename="malicious.html"', + ); + }); + + test("SVG (image/svg+xml) is treated as dangerous", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString(""), + }); + + const response = await fetch( + `${baseUrl}/api/files/raw?path=/Volumes/catalog/schema/vol/icon.svg`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe("image/svg+xml"); + expect(response.headers.get("content-security-policy")).toBe("sandbox"); + expect(response.headers.get("content-disposition")).toBe( + 'attachment; filename="icon.svg"', + ); + }); + + test("JavaScript (text/javascript) is treated as dangerous", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("alert('xss')"), + }); + + const response = await fetch( + `${baseUrl}/api/files/raw?path=/Volumes/catalog/schema/vol/script.js`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe("text/javascript"); + expect(response.headers.get("content-security-policy")).toBe("sandbox"); + expect(response.headers.get("content-disposition")).toBe( + 'attachment; filename="script.js"', + ); + }); + + test("safe type (application/json) is served inline", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString('{"key":"value"}'), + }); + + const response = await fetch( + 
`${baseUrl}/api/files/raw?path=/Volumes/catalog/schema/vol/data.json`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("content-type")).toBe("application/json"); + expect(response.headers.get("x-content-type-options")).toBe("nosniff"); + expect(response.headers.get("content-security-policy")).toBe("sandbox"); + expect(response.headers.get("content-disposition")).toBeNull(); + }); + }); + + describe("Download Endpoint Security Headers", () => { + test("sets X-Content-Type-Options: nosniff", async () => { + mockFilesApi.download.mockResolvedValue({ + contents: streamFromString("file data"), + }); + + const response = await fetch( + `${baseUrl}/api/files/download?path=/Volumes/catalog/schema/vol/file.txt`, + { headers: authHeaders, redirect: "manual" }, + ); + + expect(response.status).toBe(200); + expect(response.headers.get("x-content-type-options")).toBe("nosniff"); + expect(response.headers.get("content-disposition")).toBe( + 'attachment; filename="file.txt"', + ); + }); + }); + + describe("Error Handling", () => { + test("SDK exceptions return 500 with generic error", async () => { + mockFilesApi.getMetadata.mockRejectedValue( + new Error("SDK connection failed"), + ); + + const response = await fetch( + `${baseUrl}/api/files/metadata?path=/Volumes/catalog/schema/vol/file.txt`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(500); + const data = (await response.json()) as { error: string; plugin: string }; + expect(data.error).toBe("Metadata fetch failed"); + expect(data.plugin).toBe("files"); + }); + + test("list errors return 500", async () => { + mockFilesApi.listDirectoryContents.mockRejectedValue( + new Error("Permission denied"), + ); + + const response = await fetch( + `${baseUrl}/api/files/list?path=/Volumes/uncached/path`, + { headers: authHeaders }, + ); + + expect(response.status).toBe(500); + const data = (await response.json()) as { error: string; plugin: 
string }; + expect(data.error).toBe("List failed"); + expect(data.plugin).toBe("files"); + }); + }); +}); diff --git a/packages/appkit/src/plugins/files/tests/plugin.test.ts b/packages/appkit/src/plugins/files/tests/plugin.test.ts new file mode 100644 index 00000000..9ea52118 --- /dev/null +++ b/packages/appkit/src/plugins/files/tests/plugin.test.ts @@ -0,0 +1,134 @@ +import { mockServiceContext, setupDatabricksEnv } from "@tools/test-helpers"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { ServiceContext } from "../../../context/service-context"; +import { FilesPlugin, files } from "../plugin"; + +const { mockClient, MockApiError, mockCacheInstance } = vi.hoisted(() => { + const mockFilesApi = { + listDirectoryContents: vi.fn(), + download: vi.fn(), + getMetadata: vi.fn(), + upload: vi.fn(), + createDirectory: vi.fn(), + delete: vi.fn(), + }; + + const mockClient = { + files: mockFilesApi, + config: { + host: "https://test.databricks.com", + authenticate: vi.fn(), + }, + }; + + class MockApiError extends Error { + statusCode: number; + constructor(message: string, statusCode: number) { + super(message); + this.name = "ApiError"; + this.statusCode = statusCode; + } + } + + const mockCacheInstance = { + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + getOrExecute: vi.fn(async (_key: unknown[], fn: () => Promise) => + fn(), + ), + generateKey: vi.fn(), + }; + + return { mockFilesApi, mockClient, MockApiError, mockCacheInstance }; +}); + +vi.mock("@databricks/sdk-experimental", () => ({ + WorkspaceClient: vi.fn(() => mockClient), + ApiError: MockApiError, +})); + +vi.mock("../../../context", () => ({ + getWorkspaceClient: vi.fn(() => mockClient), +})); + +vi.mock("../../../cache", () => ({ + CacheManager: { + getInstanceSync: vi.fn(() => mockCacheInstance), + }, +})); + +describe("FilesPlugin", () => { + let serviceContextMock: Awaited>; + + beforeEach(async () => { + vi.clearAllMocks(); + setupDatabricksEnv(); + 
ServiceContext.reset(); + serviceContextMock = await mockServiceContext(); + }); + + afterEach(() => { + serviceContextMock?.restore(); + }); + + test('plugin name is "files"', () => { + const pluginData = files({ defaultVolume: "/Volumes/test" }); + expect(pluginData.name).toBe("files"); + }); + + test("plugin instance has correct name", () => { + const plugin = new FilesPlugin({ defaultVolume: "/Volumes/test" }); + expect(plugin.name).toBe("files"); + }); + + test("exports() returns all expected methods", () => { + const plugin = new FilesPlugin({ defaultVolume: "/Volumes/test" }); + const exported = plugin.exports(); + + expect(exported).toHaveProperty("list"); + expect(exported).toHaveProperty("read"); + expect(exported).toHaveProperty("download"); + expect(exported).toHaveProperty("exists"); + expect(exported).toHaveProperty("metadata"); + expect(exported).toHaveProperty("upload"); + expect(exported).toHaveProperty("createDirectory"); + expect(exported).toHaveProperty("delete"); + expect(exported).toHaveProperty("preview"); + + for (const value of Object.values(exported)) { + expect(typeof value).toBe("function"); + } + }); + + test("injectRoutes registers GET and POST routes", () => { + const plugin = new FilesPlugin({ defaultVolume: "/Volumes/test" }); + const mockRouter = { + get: vi.fn(), + post: vi.fn(), + put: vi.fn(), + delete: vi.fn(), + patch: vi.fn(), + } as any; + + plugin.injectRoutes(mockRouter); + + // 8 GET routes + // root, list, read, download, raw, exists, metadata, preview + expect(mockRouter.get).toHaveBeenCalledTimes(8); + // 3 POST routes: + // upload, mkdir, delete + expect(mockRouter.post).toHaveBeenCalledTimes(3); + expect(mockRouter.put).not.toHaveBeenCalled(); + expect(mockRouter.patch).not.toHaveBeenCalled(); + }); + + test("shutdown() calls streamManager.abortAll()", async () => { + const plugin = new FilesPlugin({ defaultVolume: "/Volumes/test" }); + const abortAllSpy = vi.spyOn((plugin as any).streamManager, "abortAll"); + + 
await plugin.shutdown(); + + expect(abortAllSpy).toHaveBeenCalled(); + }); +}); diff --git a/packages/appkit/src/plugins/files/tests/utils.ts b/packages/appkit/src/plugins/files/tests/utils.ts new file mode 100644 index 00000000..d204d8da --- /dev/null +++ b/packages/appkit/src/plugins/files/tests/utils.ts @@ -0,0 +1,22 @@ +export function streamFromString(text: string): ReadableStream { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(text)); + controller.close(); + }, + }); +} + +// Creates a ReadableStream that yields multiple chunks +export function streamFromChunks(chunks: string[]): ReadableStream { + const encoder = new TextEncoder(); + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} diff --git a/packages/appkit/src/plugins/files/types.ts b/packages/appkit/src/plugins/files/types.ts new file mode 100644 index 00000000..4a0baa75 --- /dev/null +++ b/packages/appkit/src/plugins/files/types.ts @@ -0,0 +1,44 @@ +import type { files } from "@databricks/sdk-experimental"; +import type { BasePluginConfig } from "shared"; + +/** + * Configuration for the Files plugin. + */ +export interface IFilesConfig extends BasePluginConfig { + /** Operation timeout in milliseconds. Overrides the per-tier defaults. */ + timeout?: number; + /** Absolute volume path used to resolve relative file paths (e.g. `"/Volumes/catalog/schema/vol"`). */ + defaultVolume?: string; + /** Map of file extensions to MIME types that takes priority over the built-in extension map. */ + customContentTypes?: Record; +} + +/** A single entry returned when listing a directory. Re-exported from `@databricks/sdk-experimental`. */ +export type DirectoryEntry = files.DirectoryEntry; + +/** Response object for file downloads containing a readable stream. Re-exported from `@databricks/sdk-experimental`. 
*/ +export type DownloadResponse = files.DownloadResponse; + +/** + * Metadata for a file stored in a Unity Catalog volume. + */ +export interface FileMetadata { + /** File size in bytes. */ + contentLength: number | undefined; + /** MIME content type of the file. */ + contentType: string | undefined; + /** ISO 8601 timestamp of the last modification. */ + lastModified: string | undefined; +} + +/** + * Preview information for a file, extending {@link FileMetadata} with content hints. + */ +export interface FilePreview extends FileMetadata { + /** First portion of text content, or `null` for non-text files. */ + textPreview: string | null; + /** Whether the file is detected as a text format. */ + isText: boolean; + /** Whether the file is detected as an image format. */ + isImage: boolean; +} diff --git a/packages/appkit/src/plugins/index.ts b/packages/appkit/src/plugins/index.ts index aba6f26b..ccc652ff 100644 --- a/packages/appkit/src/plugins/index.ts +++ b/packages/appkit/src/plugins/index.ts @@ -1,2 +1,3 @@ export * from "./analytics"; +export * from "./files"; export * from "./server"; diff --git a/packages/appkit/src/plugins/server/index.ts b/packages/appkit/src/plugins/server/index.ts index 40cf01e0..a2fb3354 100644 --- a/packages/appkit/src/plugins/server/index.ts +++ b/packages/appkit/src/plugins/server/index.ts @@ -50,6 +50,7 @@ export class ServerPlugin extends Plugin { private remoteTunnelController?: RemoteTunnelController; protected declare config: ServerConfig; private serverExtensions: ((app: express.Application) => void)[] = []; + private rawBodyPaths: Set = new Set(); static phase: PluginPhase = "deferred"; constructor(config: ServerConfig) { @@ -92,7 +93,20 @@ export class ServerPlugin extends Plugin { * @returns The express application. 
*/ async start(): Promise { - this.serverApplication.use(express.json()); + this.serverApplication.use( + express.json({ + type: (req) => { + // Skip JSON parsing for routes that declared skipBodyParsing + // (e.g. file uploads where the raw body must flow through). + // rawBodyPaths is populated by extendRoutes() below; the type + // callback runs per-request so the set is already filled. + const urlPath = req.url?.split("?")[0]; + if (urlPath && this.rawBodyPaths.has(urlPath)) return false; + const ct = req.headers["content-type"] ?? ""; + return ct.includes("json"); + }, + }), + ); const endpoints = await this.extendRoutes(); @@ -194,6 +208,16 @@ export class ServerPlugin extends Plugin { // Collect named endpoints from the plugin endpoints[plugin.name] = plugin.getEndpoints(); + + // Collect paths that should skip body parsing + if ( + plugin.getSkipBodyParsingPaths && + typeof plugin.getSkipBodyParsingPaths === "function" + ) { + for (const p of plugin.getSkipBodyParsingPaths()) { + this.rawBodyPaths.add(p); + } + } } } diff --git a/packages/appkit/src/plugins/server/tests/server.test.ts b/packages/appkit/src/plugins/server/tests/server.test.ts index 31305fc7..099f2b13 100644 --- a/packages/appkit/src/plugins/server/tests/server.test.ts +++ b/packages/appkit/src/plugins/server/tests/server.test.ts @@ -284,6 +284,52 @@ describe("ServerPlugin", () => { ); }); + test("should skip body parsing for paths declared by plugins", async () => { + process.env.NODE_ENV = "production"; + + const plugins: any = { + files: { + name: "files", + injectRoutes: vi.fn(), + getEndpoints: vi.fn().mockReturnValue({}), + getSkipBodyParsingPaths: vi + .fn() + .mockReturnValue(new Set(["/api/files/upload"])), + }, + }; + + const plugin = new ServerPlugin({ autoStart: false, plugins }); + await plugin.start(); + + // Get the type function passed to express.json + const jsonCall = vi.mocked(express.json).mock.calls[0][0] as any; + const typeFn = jsonCall.type; + + // Should skip body 
parsing for the declared path + expect(typeFn({ url: "/api/files/upload", headers: {} })).toBe(false); + + // Should skip body parsing for declared path with query string + expect(typeFn({ url: "/api/files/upload?path=foo", headers: {} })).toBe( + false, + ); + + // Should NOT skip body parsing for other routes (no hardcoded /upload check) + expect( + typeFn({ + url: "/api/other/upload", + headers: { "content-type": "application/json" }, + }), + ).toBe(true); + + // Should still parse JSON for normal routes + expect( + typeFn({ + url: "/api/analytics/query", + headers: { "content-type": "application/json" }, + }), + ).toBe(true); + }); + test("extendRoutes registers plugin routes correctly", async () => { process.env.NODE_ENV = "production"; diff --git a/packages/appkit/src/stream/stream-manager.ts b/packages/appkit/src/stream/stream-manager.ts index 41764772..429efe73 100644 --- a/packages/appkit/src/stream/stream-manager.ts +++ b/packages/appkit/src/stream/stream-manager.ts @@ -1,6 +1,7 @@ import { randomUUID } from "node:crypto"; import { context } from "@opentelemetry/api"; import type { IAppResponse, StreamConfig } from "shared"; +import { createLogger } from "../logging/logger"; import { EventRingBuffer } from "./buffers"; import { streamDefaults } from "./defaults"; import { SSEWriter } from "./sse-writer"; diff --git a/packages/appkit/tsdown.config.ts b/packages/appkit/tsdown.config.ts index 2472c084..dd1fad3c 100644 --- a/packages/appkit/tsdown.config.ts +++ b/packages/appkit/tsdown.config.ts @@ -42,6 +42,10 @@ export default defineConfig([ from: "src/plugins/analytics/manifest.json", to: "dist/plugins/analytics/manifest.json", }, + { + from: "src/plugins/files/manifest.json", + to: "dist/plugins/files/manifest.json", + }, { from: "src/plugins/server/manifest.json", to: "dist/plugins/server/manifest.json", diff --git a/packages/shared/src/plugin.ts b/packages/shared/src/plugin.ts index 54d8f583..6de7b06d 100644 --- a/packages/shared/src/plugin.ts +++ 
b/packages/shared/src/plugin.ts @@ -13,6 +13,8 @@ export interface BasePlugin { getEndpoints(): PluginEndpointMap; + getSkipBodyParsingPaths?(): ReadonlySet; + exports?(): unknown; } @@ -203,6 +205,8 @@ export type RouteConfig = { method: HttpMethod; path: string; handler: (req: IAppRequest, res: IAppResponse) => Promise; + /** When true, the server will skip JSON body parsing for this route (e.g. file uploads). */ + skipBodyParsing?: boolean; }; /** Map of endpoint names to their full paths for a plugin */