diff --git a/__fixtures__/generated/generated.json b/__fixtures__/generated/generated.json index a2a04ff9..22d5b189 100644 --- a/__fixtures__/generated/generated.json +++ b/__fixtures__/generated/generated.json @@ -60,6 +60,15 @@ "pretty/quoting-5.sql": "CREATE FUNCTION faker.boolean() RETURNS boolean AS $EOFCODE$\nBEGIN\n RETURN random() < 0.5;\nEND;\n$EOFCODE$ LANGUAGE plpgsql", "pretty/quoting-6.sql": "CREATE FUNCTION faker.\"boolean\"() RETURNS boolean AS $EOFCODE$\nBEGIN\n RETURN random() < 0.5;\nEND;\n$EOFCODE$ LANGUAGE plpgsql", "pretty/quoting-7.sql": "CREATE DOMAIN origin AS text CHECK (value = pg_catalog.\"substring\"(value, '^(https?://[^/]*)'))", + "pretty/quoting-8.sql": "SELECT '{\"a\":1}'::json", + "pretty/quoting-9.sql": "SELECT '{\"b\":2}'::jsonb", + "pretty/quoting-10.sql": "SELECT true::boolean", + "pretty/quoting-11.sql": "SELECT '1 day'::interval", + "pretty/quoting-12.sql": "SELECT 42::int", + "pretty/quoting-13.sql": "INSERT INTO test_table (data) VALUES ('{\"c\":3}'::json)", + "pretty/quoting-14.sql": "SELECT '{\"d\":4}'::myschema.json", + "pretty/quoting-15.sql": "SELECT 100::custom.int", + "pretty/quoting-16.sql": "SELECT true::myapp.boolean", "pretty/procedures-1.sql": "SELECT handle_insert('TYPE_A')", "pretty/procedures-2.sql": "SELECT \"HandleInsert\"('TYPE_A', 'Region-1')", "pretty/procedures-3.sql": "SELECT compute_score(42, TRUE)", diff --git a/__fixtures__/kitchen-sink/pretty/quoting.sql b/__fixtures__/kitchen-sink/pretty/quoting.sql index 869e5fb7..429aec54 100644 --- a/__fixtures__/kitchen-sink/pretty/quoting.sql +++ b/__fixtures__/kitchen-sink/pretty/quoting.sql @@ -45,3 +45,33 @@ $EOFCODE$ LANGUAGE plpgsql; -- Note: SUBSTRING(value FROM 'pattern') SQL syntax gets deparsed to pg_catalog."substring"(value, 'pattern') -- The SQL syntax form cannot be tested here due to AST round-trip differences (COERCE_SQL_SYNTAX vs COERCE_EXPLICIT_CALL) CREATE DOMAIN origin AS text CHECK (value = pg_catalog."substring"(value, '^(https?://[^/]*)')); + +-- 8. Type name quoting: json type should NOT be quoted (COL_NAME_KEYWORD in type position) +-- Type names follow a less strict quoting policy than standalone identifiers +SELECT '{"a":1}'::json; + +-- 9. Type name quoting: jsonb type should NOT be quoted +SELECT '{"b":2}'::jsonb; + +-- 10. Type name quoting: boolean type should NOT be quoted (TYPE_FUNC_NAME_KEYWORD in type position) +SELECT true::boolean; + +-- 11. Type name quoting: interval type should NOT be quoted (TYPE_FUNC_NAME_KEYWORD in type position) +SELECT '1 day'::interval; + +-- 12. Type name quoting: int type should NOT be quoted (COL_NAME_KEYWORD in type position) +SELECT 42::int; + +-- 13. Type cast in INSERT VALUES - json type should NOT be quoted +INSERT INTO test_table (data) VALUES ('{"c":3}'::json); + +-- 14. User-defined schema-qualified type with keyword name - should NOT quote the type name +-- This tests the bug where non-pg_catalog types use quoteIdentifier() for ALL parts +-- The type name 'json' is a COL_NAME_KEYWORD and should NOT be quoted in type position +SELECT '{"d":4}'::myschema.json; + +-- 15. User-defined schema-qualified type with keyword name 'int' - should NOT quote +SELECT 100::custom.int; + +-- 16. User-defined schema-qualified type with keyword name 'boolean' - should NOT quote +SELECT true::myapp.boolean; diff --git a/packages/deparser/QUOTING-RULES.md b/packages/deparser/QUOTING-RULES.md index ec8208f3..32892a36 100644 --- a/packages/deparser/QUOTING-RULES.md +++ b/packages/deparser/QUOTING-RULES.md @@ -203,6 +203,99 @@ The only difference is that `quoteIdentifierAfterDot()` does not check for keywo | `interval` | `"interval"` | `interval` | | `my-col` | `"my-col"` | `"my-col"` | +## Type-Name Quoting Policy + +Type names in PostgreSQL have their own quoting policy that is less strict than standalone identifiers but different from after-dot identifiers. + +### The Problem + +When you have a user-defined schema-qualified type like `myschema.json`, the type name `json` is a `COL_NAME_KEYWORD`. Using strict quoting (`quoteIdentifier()`) would produce `myschema."json"`, which is unnecessarily verbose. + +However, we cannot use the fully relaxed after-dot policy (`quoteIdentifierAfterDot()`) because type names are not in the same permissive grammar slot as identifiers after a dot in qualified names. + +### The Type-Name Quoting Algorithm + +The `QuoteUtils.quoteIdentifierTypeName()` function implements a middle-ground policy: + +- **Quote for lexical reasons**: uppercase, special characters, leading digits, embedded quotes +- **Quote only RESERVED_KEYWORD**: keywords like `select`, `from`, `where` must be quoted +- **Allow COL_NAME_KEYWORD unquoted**: keywords like `json`, `int`, `boolean` are allowed +- **Allow TYPE_FUNC_NAME_KEYWORD unquoted**: keywords like `interval`, `left`, `right` are allowed + +``` +function quoteIdentifierTypeName(ident): + if ident is empty: + return ident + + safe = true + + // Rule 1: First character must be lowercase letter or underscore + if first_char not in [a-z_]: + safe = false + + // Rule 2: All characters must be in safe set + for each char in ident: + if char not in [a-z0-9_]: + safe = false + + // Rule 3: Only quote RESERVED_KEYWORD (not COL_NAME_KEYWORD or TYPE_FUNC_NAME_KEYWORD) + if safe: + kwKind = keywordKindOf(ident) + if kwKind == RESERVED_KEYWORD: + safe = false + + if safe: + return ident // No quoting needed + + // Build quoted identifier with escaped embedded quotes + result = '"' + for each char in ident: + if char == '"': + result += '"' + result += char + result += '"' + + return result +``` + +### Comparison of Quoting Policies + +| Input | quoteIdentifier() | quoteIdentifierAfterDot() | quoteIdentifierTypeName() | +|-------|-------------------|---------------------------|---------------------------| +| `mytable` | `mytable` | `mytable` | `mytable` | +| `MyTable` | `"MyTable"` | `"MyTable"` | `"MyTable"` | +| `json` | `"json"` | `json` | `json` | +| `int` | `"int"` | `int` | `int` | +| `boolean` | `"boolean"` | `boolean` | `boolean` | +| `interval` | `"interval"` | `interval` | `interval` | +| `select` | `"select"` | `select` | `"select"` | +| `from` | `"from"` | `from` | `"from"` | + +### quoteTypeDottedName(parts: string[]) + +For schema-qualified type names, use `quoteTypeDottedName()` which applies type-name quoting to all parts: + +```typescript +static quoteTypeDottedName(parts: string[]): string { + if (!parts || parts.length === 0) return ''; + return parts.map(part => QuoteUtils.quoteIdentifierTypeName(part)).join('.'); +} +``` + +### Examples + +| Input Parts | Output | +|-------------|--------| +| `['json']` | `json` | +| `['myschema', 'json']` | `myschema.json` | +| `['custom', 'int']` | `custom.int` | +| `['myapp', 'boolean']` | `myapp.boolean` | +| `['myschema', 'select']` | `myschema."select"` | + +### When to Use Type-Name Quoting + +Use `quoteTypeDottedName()` in the `TypeName` handler for non-pg_catalog types. This ensures that user-defined types with keyword names are emitted with minimal quoting. + ## Composition Helpers ### quoteDottedName(parts: string[]) @@ -359,6 +452,8 @@ When updating to support new PostgreSQL versions, ensure `kwlist.ts` is synchron | Dotted name (multi-part) | `quoteDottedName()` | `schema.table`, `schema.function` | | Two-part qualified name | `quoteQualifiedIdentifier()` | `schema.table` | | After-dot component only | `quoteIdentifierAfterDot()` | Indirection field access | +| Type name (single or multi-part) | `quoteTypeDottedName()` | `myschema.json`, `custom.int` | +| Type name component only | `quoteIdentifierTypeName()` | Type name part | | String literal | `escape()` or `formatEString()` | String values in SQL | ## Test Fixtures @@ -366,6 +461,8 @@ When updating to support new PostgreSQL versions, ensure `kwlist.ts` is synchron The quoting behavior is verified by test fixtures in `__fixtures__/kitchen-sink/pretty/`: - `quoting-1.sql` through `quoting-7.sql`: Test cases for `faker.float`, `faker.interval`, `faker.boolean`, and `pg_catalog.substring` +- `quoting-8.sql` through `quoting-13.sql`: Test cases for type casts with `json`, `jsonb`, `boolean`, `interval`, `int` +- `quoting-14.sql` through `quoting-16.sql`: Test cases for user-defined schema-qualified types with keyword names (`myschema.json`, `custom.int`, `myapp.boolean`) The corresponding snapshots in `__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap` demonstrate the expected output with minimal quoting. diff --git a/packages/deparser/__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap b/packages/deparser/__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap index 5eee750a..f46113e1 100644 --- a/packages/deparser/__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap +++ b/packages/deparser/__tests__/pretty/__snapshots__/quoting-pretty.test.ts.snap @@ -50,6 +50,24 @@ $$ LANGUAGE plpgsql" exports[`non-pretty: pretty/quoting-7.sql 1`] = `"CREATE DOMAIN origin AS text CHECK (value = pg_catalog.substring(value, '^(https?://[^/]*)'))"`; +exports[`non-pretty: pretty/quoting-8.sql 1`] = `"SELECT '{"a":1}'::json"`; + +exports[`non-pretty: pretty/quoting-9.sql 1`] = `"SELECT '{"b":2}'::jsonb"`; + +exports[`non-pretty: pretty/quoting-10.sql 1`] = `"SELECT CAST(true AS boolean)"`; + +exports[`non-pretty: pretty/quoting-11.sql 1`] = `"SELECT '1 day'::interval"`; + +exports[`non-pretty: pretty/quoting-12.sql 1`] = `"SELECT 42::int"`; + +exports[`non-pretty: pretty/quoting-13.sql 1`] = `"INSERT INTO test_table (data) VALUES ('{"c":3}'::json)"`; + +exports[`non-pretty: pretty/quoting-14.sql 1`] = `"SELECT CAST('{"d":4}' AS myschema.json)"`; + +exports[`non-pretty: pretty/quoting-15.sql 1`] = `"SELECT CAST(100 AS custom.int)"`; + +exports[`non-pretty: pretty/quoting-16.sql 1`] = `"SELECT CAST(true AS myapp.boolean)"`; + exports[`pretty: pretty/quoting-1.sql 1`] = ` "CREATE FUNCTION faker.float(min double precision DEFAULT 0, max double precision DEFAULT 100) RETURNS double precision AS $$ BEGIN @@ -102,3 +120,26 @@ exports[`pretty: pretty/quoting-7.sql 1`] = ` "CREATE DOMAIN origin AS text CHECK (value = pg_catalog.substring(value, '^(https?://[^/]*)'))" `; + +exports[`pretty: pretty/quoting-8.sql 1`] = `"SELECT '{"a":1}'::json"`; + +exports[`pretty: pretty/quoting-9.sql 1`] = `"SELECT '{"b":2}'::jsonb"`; + +exports[`pretty: pretty/quoting-10.sql 1`] = `"SELECT CAST(true AS boolean)"`; + +exports[`pretty: pretty/quoting-11.sql 1`] = `"SELECT '1 day'::interval"`; + +exports[`pretty: pretty/quoting-12.sql 1`] = `"SELECT 42::int"`; + +exports[`pretty: pretty/quoting-13.sql 1`] = ` +"INSERT INTO test_table ( + data +) VALUES + ('{"c":3}'::json)" +`; + +exports[`pretty: pretty/quoting-14.sql 1`] = `"SELECT CAST('{"d":4}' AS myschema.json)"`; + +exports[`pretty: pretty/quoting-15.sql 1`] = `"SELECT CAST(100 AS custom.int)"`; + +exports[`pretty: pretty/quoting-16.sql 1`] = `"SELECT CAST(true AS myapp.boolean)"`; diff --git a/packages/deparser/__tests__/pretty/quoting-pretty.test.ts b/packages/deparser/__tests__/pretty/quoting-pretty.test.ts index 1d663374..aa88c812 100644 --- a/packages/deparser/__tests__/pretty/quoting-pretty.test.ts +++ b/packages/deparser/__tests__/pretty/quoting-pretty.test.ts @@ -7,6 +7,15 @@ const prettyTest = new PrettyTest([ 'pretty/quoting-5.sql', 'pretty/quoting-6.sql', 'pretty/quoting-7.sql', + 'pretty/quoting-8.sql', + 'pretty/quoting-9.sql', + 'pretty/quoting-10.sql', + 'pretty/quoting-11.sql', + 'pretty/quoting-12.sql', + 'pretty/quoting-13.sql', + 'pretty/quoting-14.sql', + 'pretty/quoting-15.sql', + 'pretty/quoting-16.sql', ]); prettyTest.generateTests(); diff --git a/packages/deparser/src/deparser.ts b/packages/deparser/src/deparser.ts index 9cdf9db3..66302a43 100644 --- a/packages/deparser/src/deparser.ts +++ b/packages/deparser/src/deparser.ts @@ -1972,8 +1972,9 @@ export class Deparser implements DeparserVisitor { } } - const quotedNames = names.map((name: string) => QuoteUtils.quoteIdentifier(name)); - let result = mods(quotedNames.join('.'), args); + // Use type-name quoting for non-pg_catalog types + // This allows keywords like 'json', 'int', 'boolean' to remain unquoted in type positions + let result = mods(QuoteUtils.quoteTypeDottedName(names), args); if (node.arrayBounds && node.arrayBounds.length > 0) { result += formatArrayBounds(node.arrayBounds); diff --git a/packages/deparser/src/utils/quote-utils.ts b/packages/deparser/src/utils/quote-utils.ts index 853572a9..5e839d49 100644 --- a/packages/deparser/src/utils/quote-utils.ts +++ b/packages/deparser/src/utils/quote-utils.ts @@ -185,5 +185,80 @@ export class QuoteUtils { } return QuoteUtils.quoteIdentifier(ident); } + + /** + * Quote an identifier that appears as a type name. + * + * Type names in PostgreSQL have a less strict quoting policy than standalone identifiers. + * In type positions, COL_NAME_KEYWORD and TYPE_FUNC_NAME_KEYWORD are allowed unquoted + * (e.g., 'json', 'int', 'boolean', 'interval'). Only RESERVED_KEYWORD must be quoted. + * + * This is different from: + * - quoteIdentifier(): quotes all keywords except UNRESERVED_KEYWORD + * - quoteIdentifierAfterDot(): only quotes for lexical reasons (no keyword checking) + * + * Type names still need quoting for lexical reasons (uppercase, special chars, etc.). + */ + static quoteIdentifierTypeName(ident: string): string { + if (!ident) return ident; + + let safe = true; + + // Check first character: must be lowercase letter or underscore + const firstChar = ident[0]; + if (!((firstChar >= 'a' && firstChar <= 'z') || firstChar === '_')) { + safe = false; + } + + // Check all characters + for (let i = 0; i < ident.length; i++) { + const ch = ident[i]; + if ((ch >= 'a' && ch <= 'z') || + (ch >= '0' && ch <= '9') || + (ch === '_')) { + // okay + } else { + safe = false; + } + } + + if (safe) { + // For type names, only quote RESERVED_KEYWORD + // COL_NAME_KEYWORD and TYPE_FUNC_NAME_KEYWORD are allowed unquoted in type positions + const kwKind = keywordKindOf(ident); + if (kwKind === 'RESERVED_KEYWORD') { + safe = false; + } + } + + if (safe) { + return ident; // no change needed + } + + // Build quoted identifier with escaped embedded quotes + let result = '"'; + for (let i = 0; i < ident.length; i++) { + const ch = ident[i]; + if (ch === '"') { + result += '"'; // escape " as "" + } + result += ch; + } + result += '"'; + + return result; + } + + /** + * Quote a dotted type name (e.g., schema.typename). + * + * For type names, we use type-name quoting for all parts since the entire + * qualified name is in a type context. This allows keywords like 'json', + * 'int', 'boolean' to remain unquoted in user-defined schema-qualified types. + */ + static quoteTypeDottedName(parts: string[]): string { + if (!parts || parts.length === 0) return ''; + return parts.map(part => QuoteUtils.quoteIdentifierTypeName(part)).join('.'); + } }