From 60c3f318cf405a04b11de187262d7a1b3b310c4f Mon Sep 17 00:00:00 2001 From: quobix Date: Sun, 21 Dec 2025 12:27:42 -0500 Subject: [PATCH 1/8] Address https://github.com/daveshanley/vacuum/issues/485 --- utils/utils.go | 23 ++++-------- utils/utils_test.go | 87 ++++++++++++++++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/utils/utils.go b/utils/utils.go index 4ef5071c..4492912a 100644 --- a/utils/utils.go +++ b/utils/utils.go @@ -852,7 +852,11 @@ func ConvertComponentIdIntoFriendlyPathSearch(id string) (string, string) { } } - // Use single string builder for final assembly with # -> $ replacement + // use single string builder for final assembly. + // note: we do NOT replace # with $ here. the leading # from JSON Pointer notation + // (e.g., "#/components/...") is already stripped when we split by "/", and any # + // characters within component names (e.g., "async_search.submit#wait_for_completion_timeout") + // should be preserved literally in the JSONPath query. see issue #485. var finalBuilder strings.Builder if len(cleaned) > 1 { // Estimate final size @@ -867,27 +871,14 @@ func ConvertComponentIdIntoFriendlyPathSearch(id string) (string, string) { if i > 0 { finalBuilder.WriteByte('.') } - // Replace # with $ as we write - for _, ch := range segment { - if ch == '#' { - finalBuilder.WriteByte('$') - } else { - finalBuilder.WriteRune(ch) - } - } + finalBuilder.WriteString(segment) } } else { // Handle single segment case if len(cleaned) == 1 { finalBuilder.Grow(len(cleaned[0]) + 5) finalBuilder.WriteString("$.") - for _, ch := range cleaned[0] { - if ch == '#' { - finalBuilder.WriteByte('$') - } else { - finalBuilder.WriteRune(ch) - } - } + finalBuilder.WriteString(cleaned[0]) } else { finalBuilder.WriteString("$.") } diff --git a/utils/utils_test.go b/utils/utils_test.go index 67c77678..0a14473c 100644 --- a/utils/utils_test.go +++ b/utils/utils_test.go @@ -1046,9 +1046,9 @@ func TestConvertComponentIdIntoFriendlyPathSearch_SpecialCharacterEdgeCases(t *t } func TestConvertComponentIdIntoFriendlyPathSearch_CleanedSingleSegment(t *testing.T) { - // Test case that results in single cleaned segment with # replacement + // Test case that results in single cleaned segment - # should be preserved (issue #485) _, path := ConvertComponentIdIntoFriendlyPathSearch("#single") - assert.Equal(t, "$.['$single']", path) + assert.Equal(t, "$.['#single']", path) // Test empty cleaned result _, path = ConvertComponentIdIntoFriendlyPathSearch("/") @@ -1064,14 +1064,14 @@ func TestConvertComponentIdIntoFriendlyPathSearch_PathWithoutDotAfterDollar(t *t expected string }{ { - // Path with only non-path chars that get wrapped - # gets replaced with $ + // Path with only non-path chars that get wrapped - # is preserved (issue #485) input: "!@#", - expected: "$.['!@$']", + expected: "$.['!@#']", }, { - // Complex path to test final builder logic - # in segment name causes wrapping + // Complex path to test final builder logic - # in segment name is preserved (issue #485) input: "#/test#value", - expected: "$.['test$value']", + expected: "$.['test#value']", }, } @@ -1082,13 +1082,13 @@ func TestConvertComponentIdIntoFriendlyPathSearch_PathWithoutDotAfterDollar(t *t } func TestConvertComponentIdIntoFriendlyPathSearch_HashCharacterHandling(t *testing.T) { - // Test # character replacement in segments - # causes segments to be wrapped + // Test # character in segments - # should be preserved in component names (issue #485) _, path := ConvertComponentIdIntoFriendlyPathSearch("#/path/with#hash/in#middle") - assert.Equal(t, "$.path['with$hash']['in$middle']", path) + assert.Equal(t, "$.path['with#hash']['in#middle']", path) - // Test multiple # in single segment + // Test multiple # in single segment - all should be preserved (issue #485) _, path = ConvertComponentIdIntoFriendlyPathSearch("#/seg#ment#with#many") - assert.Equal(t, "$.['seg$ment$with$many']", path) + assert.Equal(t, "$.['seg#ment#with#many']", path) } // Additional tests to hit uncovered branches @@ -1164,13 +1164,13 @@ func TestConvertComponentIdIntoFriendlyPathSearch_IntegerWithoutCleanedArray(t * // Test to hit line 870 - # character in multi-segment cleaned path func TestConvertComponentIdIntoFriendlyPathSearch_HashInMultiSegment(t *testing.T) { - // This creates multiple cleaned segments with # that needs replacement + // This creates multiple cleaned segments _, path := ConvertComponentIdIntoFriendlyPathSearch("#/segment1/segment2") assert.Equal(t, "$.segment1.segment2", path) - // Another test with # in actual segment names that go through multi-segment processing + // Another test with # in actual segment names - # should be preserved (issue #485) _, path = ConvertComponentIdIntoFriendlyPathSearch("#/test/another#segment/end") - assert.Equal(t, "$.test['another$segment'].end", path) + assert.Equal(t, "$.test['another#segment'].end", path) } // Test appendSegmentOptimized with no cleaned array @@ -1255,6 +1255,67 @@ func TestConvertComponentIdIntoFriendlyPathSearch_Brackets(t *testing.T) { assert.Equal(t, "OhNoWhy[HaveYouDoneThis]", segment) } +// Issue #485 tests - Hash character in component names should be preserved +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_HashInComponentName(t *testing.T) { + // Real-world Elasticsearch example from issue #485 + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/parameters/async_search.submit#wait_for_completion_timeout") + assert.Equal(t, "$.components.parameters['async_search.submit#wait_for_completion_timeout']", path) + assert.Equal(t, "async_search.submit#wait_for_completion_timeout", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_MultipleHashesInName(t *testing.T) { + // Component name with multiple # characters + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/schemas/model#v1#beta") + assert.Equal(t, "$.components.schemas['model#v1#beta']", path) + assert.Equal(t, "model#v1#beta", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_HashAtEndOfName(t *testing.T) { + // Component name with # at the end + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/schemas/deprecated#") + assert.Equal(t, "$.components.schemas['deprecated#']", path) + assert.Equal(t, "deprecated#", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_HashAtStartOfName(t *testing.T) { + // Component name with # at the start (after the path) + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/schemas/#internal") + assert.Equal(t, "$.components.schemas['#internal']", path) + assert.Equal(t, "#internal", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_OnlyHashInName(t *testing.T) { + // Component name that is just a # + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/schemas/#") + assert.Equal(t, "$.components.schemas['#']", path) + assert.Equal(t, "#", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_HashWithSpecialChars(t *testing.T) { + // Component name with # mixed with other special characters + segment, path := ConvertComponentIdIntoFriendlyPathSearch("#/components/schemas/model#v1-beta.final") + assert.Equal(t, "$.components.schemas['model#v1-beta.final']", path) + assert.Equal(t, "model#v1-beta.final", segment) +} + +func TestConvertComponentIdIntoFriendlyPathSearch_Issue485_NormalPathsUnaffected(t *testing.T) { + // Verify normal paths (without # in component names) still work correctly + testCases := []struct { + input string + expectedPath string + expectedName string + }{ + {"#/components/schemas/Pet", "$.components.schemas['Pet']", "Pet"}, + {"#/components/parameters/page-size", "$.components.parameters['page-size']", "page-size"}, + } + + for _, tc := range testCases { + segment, path := ConvertComponentIdIntoFriendlyPathSearch(tc.input) + assert.Equal(t, tc.expectedPath, path, "Path mismatch for input: %s", tc.input) + assert.Equal(t, tc.expectedName, segment, "Name mismatch for input: %s", tc.input) + } +} + func TestDetermineYAMLWhitespaceLength(t *testing.T) { someBytes, _ := os.ReadFile("../test_specs/burgershop.openapi.yaml") assert.Equal(t, 2, DetermineWhitespaceLength(string(someBytes))) From 9ac354938e7d480c05aad352b2e2c6bfcfffcab6 Mon Sep 17 00:00:00 2001 From: quobix Date: Sun, 21 Dec 2025 17:53:14 -0500 Subject: [PATCH 2/8] Fully address #471 `$id` is now fully supported, and will also perform the correct resolution chain. Bringing `libopenain` completely inline with JSON Schema 2020-12 When schemas declare $id, relative $ref values should resolve against that $id per JSON Schema 2020-12 spec. Previously, libopenapi would try to find files directly instead of resolving against the $id base URI. components: schemas: s1: $id: "https://example.com/a.json" type: string s2: $ref: "a.json" # Should resolve to s1 via its $id --- datamodel/high/base/schema.go | 6 + datamodel/low/base/constants.go | 1 + datamodel/low/base/schema.go | 13 + index/extract_refs.go | 120 ++ index/index_model.go | 2 + index/rolodex.go | 62 + index/schema_id_context.go | 57 + index/schema_id_registry.go | 77 ++ index/schema_id_resolve.go | 202 +++ index/schema_id_test.go | 1157 +++++++++++++++++ index/schema_id_types.go | 73 ++ index/search_index.go | 9 + index/spec_index.go | 32 + what-changed/model/breaking_rules.go | 1 + .../model/breaking_rules_constants.go | 1 + what-changed/model/breaking_rules_model.go | 1 + what-changed/model/schema.go | 21 + 17 files changed, 1835 insertions(+) create mode 100644 index/schema_id_context.go create mode 100644 index/schema_id_registry.go create mode 100644 index/schema_id_resolve.go create mode 100644 index/schema_id_test.go create mode 100644 index/schema_id_types.go diff --git a/datamodel/high/base/schema.go b/datamodel/high/base/schema.go index e5177bff..34732aa7 100644 --- a/datamodel/high/base/schema.go +++ b/datamodel/high/base/schema.go @@ -74,6 +74,9 @@ type Schema struct { // in 3.1 Items can be a Schema or a boolean Items *DynamicValue[*SchemaProxy, bool] `json:"items,omitempty" yaml:"items,omitempty"` + // 3.1+ only, JSON Schema 2020-12 $id - declares this schema as a schema resource with a URI identifier + Id string `json:"$id,omitempty" yaml:"$id,omitempty"` + // 3.1 only, part of the JSON Schema spec provides a way to identify a sub-schema Anchor string `json:"$anchor,omitempty" yaml:"$anchor,omitempty"` @@ -312,6 +315,9 @@ func NewSchema(schema *base.Schema) *Schema { } s.Required = req + if !schema.Id.IsEmpty() { + s.Id = schema.Id.Value + } if !schema.Anchor.IsEmpty() { s.Anchor = schema.Anchor.Value } diff --git a/datamodel/low/base/constants.go b/datamodel/low/base/constants.go index 84b3627b..ebfc7afa 100644 --- a/datamodel/low/base/constants.go +++ b/datamodel/low/base/constants.go @@ -56,6 +56,7 @@ const ( ExclusiveMaximumLabel = "exclusiveMaximum" SchemaLabel = "schema" SchemaTypeLabel = "$schema" + IdLabel = "$id" AnchorLabel = "$anchor" DynamicAnchorLabel = "$dynamicAnchor" DynamicRefLabel = "$dynamicRef" diff --git a/datamodel/low/base/schema.go b/datamodel/low/base/schema.go index d16931dd..07b50f63 100644 --- a/datamodel/low/base/schema.go +++ b/datamodel/low/base/schema.go @@ -107,6 +107,7 @@ type Schema struct { PropertyNames low.NodeReference[*SchemaProxy] UnevaluatedItems low.NodeReference[*SchemaProxy] UnevaluatedProperties low.NodeReference[*SchemaDynamicValue[*SchemaProxy, bool]] + Id low.NodeReference[string] // JSON Schema 2020-12 $id - schema resource identifier Anchor low.NodeReference[string] DynamicAnchor low.NodeReference[string] DynamicRef low.NodeReference[string] @@ -489,6 +490,10 @@ func (s *Schema) hash(quick bool) [32]byte { sb.WriteString(low.GenerateHashString(s.UnevaluatedItems.Value)) sb.WriteByte('|') } + if !s.Id.IsEmpty() { + sb.WriteString(s.Id.Value) + sb.WriteByte('|') + } if !s.Anchor.IsEmpty() { sb.WriteString(s.Anchor.Value) sb.WriteByte('|') @@ -830,6 +835,14 @@ func (s *Schema) Build(ctx context.Context, root *yaml.Node, idx *index.SpecInde } } + // handle $id if set. (3.1+, JSON Schema 2020-12) + _, idLabel, idNode := utils.FindKeyNodeFullTop(IdLabel, root.Content) + if idNode != nil { + s.Id = low.NodeReference[string]{ + Value: idNode.Value, KeyNode: idLabel, ValueNode: idNode, + } + } + // handle anchor if set. (3.1) _, anchorLabel, anchorNode := utils.FindKeyNodeFullTop(AnchorLabel, root.Content) if anchorNode != nil { diff --git a/index/extract_refs.go b/index/extract_refs.go index 89a64ada..d195f74b 100644 --- a/index/extract_refs.go +++ b/index/extract_refs.go @@ -26,6 +26,20 @@ import ( // Set LIBOPENAPI_LEGACY_REF_ORDER=true to use the old non-deterministic ordering. var preserveLegacyRefOrder = os.Getenv("LIBOPENAPI_LEGACY_REF_ORDER") == "true" +// findSchemaIdInNode looks for a $id key in a mapping node and returns its value. +// Returns empty string if not found or if the node is not a mapping. +func findSchemaIdInNode(node *yaml.Node) string { + if node == nil || node.Kind != yaml.MappingNode { + return "" + } + for i := 0; i < len(node.Content)-1; i += 2 { + if node.Content[i].Value == "$id" && utils.IsNodeStringValue(node.Content[i+1]) { + return node.Content[i+1].Value + } + } + return "" +} + // indexedRef pairs a resolved reference with its original input position for deterministic ordering. type indexedRef struct { ref *Reference @@ -38,6 +52,33 @@ func (index *SpecIndex) ExtractRefs(ctx context.Context, node, parent *yaml.Node if node == nil { return nil } + + // Initialize $id scope if not present (uses document base as initial scope) + scope := GetSchemaIdScope(ctx) + if scope == nil { + scope = NewSchemaIdScope(index.specAbsolutePath) + ctx = WithSchemaIdScope(ctx, scope) + } + + // Capture the parent's base URI BEFORE any $id in this node is processed + // This is used for registering any $id found in this node + parentBaseUri := scope.BaseUri + + // Check if THIS node has a $id and update scope for processing children + // This must happen before iterating children so they see the updated scope + if node.Kind == yaml.MappingNode { + if nodeId := findSchemaIdInNode(node); nodeId != "" { + resolvedNodeId, _ := ResolveSchemaId(nodeId, parentBaseUri) + if resolvedNodeId == "" { + resolvedNodeId = nodeId + } + // Update scope for children of this node + scope = scope.Copy() + scope.PushId(resolvedNodeId) + ctx = WithSchemaIdScope(ctx, scope) + } + } + var found []*Reference if len(node.Content) > 0 { var prev, polyName string @@ -54,6 +95,7 @@ func (index *SpecIndex) ExtractRefs(ctx context.Context, node, parent *yaml.Node polyName = prev } } + found = append(found, index.ExtractRefs(ctx, n, node, seenPath, level, poly, polyName)...) } @@ -510,6 +552,84 @@ func (index *SpecIndex) ExtractRefs(ctx context.Context, node, parent *yaml.Node } } + // Detect and register JSON Schema 2020-12 $id declarations + if i%2 == 0 && n.Value == "$id" { + if len(node.Content) > i+1 && utils.IsNodeStringValue(node.Content[i+1]) { + idValue := node.Content[i+1].Value + idNode := node.Content[i+1] + + // Build the definition path for this schema + var definitionPath string + if len(seenPath) > 0 { + definitionPath = "#/" + strings.Join(seenPath, "/") + } else { + definitionPath = "#" + } + + // Validate the $id (must not contain fragment) + if err := ValidateSchemaId(idValue); err != nil { + index.errorLock.Lock() + index.refErrors = append(index.refErrors, &IndexingError{ + Err: fmt.Errorf("invalid $id value '%s': %w", idValue, err), + Node: idNode, + KeyNode: node.Content[i], + Path: definitionPath, + }) + index.errorLock.Unlock() + continue + } + + // Resolve the $id against the PARENT scope's base URI (nearest ancestor $id) + // This implements JSON Schema 2020-12 hierarchical $id resolution + // We use parentBaseUri which was captured before this node's $id was pushed + baseUri := parentBaseUri + if baseUri == "" { + baseUri = index.specAbsolutePath + } + resolvedUri, resolveErr := ResolveSchemaId(idValue, baseUri) + if resolveErr != nil { + if index.logger != nil { + index.logger.Warn("failed to resolve $id", + "id", idValue, + "base", baseUri, + "definitionPath", definitionPath, + "error", resolveErr.Error(), + "line", idNode.Line) + } + resolvedUri = idValue // Use original as fallback + } + + // Create and register the schema ID entry + // ParentId is the parent scope's base URI (if it differs from document base) + parentId := "" + if parentBaseUri != index.specAbsolutePath && parentBaseUri != "" { + parentId = parentBaseUri + } + entry := &SchemaIdEntry{ + Id: idValue, + ResolvedUri: resolvedUri, + SchemaNode: node, + ParentId: parentId, + Index: index, + DefinitionPath: definitionPath, + Line: idNode.Line, + Column: idNode.Column, + } + + // Register in the index - propagate errors + if regErr := index.RegisterSchemaId(entry); regErr != nil { + index.errorLock.Lock() + index.refErrors = append(index.refErrors, &IndexingError{ + Err: fmt.Errorf("failed to register $id '%s': %w", idValue, regErr), + Node: idNode, + KeyNode: node.Content[i], + Path: definitionPath, + }) + index.errorLock.Unlock() + } + } + } + if i%2 == 0 && n.Value != "$ref" && n.Value != "" { v := n.Value diff --git a/index/index_model.go b/index/index_model.go index db935233..2739e978 100644 --- a/index/index_model.go +++ b/index/index_model.go @@ -405,6 +405,8 @@ type SpecIndex struct { nodeMapCompleted chan struct{} pendingResolve []refMap highModelCache Cache + schemaIdRegistry map[string]*SchemaIdEntry // registry of $id declarations for JSON Schema 2020-12 + schemaIdRegistryLock sync.RWMutex // lock for concurrent access to schemaIdRegistry } // GetResolver returns the resolver for this index. diff --git a/index/rolodex.go b/index/rolodex.go index c7ad8aec..9027d7f0 100644 --- a/index/rolodex.go +++ b/index/rolodex.go @@ -84,6 +84,8 @@ type Rolodex struct { ignoredCircularReferences []*CircularReferenceResult logger *slog.Logger id string // unique ID for the rolodex, can be used to identify it in logs or other contexts. + globalSchemaIdRegistry map[string]*SchemaIdEntry + schemaIdRegistryLock sync.RWMutex } // NewRolodex creates a new rolodex with the provided index configuration. @@ -249,6 +251,9 @@ func (r *Rolodex) AddExternalIndex(idx *SpecIndex, location string) { if r.indexMap[location] == nil { r.indexMap[location] = idx } + + // Aggregate $id registrations from this index into the global registry + r.RegisterIdsFromIndex(idx) } func (r *Rolodex) AddIndex(idx *SpecIndex) { @@ -884,3 +889,60 @@ func (r *Rolodex) ClearIndexCaches() { idx.GetHighCache().Clear() } } + +// RegisterGlobalSchemaId registers a schema $id in the Rolodex global registry. +// Returns an error if the $id is invalid. +func (r *Rolodex) RegisterGlobalSchemaId(entry *SchemaIdEntry) error { + if r == nil { + return fmt.Errorf("cannot register $id on nil Rolodex") + } + + r.schemaIdRegistryLock.Lock() + defer r.schemaIdRegistryLock.Unlock() + + if r.globalSchemaIdRegistry == nil { + r.globalSchemaIdRegistry = make(map[string]*SchemaIdEntry) + } + + _, err := registerSchemaIdToRegistry(r.globalSchemaIdRegistry, entry, r.logger, "global registry") + return err +} + +// LookupSchemaById looks up a schema by its $id URI across all indexes. +func (r *Rolodex) LookupSchemaById(uri string) *SchemaIdEntry { + if r == nil { + return nil + } + + r.schemaIdRegistryLock.RLock() + defer r.schemaIdRegistryLock.RUnlock() + + if r.globalSchemaIdRegistry == nil { + return nil + } + return r.globalSchemaIdRegistry[uri] +} + +// GetAllGlobalSchemaIds returns a copy of all registered $id entries across all indexes. +func (r *Rolodex) GetAllGlobalSchemaIds() map[string]*SchemaIdEntry { + if r == nil { + return make(map[string]*SchemaIdEntry) + } + + r.schemaIdRegistryLock.RLock() + defer r.schemaIdRegistryLock.RUnlock() + return copySchemaIdRegistry(r.globalSchemaIdRegistry) +} + +// RegisterIdsFromIndex aggregates all $id registrations from an index into the global registry. +// Called after each index is built to populate the Rolodex global registry. +func (r *Rolodex) RegisterIdsFromIndex(idx *SpecIndex) { + if r == nil || idx == nil { + return + } + + entries := idx.GetAllSchemaIds() + for _, entry := range entries { + _ = r.RegisterGlobalSchemaId(entry) + } +} diff --git a/index/schema_id_context.go b/index/schema_id_context.go new file mode 100644 index 00000000..5b999515 --- /dev/null +++ b/index/schema_id_context.go @@ -0,0 +1,57 @@ +// Copyright 2022-2025 Princess Beef Heavy Industries / Dave Shanley +// SPDX-License-Identifier: MIT + +package index + +import ( + "context" +) + +// ResolvingIdsKey is the context key for tracking $id values currently being resolved. +const ResolvingIdsKey ContextKey = "resolvingIds" + +// SchemaIdScopeKey is the context key for tracking the current $id scope during extraction. +const SchemaIdScopeKey ContextKey = "schemaIdScope" + +// GetSchemaIdScope returns the current $id scope from the context. +func GetSchemaIdScope(ctx context.Context) *SchemaIdScope { + if v := ctx.Value(SchemaIdScopeKey); v != nil { + return v.(*SchemaIdScope) + } + return nil +} + +// WithSchemaIdScope returns a new context with the given $id scope. +func WithSchemaIdScope(ctx context.Context, scope *SchemaIdScope) context.Context { + return context.WithValue(ctx, SchemaIdScopeKey, scope) +} + +// GetResolvingIds returns the set of $id values currently being resolved in the call chain. +func GetResolvingIds(ctx context.Context) map[string]bool { + if v := ctx.Value(ResolvingIdsKey); v != nil { + return v.(map[string]bool) + } + return nil +} + +// AddResolvingId adds a $id to the resolving set in the context. +// Returns a new context with the updated set (copy-on-write for thread safety). +func AddResolvingId(ctx context.Context, id string) context.Context { + existing := GetResolvingIds(ctx) + newSet := make(map[string]bool, len(existing)+1) + for k, v := range existing { + newSet[k] = v + } + newSet[id] = true + return context.WithValue(ctx, ResolvingIdsKey, newSet) +} + +// IsIdBeingResolved checks if a $id is currently being resolved in the call chain. +// Used to detect and prevent circular $id resolution. +func IsIdBeingResolved(ctx context.Context, id string) bool { + ids := GetResolvingIds(ctx) + if ids == nil { + return false + } + return ids[id] +} diff --git a/index/schema_id_registry.go b/index/schema_id_registry.go new file mode 100644 index 00000000..0661c0e6 --- /dev/null +++ b/index/schema_id_registry.go @@ -0,0 +1,77 @@ +// Copyright 2022-2025 Princess Beef Heavy Industries / Dave Shanley +// SPDX-License-Identifier: MIT + +package index + +import ( + "fmt" + "log/slog" +) + +// schemaIdRegistrationResult holds the result of a schema ID registration attempt. +type schemaIdRegistrationResult struct { + registered bool // true if successfully registered + duplicate bool // true if a duplicate was found (first-wins policy applied) + key string // the key used for registration +} + +// registerSchemaIdToRegistry is the common registration logic for both SpecIndex and Rolodex. +// Returns the registration result. Duplicates are logged but not treated as errors. +func registerSchemaIdToRegistry( + registry map[string]*SchemaIdEntry, + entry *SchemaIdEntry, + logger *slog.Logger, + registryName string, +) (*schemaIdRegistrationResult, error) { + if entry == nil { + return nil, fmt.Errorf("cannot register nil SchemaIdEntry") + } + + if err := ValidateSchemaId(entry.Id); err != nil { + if logger != nil { + logger.Warn("invalid $id value, skipping registration", + "registry", registryName, + "id", entry.Id, + "error", err.Error(), + "line", entry.Line, + "column", entry.Column) + } + return nil, err + } + + key := entry.GetKey() + + if existing, ok := registry[key]; ok { + if logger != nil { + existingPath := "" + newPath := "" + if existing.Index != nil { + existingPath = existing.Index.GetSpecAbsolutePath() + } + if entry.Index != nil { + newPath = entry.Index.GetSpecAbsolutePath() + } + logger.Warn("duplicate $id detected, keeping first registration", + "registry", registryName, + "id", key, + "first_location", fmt.Sprintf("%s:%d:%d", existingPath, existing.Line, existing.Column), + "duplicate_location", fmt.Sprintf("%s:%d:%d", newPath, entry.Line, entry.Column)) + } + return &schemaIdRegistrationResult{registered: false, duplicate: true, key: key}, nil + } + + registry[key] = entry + return &schemaIdRegistrationResult{registered: true, duplicate: false, key: key}, nil +} + +// copySchemaIdRegistry creates a defensive copy of a schema ID registry. +func copySchemaIdRegistry(registry map[string]*SchemaIdEntry) map[string]*SchemaIdEntry { + if registry == nil { + return make(map[string]*SchemaIdEntry) + } + result := make(map[string]*SchemaIdEntry, len(registry)) + for k, v := range registry { + result[k] = v + } + return result +} diff --git a/index/schema_id_resolve.go b/index/schema_id_resolve.go new file mode 100644 index 00000000..a9bfd137 --- /dev/null +++ b/index/schema_id_resolve.go @@ -0,0 +1,202 @@ +// Copyright 2022-2025 Princess Beef Heavy Industries / Dave Shanley +// SPDX-License-Identifier: MIT + +package index + +import ( + "fmt" + "net/url" + "strings" + + "go.yaml.in/yaml/v4" +) + +// ValidateSchemaId checks if a $id value is valid per JSON Schema 2020-12 spec. +// Per the spec, $id MUST NOT contain a fragment identifier (#). +func ValidateSchemaId(id string) error { + if id == "" { + return fmt.Errorf("$id cannot be empty") + } + if strings.Contains(id, "#") { + return fmt.Errorf("$id must not contain fragment identifier '#': %s (use $anchor instead)", id) + } + return nil +} + +// ResolveSchemaId resolves a potentially relative $id against a base URI. +// Returns the fully resolved absolute URI. +func ResolveSchemaId(id string, baseUri string) (string, error) { + if id == "" { + return "", fmt.Errorf("$id cannot be empty") + } + + parsedId, err := url.Parse(id) + if err != nil { + return "", fmt.Errorf("invalid $id URI: %s: %w", id, err) + } + + // Absolute $id is used directly + if parsedId.IsAbs() { + return id, nil + } + + // Relative $id without base - return as-is for later resolution + if baseUri == "" { + return id, nil + } + + parsedBase, err := url.Parse(baseUri) + if err != nil { + return "", fmt.Errorf("invalid base URI: %s: %w", baseUri, err) + } + + resolved := parsedBase.ResolveReference(parsedId) + return resolved.String(), nil +} + +// ResolveRefAgainstSchemaId resolves a $ref value against the current $id scope. +// Absolute refs are returned as-is; relative refs are resolved against the nearest ancestor $id. +func ResolveRefAgainstSchemaId(ref string, scope *SchemaIdScope) (string, error) { + if ref == "" { + return "", fmt.Errorf("$ref cannot be empty") + } + + parsedRef, err := url.Parse(ref) + if err != nil { + return "", fmt.Errorf("invalid $ref URI: %s: %w", ref, err) + } + + if parsedRef.IsAbs() { + return ref, nil + } + + if scope == nil || scope.BaseUri == "" { + return ref, nil + } + + parsedBase, err := url.Parse(scope.BaseUri) + if err != nil { + return "", fmt.Errorf("invalid base URI in scope: %s: %w", scope.BaseUri, err) + } + + resolved := parsedBase.ResolveReference(parsedRef) + return resolved.String(), nil +} + +// SplitRefFragment splits a reference into base URI and fragment components. +// Example: "https://example.com/schema.json#/definitions/Pet" -> +// baseUri="https://example.com/schema.json", fragment="#/definitions/Pet" +func SplitRefFragment(ref string) (baseUri string, fragment string) { + idx := strings.Index(ref, "#") + if idx == -1 { + return ref, "" + } + return ref[:idx], ref[idx:] +} + +// ResolveRefViaSchemaId attempts to resolve a $ref via the $id registry. +// Implements JSON Schema 2020-12 $id-based resolution: +// 1. Split ref into base URI and fragment +// 2. Look up base URI in $id registry +// 3. Navigate to fragment within found schema if present +// Returns nil if the ref cannot be resolved via $id. +func (index *SpecIndex) ResolveRefViaSchemaId(ref string) *Reference { + if ref == "" { + return nil + } + + baseUri, fragment := SplitRefFragment(ref) + + // Local fragment refs are not $id-based + if baseUri == "" { + return nil + } + + // Check local index first, then rolodex global registry + entry := index.GetSchemaById(baseUri) + if entry == nil && index.rolodex != nil { + entry = index.rolodex.LookupSchemaById(baseUri) + } + + if entry == nil { + return nil + } + + r := &Reference{ + FullDefinition: ref, + Definition: ref, + Name: baseUri, + Node: entry.SchemaNode, + IsRemote: entry.Index != index, + RemoteLocation: entry.Index.GetSpecAbsolutePath(), + Index: entry.Index, + } + + // Navigate to fragment if present + if fragment != "" && entry.SchemaNode != nil { + if fragmentNode := navigateToFragment(entry.SchemaNode, fragment); fragmentNode != nil { + r.Node = fragmentNode + } + } + + return r +} + +// navigateToFragment navigates to a JSON pointer fragment within a YAML node. +// Fragment format: "#/path/to/node" or "/path/to/node" +func navigateToFragment(root *yaml.Node, fragment string) *yaml.Node { + if root == nil || fragment == "" { + return nil + } + + path := strings.TrimPrefix(fragment, "#") + if path == "" || path == "/" { + return root + } + + segments := strings.Split(strings.TrimPrefix(path, "/"), "/") + + current := root + if current.Kind == yaml.DocumentNode && len(current.Content) > 0 { + current = current.Content[0] + } + + for _, segment := range segments { + if segment == "" { + continue + } + + // Decode JSON pointer escapes (~1 = /, ~0 = ~) + segment = strings.ReplaceAll(segment, "~1", "/") + segment = strings.ReplaceAll(segment, "~0", "~") + + found := false + if current.Kind == yaml.MappingNode { + for i := 0; i < len(current.Content)-1; i += 2 { + if current.Content[i].Value == segment { + current = current.Content[i+1] + found = true + break + } + } + } else if current.Kind == yaml.SequenceNode { + idx := 0 + for _, c := range segment { + if c < '0' || c > '9' { + return nil + } + idx = idx*10 + int(c-'0') + } + if idx < len(current.Content) { + current = current.Content[idx] + found = true + } + } + + if !found { + return nil + } + } + + return current +} diff --git a/index/schema_id_test.go b/index/schema_id_test.go new file mode 100644 index 00000000..295c967d --- /dev/null +++ b/index/schema_id_test.go @@ -0,0 +1,1157 @@ +// Copyright 2022-2025 Princess Beef Heavy Industries / Dave Shanley +// SPDX-License-Identifier: MIT + +package index + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "go.yaml.in/yaml/v4" +) + +func TestSchemaIdEntry(t *testing.T) { + entry := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + SchemaNode: &yaml.Node{Kind: yaml.MappingNode}, + ParentId: "", + Index: nil, + DefinitionPath: "#/components/schemas/Pet", + Line: 10, + Column: 5, + } + + assert.Equal(t, "https://example.com/schema.json", entry.Id) + assert.Equal(t, "https://example.com/schema.json", entry.ResolvedUri) + assert.NotNil(t, entry.SchemaNode) + assert.Equal(t, "", entry.ParentId) + assert.Nil(t, entry.Index) + assert.Equal(t, "#/components/schemas/Pet", entry.DefinitionPath) + assert.Equal(t, 10, entry.Line) + assert.Equal(t, 5, entry.Column) +} + +func TestNewSchemaIdScope(t *testing.T) { + scope := NewSchemaIdScope("https://example.com/base.json") + + assert.Equal(t, "https://example.com/base.json", scope.BaseUri) + assert.Empty(t, scope.Chain) +} + +func TestSchemaIdScope_PushId(t *testing.T) { + scope := NewSchemaIdScope("https://example.com/base.json") + + scope.PushId("https://example.com/schema1.json") + assert.Equal(t, "https://example.com/schema1.json", scope.BaseUri) + assert.Len(t, scope.Chain, 1) + assert.Equal(t, "https://example.com/schema1.json", scope.Chain[0]) + + scope.PushId("https://example.com/schema2.json") + assert.Equal(t, "https://example.com/schema2.json", scope.BaseUri) + assert.Len(t, scope.Chain, 2) + assert.Equal(t, "https://example.com/schema2.json", scope.Chain[1]) +} + +func TestSchemaIdScope_PopId(t *testing.T) { + scope := NewSchemaIdScope("https://example.com/base.json") + scope.PushId("https://example.com/schema1.json") + scope.PushId("https://example.com/schema2.json") + + scope.PopId() + assert.Equal(t, "https://example.com/schema1.json", scope.BaseUri) + assert.Len(t, scope.Chain, 1) + + scope.PopId() + assert.Empty(t, scope.Chain) + + // Pop on empty chain should not panic + scope.PopId() + assert.Empty(t, scope.Chain) +} + +func TestSchemaIdScope_Copy(t *testing.T) { + scope := NewSchemaIdScope("https://example.com/base.json") + scope.PushId("https://example.com/schema1.json") + + copied := scope.Copy() + + assert.Equal(t, scope.BaseUri, copied.BaseUri) + assert.Equal(t, scope.Chain, copied.Chain) + + // Modifying original should not affect copy + scope.PushId("https://example.com/schema2.json") + assert.NotEqual(t, scope.BaseUri, copied.BaseUri) + assert.Len(t, copied.Chain, 1) + assert.Len(t, scope.Chain, 2) +} + +func TestValidateSchemaId(t *testing.T) { + tests := []struct { + name string + id string + wantErr bool + errMsg string + }{ + { + name: "valid absolute URI", + id: "https://example.com/schema.json", + wantErr: false, + }, + { + name: "valid relative URI", + id: "schema.json", + wantErr: false, + }, + { + name: "valid relative path", + id: "./schemas/pet.json", + wantErr: false, + }, + { + name: "empty $id", + id: "", + wantErr: true, + errMsg: "$id cannot be empty", + }, + { + name: "$id with fragment", + id: "https://example.com/schema.json#/definitions", + wantErr: true, + errMsg: "$id must not contain fragment identifier '#'", + }, + { + name: "$id with just fragment", + id: "#anchor", + wantErr: true, + errMsg: "$id must not contain fragment identifier '#'", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateSchemaId(tt.id) + if tt.wantErr { + assert.Error(t, err) + assert.Contains(t, err.Error(), tt.errMsg) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestResolveSchemaId(t *testing.T) { + tests := []struct { + name string + id string + baseUri string + expected string + wantErr bool + }{ + { + name: "absolute $id ignores base", + id: "https://example.com/schema.json", + baseUri: "https://other.com/base.json", + expected: "https://example.com/schema.json", + wantErr: false, + }, + { + name: "relative $id resolved against base", + id: "schema.json", + baseUri: "https://example.com/schemas/", + expected: "https://example.com/schemas/schema.json", + wantErr: false, + }, + { + name: "relative path with directory", + id: "../common/types.json", + baseUri: "https://example.com/schemas/pets/", + expected: "https://example.com/schemas/common/types.json", + wantErr: false, + }, + { + name: "relative $id without base", + id: "schema.json", + baseUri: "", + expected: "schema.json", + wantErr: false, + }, + { + name: "empty $id", + id: "", + baseUri: "https://example.com/", + expected: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ResolveSchemaId(tt.id, tt.baseUri) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + }) + } +} + +func TestResolveRefAgainstSchemaId(t *testing.T) { + tests := []struct { + name string + ref string + scope *SchemaIdScope + expected string + wantErr bool + }{ + { + name: "absolute ref returns as-is", + ref: "https://example.com/schema.json", + scope: NewSchemaIdScope("https://other.com/base.json"), + expected: "https://example.com/schema.json", + wantErr: false, + }, + { + name: "relative ref resolved against scope base", + ref: "pet.json", + scope: NewSchemaIdScope("https://example.com/schemas/"), + expected: "https://example.com/schemas/pet.json", + wantErr: false, + }, + { + name: "relative ref with fragment", + ref: "common.json#/definitions/Error", + scope: NewSchemaIdScope("https://example.com/schemas/"), + expected: "https://example.com/schemas/common.json#/definitions/Error", + wantErr: false, + }, + { + name: "relative ref without scope", + ref: "pet.json", + scope: nil, + expected: "pet.json", + wantErr: false, + }, + { + name: "relative ref with empty base", + ref: "pet.json", + scope: NewSchemaIdScope(""), + expected: "pet.json", + wantErr: false, + }, + { + name: "empty ref", + ref: "", + scope: NewSchemaIdScope("https://example.com/"), + expected: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ResolveRefAgainstSchemaId(tt.ref, tt.scope) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + }) + } +} + +func TestSplitRefFragment(t *testing.T) { + tests := []struct { + name string + ref string + wantBase string + wantFragment string + }{ + { + name: "ref with fragment", + ref: "https://example.com/schema.json#/definitions/Pet", + wantBase: "https://example.com/schema.json", + wantFragment: "#/definitions/Pet", + }, + { + name: "ref without fragment", + ref: "https://example.com/schema.json", + wantBase: "https://example.com/schema.json", + wantFragment: "", + }, + { + name: "relative ref with fragment", + ref: "pet.json#/properties/name", + wantBase: "pet.json", + wantFragment: "#/properties/name", + }, + { + name: "just fragment", + ref: "#/definitions/Pet", + wantBase: "", + wantFragment: "#/definitions/Pet", + }, + { + name: "empty ref", + ref: "", + wantBase: "", + wantFragment: "", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + base, fragment := SplitRefFragment(tt.ref) + assert.Equal(t, tt.wantBase, base) + assert.Equal(t, tt.wantFragment, fragment) + }) + } +} + +func TestResolveSchemaId_InvalidURIs(t *testing.T) { + // Test invalid base URI + _, err := ResolveSchemaId("schema.json", "://invalid-base") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid base URI") + + // Test invalid $id URI (control characters) + _, err = ResolveSchemaId("schema\x00.json", "https://example.com/") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid $id URI") +} + +func TestResolveRefAgainstSchemaId_InvalidBaseInScope(t *testing.T) { + // Test invalid base URI in scope + scope := &SchemaIdScope{ + BaseUri: "://invalid-base", + Chain: []string{}, + } + _, err := ResolveRefAgainstSchemaId("schema.json", scope) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid base URI in scope") +} + +func TestSchemaIdScope_NestedScopes(t *testing.T) { + // Test a realistic nested $id scenario + // Document base: https://example.com/openapi.yaml + // Schema 1: $id: "https://example.com/schemas/pet.json" + // Schema 2 (nested): $id: "definitions/category.json" (relative) + + scope := NewSchemaIdScope("https://example.com/openapi.yaml") + + // First $id is absolute + scope.PushId("https://example.com/schemas/pet.json") + assert.Equal(t, "https://example.com/schemas/pet.json", scope.BaseUri) + + // Resolve a relative $id against current base + resolved, err := ResolveSchemaId("definitions/category.json", scope.BaseUri) + assert.NoError(t, err) + assert.Equal(t, "https://example.com/schemas/definitions/category.json", resolved) + + // Push the nested $id + scope.PushId(resolved) + assert.Equal(t, "https://example.com/schemas/definitions/category.json", scope.BaseUri) + assert.Len(t, scope.Chain, 2) + + // Resolve a relative $ref from this nested scope + refResolved, err := ResolveRefAgainstSchemaId("../common/types.json", scope) + assert.NoError(t, err) + assert.Equal(t, "https://example.com/schemas/common/types.json", refResolved) +} + +// SpecIndex registry tests + +func TestSpecIndex_RegisterSchemaId(t *testing.T) { + index := &SpecIndex{} + + entry := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + SchemaNode: &yaml.Node{Kind: yaml.MappingNode}, + Index: index, + DefinitionPath: "#/components/schemas/Pet", + Line: 10, + Column: 5, + } + + err := index.RegisterSchemaId(entry) + assert.NoError(t, err) + + // Verify registration + found := index.GetSchemaById("https://example.com/schema.json") + assert.NotNil(t, found) + assert.Equal(t, entry.Id, found.Id) +} + +func TestSpecIndex_RegisterSchemaId_Nil(t *testing.T) { + index := &SpecIndex{} + err := index.RegisterSchemaId(nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "cannot register nil") +} + +func TestSpecIndex_RegisterSchemaId_Invalid(t *testing.T) { + index := &SpecIndex{} + entry := &SchemaIdEntry{ + Id: "https://example.com/schema.json#fragment", + Line: 10, + } + + err := index.RegisterSchemaId(entry) + assert.Error(t, err) + assert.Contains(t, err.Error(), "fragment") +} + +func TestSpecIndex_RegisterSchemaId_Duplicate(t *testing.T) { + index := &SpecIndex{} + + entry1 := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + Index: index, + Line: 10, + } + + entry2 := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + Index: index, + Line: 20, + } + + err := index.RegisterSchemaId(entry1) + assert.NoError(t, err) + + // Second registration should not error (first-wins) + err = index.RegisterSchemaId(entry2) + assert.NoError(t, err) + + // Verify first entry is kept + found := index.GetSchemaById("https://example.com/schema.json") + assert.Equal(t, 10, found.Line) +} + +func TestSpecIndex_RegisterSchemaId_UsesIdWhenResolvedUriEmpty(t *testing.T) { + index := &SpecIndex{} + + entry := &SchemaIdEntry{ + Id: "schema.json", + ResolvedUri: "", // Empty resolved URI + Index: index, + Line: 10, + } + + err := index.RegisterSchemaId(entry) + assert.NoError(t, err) + + // Should be registered under Id, not ResolvedUri + found := index.GetSchemaById("schema.json") + assert.NotNil(t, found) +} + +func TestSpecIndex_GetSchemaById_Empty(t *testing.T) { + index := &SpecIndex{} + + found := index.GetSchemaById("https://example.com/not-found.json") + assert.Nil(t, found) +} + +func TestSpecIndex_GetAllSchemaIds(t *testing.T) { + index := &SpecIndex{} + + entry1 := &SchemaIdEntry{ + Id: "https://example.com/a.json", + ResolvedUri: "https://example.com/a.json", + Index: index, + } + entry2 := &SchemaIdEntry{ + Id: "https://example.com/b.json", + ResolvedUri: "https://example.com/b.json", + Index: index, + } + + _ = index.RegisterSchemaId(entry1) + _ = index.RegisterSchemaId(entry2) + + all := index.GetAllSchemaIds() + assert.Len(t, all, 2) + assert.NotNil(t, all["https://example.com/a.json"]) + assert.NotNil(t, all["https://example.com/b.json"]) +} + +func TestSpecIndex_GetAllSchemaIds_Empty(t *testing.T) { + index := &SpecIndex{} + all := index.GetAllSchemaIds() + assert.NotNil(t, all) + assert.Empty(t, all) +} + +// Rolodex registry tests + +func TestRolodex_RegisterGlobalSchemaId(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + index := &SpecIndex{} + + entry := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + Index: index, + Line: 10, + } + + err := rolodex.RegisterGlobalSchemaId(entry) + assert.NoError(t, err) + + // Verify registration + found := rolodex.LookupSchemaById("https://example.com/schema.json") + assert.NotNil(t, found) + assert.Equal(t, entry.Id, found.Id) +} + +func TestRolodex_RegisterGlobalSchemaId_NilRolodex(t *testing.T) { + var rolodex *Rolodex + entry := &SchemaIdEntry{Id: "test"} + err := rolodex.RegisterGlobalSchemaId(entry) + assert.Error(t, err) + assert.Contains(t, err.Error(), "nil Rolodex") +} + +func TestRolodex_RegisterGlobalSchemaId_NilEntry(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + err := rolodex.RegisterGlobalSchemaId(nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "nil SchemaIdEntry") +} + +func TestRolodex_RegisterGlobalSchemaId_Invalid(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + entry := &SchemaIdEntry{ + Id: "https://example.com/schema.json#fragment", + Line: 10, + } + + err := rolodex.RegisterGlobalSchemaId(entry) + assert.Error(t, err) + assert.Contains(t, err.Error(), "fragment") +} + +func TestRolodex_RegisterGlobalSchemaId_Duplicate(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + index := &SpecIndex{} + + entry1 := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + Index: index, + Line: 10, + } + + entry2 := &SchemaIdEntry{ + Id: "https://example.com/schema.json", + ResolvedUri: "https://example.com/schema.json", + Index: index, + Line: 20, + } + + err := rolodex.RegisterGlobalSchemaId(entry1) + assert.NoError(t, err) + + // Second registration should not error (first-wins) + err = rolodex.RegisterGlobalSchemaId(entry2) + assert.NoError(t, err) + + // Verify first entry is kept + found := rolodex.LookupSchemaById("https://example.com/schema.json") + assert.Equal(t, 10, found.Line) +} + +func TestRolodex_LookupSchemaById_NilRolodex(t *testing.T) { + var rolodex *Rolodex + found := rolodex.LookupSchemaById("test") + assert.Nil(t, found) +} + +func TestRolodex_LookupSchemaById_Empty(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + found := rolodex.LookupSchemaById("https://example.com/not-found.json") + assert.Nil(t, found) +} + +func TestRolodex_GetAllGlobalSchemaIds(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + index := &SpecIndex{} + + entry1 := &SchemaIdEntry{ + Id: "https://example.com/a.json", + ResolvedUri: "https://example.com/a.json", + Index: index, + } + entry2 := &SchemaIdEntry{ + Id: "https://example.com/b.json", + ResolvedUri: "https://example.com/b.json", + Index: index, + } + + _ = rolodex.RegisterGlobalSchemaId(entry1) + _ = rolodex.RegisterGlobalSchemaId(entry2) + + all := rolodex.GetAllGlobalSchemaIds() + assert.Len(t, all, 2) + assert.NotNil(t, all["https://example.com/a.json"]) + assert.NotNil(t, all["https://example.com/b.json"]) +} + +func TestRolodex_GetAllGlobalSchemaIds_NilRolodex(t *testing.T) { + var rolodex *Rolodex + all := rolodex.GetAllGlobalSchemaIds() + assert.NotNil(t, all) + assert.Empty(t, all) +} + +func TestRolodex_GetAllGlobalSchemaIds_Empty(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + all := rolodex.GetAllGlobalSchemaIds() + assert.NotNil(t, all) + assert.Empty(t, all) +} + +func TestRolodex_RegisterIdsFromIndex(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + index := &SpecIndex{} + + // Register entries in the index + entry1 := &SchemaIdEntry{ + Id: "https://example.com/a.json", + ResolvedUri: "https://example.com/a.json", + Index: index, + } + entry2 := &SchemaIdEntry{ + Id: "https://example.com/b.json", + ResolvedUri: "https://example.com/b.json", + Index: index, + } + + _ = index.RegisterSchemaId(entry1) + _ = index.RegisterSchemaId(entry2) + + // Aggregate to rolodex + rolodex.RegisterIdsFromIndex(index) + + // Verify both are in global registry + all := rolodex.GetAllGlobalSchemaIds() + assert.Len(t, all, 2) +} + +func TestRolodex_RegisterIdsFromIndex_NilRolodex(t *testing.T) { + var rolodex *Rolodex + index := &SpecIndex{} + // Should not panic + rolodex.RegisterIdsFromIndex(index) +} + +func TestRolodex_RegisterIdsFromIndex_NilIndex(t *testing.T) { + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + // Should not panic + rolodex.RegisterIdsFromIndex(nil) +} + +// Integration test: verify $id extraction during indexing + +func TestSchemaId_ExtractionDuringIndexing(t *testing.T) { + // OpenAPI 3.1 spec with $id declarations + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object + properties: + name: + type: string + Category: + $id: "https://example.com/schemas/category.json" + type: object + properties: + id: + type: integer +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + rolodex := NewRolodex(config) + + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Add index to rolodex (this triggers RegisterIdsFromIndex) + rolodex.AddIndex(index) + + // Verify $id entries were registered in the index + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 2) + assert.NotNil(t, allIds["https://example.com/schemas/pet.json"]) + assert.NotNil(t, allIds["https://example.com/schemas/category.json"]) + + // Verify $id entries were aggregated to rolodex + globalIds := rolodex.GetAllGlobalSchemaIds() + assert.Len(t, globalIds, 2) + assert.NotNil(t, globalIds["https://example.com/schemas/pet.json"]) + assert.NotNil(t, globalIds["https://example.com/schemas/category.json"]) + + // Verify lookup works + petEntry := rolodex.LookupSchemaById("https://example.com/schemas/pet.json") + assert.NotNil(t, petEntry) + assert.Equal(t, "https://example.com/schemas/pet.json", petEntry.Id) + assert.Contains(t, petEntry.DefinitionPath, "Pet") +} + +func TestSchemaId_ExtractionWithInvalidId(t *testing.T) { + // OpenAPI 3.1 spec with invalid $id (contains fragment) + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json#invalid" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Invalid $id should not be registered + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 0) +} + +func TestSchemaId_ExtractionWithRelativeId(t *testing.T) { + // OpenAPI 3.1 spec with relative $id + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "schemas/pet.json" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/api/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Relative $id should be resolved against document base + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 1) + + // Should be resolved to absolute URI + resolved := allIds["https://example.com/api/schemas/pet.json"] + assert.NotNil(t, resolved) + assert.Equal(t, "schemas/pet.json", resolved.Id) + assert.Equal(t, "https://example.com/api/schemas/pet.json", resolved.ResolvedUri) +} + +// Resolution tests + +func TestResolveRefViaSchemaId(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object + properties: + name: + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Test resolution by $id + resolved := index.ResolveRefViaSchemaId("https://example.com/schemas/pet.json") + assert.NotNil(t, resolved) + assert.Equal(t, "https://example.com/schemas/pet.json", resolved.FullDefinition) +} + +func TestResolveRefViaSchemaId_NotFound(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Should return nil for unknown $id + resolved := index.ResolveRefViaSchemaId("https://example.com/not-found.json") + assert.Nil(t, resolved) +} + +func TestResolveRefViaSchemaId_EmptyRef(t *testing.T) { + index := &SpecIndex{} + resolved := index.ResolveRefViaSchemaId("") + assert.Nil(t, resolved) +} + +func TestResolveRefViaSchemaId_LocalFragment(t *testing.T) { + index := &SpecIndex{} + // Local fragments (starting with #) should not be resolved via $id + resolved := index.ResolveRefViaSchemaId("#/components/schemas/Pet") + assert.Nil(t, resolved) +} + +func TestResolveRefViaSchemaId_WithFragment(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object + properties: + name: + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Test resolution with fragment + resolved := index.ResolveRefViaSchemaId("https://example.com/schemas/pet.json#/properties/name") + assert.NotNil(t, resolved) + // The resolved node should be the "name" property schema + if resolved.Node != nil { + // Check it's the right node (type: string) + for i := 0; i < len(resolved.Node.Content)-1; i += 2 { + if resolved.Node.Content[i].Value == "type" { + assert.Equal(t, "string", resolved.Node.Content[i+1].Value) + break + } + } + } +} + +// Fragment navigation tests + +func TestNavigateToFragment(t *testing.T) { + yamlContent := `type: object +properties: + name: + type: string + age: + type: integer +items: + - first + - second +` + var node yaml.Node + err := yaml.Unmarshal([]byte(yamlContent), &node) + assert.NoError(t, err) + + root := node.Content[0] // Get the mapping node + + tests := []struct { + name string + fragment string + wantNil bool + checkVal string // If not empty, check this value in the result + }{ + { + name: "empty fragment returns root", + fragment: "", + wantNil: true, // Empty returns nil, not root + }, + { + name: "hash only returns root", + fragment: "#", + wantNil: false, + }, + { + name: "single slash returns root", + fragment: "#/", + wantNil: false, + }, + { + name: "navigate to type", + fragment: "#/type", + wantNil: false, + checkVal: "object", + }, + { + name: "navigate to properties/name", + fragment: "#/properties/name", + wantNil: false, + }, + { + name: "navigate to properties/name/type", + fragment: "#/properties/name/type", + wantNil: false, + checkVal: "string", + }, + { + name: "navigate to items/0", + fragment: "#/items/0", + wantNil: false, + checkVal: "first", + }, + { + name: "navigate to items/1", + fragment: "#/items/1", + wantNil: false, + checkVal: "second", + }, + { + name: "navigate to non-existent path", + fragment: "#/nonexistent", + wantNil: true, + }, + { + name: "navigate to invalid array index", + fragment: "#/items/99", + wantNil: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := navigateToFragment(root, tt.fragment) + if tt.wantNil { + assert.Nil(t, result) + } else { + assert.NotNil(t, result) + if tt.checkVal != "" { + assert.Equal(t, tt.checkVal, result.Value) + } + } + }) + } +} + +func TestNavigateToFragment_NilRoot(t *testing.T) { + result := navigateToFragment(nil, "#/test") + assert.Nil(t, result) +} + +func TestNavigateToFragment_EscapedCharacters(t *testing.T) { + // Test JSON pointer escape sequences (~0 = ~, ~1 = /) + yamlContent := `properties: + "key/with/slashes": + type: string + "key~with~tildes": + type: integer +` + var node yaml.Node + err := yaml.Unmarshal([]byte(yamlContent), &node) + assert.NoError(t, err) + + root := node.Content[0] + + // Test ~1 escaping for slashes + result := navigateToFragment(root, "#/properties/key~1with~1slashes") + assert.NotNil(t, result) + + // Test ~0 escaping for tildes + result = navigateToFragment(root, "#/properties/key~0with~0tildes") + assert.NotNil(t, result) +} + +// Circular reference protection tests + +func TestGetResolvingIds_Empty(t *testing.T) { + ctx := context.Background() + ids := GetResolvingIds(ctx) + assert.Nil(t, ids) +} + +func TestAddResolvingId(t *testing.T) { + ctx := context.Background() + + ctx = AddResolvingId(ctx, "https://example.com/a.json") + ids := GetResolvingIds(ctx) + assert.NotNil(t, ids) + assert.True(t, ids["https://example.com/a.json"]) + + ctx = AddResolvingId(ctx, "https://example.com/b.json") + ids = GetResolvingIds(ctx) + assert.Len(t, ids, 2) + assert.True(t, ids["https://example.com/a.json"]) + assert.True(t, ids["https://example.com/b.json"]) +} + +func TestIsIdBeingResolved(t *testing.T) { + ctx := context.Background() + + // Not being resolved initially + assert.False(t, IsIdBeingResolved(ctx, "https://example.com/a.json")) + + // Add to resolving set + ctx = AddResolvingId(ctx, "https://example.com/a.json") + assert.True(t, IsIdBeingResolved(ctx, "https://example.com/a.json")) + assert.False(t, IsIdBeingResolved(ctx, "https://example.com/b.json")) +} + +func TestIsIdBeingResolved_EmptyContext(t *testing.T) { + ctx := context.Background() + assert.False(t, IsIdBeingResolved(ctx, "anything")) +} + +// Test nested $id resolution - critical for JSON Schema 2020-12 compliance +func TestSchemaId_NestedIdResolution(t *testing.T) { + // This tests the critical fix: nested $id should resolve against parent $id, not document base + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Parent: + $id: "https://example.com/schemas/base.json" + type: object + properties: + child: + $id: "subschema.json" + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 2, "Should have 2 $id entries") + + // Parent $id should resolve to its declared value + parentEntry := allIds["https://example.com/schemas/base.json"] + assert.NotNil(t, parentEntry, "Parent $id should be registered") + assert.Equal(t, "https://example.com/schemas/base.json", parentEntry.ResolvedUri) + + // Nested $id should resolve against parent, NOT document base + // subschema.json relative to https://example.com/schemas/base.json = https://example.com/schemas/subschema.json + nestedEntry := allIds["https://example.com/schemas/subschema.json"] + assert.NotNil(t, nestedEntry, "Nested $id should be registered with correct resolution") + assert.Equal(t, "subschema.json", nestedEntry.Id) + assert.Equal(t, "https://example.com/schemas/subschema.json", nestedEntry.ResolvedUri) + assert.Equal(t, "https://example.com/schemas/base.json", nestedEntry.ParentId, "Should track parent $id") +} + +func TestGetSchemaIdScope_Empty(t *testing.T) { + ctx := context.Background() + scope := GetSchemaIdScope(ctx) + assert.Nil(t, scope) +} + +func TestWithSchemaIdScope(t *testing.T) { + ctx := context.Background() + scope := NewSchemaIdScope("https://example.com/base.json") + ctx = WithSchemaIdScope(ctx, scope) + + retrieved := GetSchemaIdScope(ctx) + assert.NotNil(t, retrieved) + assert.Equal(t, "https://example.com/base.json", retrieved.BaseUri) +} + +func TestSchemaId_DeeplyNestedIdResolution(t *testing.T) { + // Test 3-level deep nesting + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Root: + $id: "https://example.com/root.json" + type: object + properties: + level1: + $id: "level1/" + type: object + properties: + level2: + $id: "level2.json" + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 3, "Should have 3 $id entries") + + // Root: https://example.com/root.json + assert.NotNil(t, allIds["https://example.com/root.json"]) + + // Level 1: level1/ relative to https://example.com/root.json = https://example.com/level1/ + assert.NotNil(t, allIds["https://example.com/level1/"]) + + // Level 2: level2.json relative to https://example.com/level1/ = https://example.com/level1/level2.json + level2Entry := allIds["https://example.com/level1/level2.json"] + assert.NotNil(t, level2Entry, "Level 2 should resolve against level 1, not root") + assert.Equal(t, "https://example.com/level1/level2.json", level2Entry.ResolvedUri) +} diff --git a/index/schema_id_types.go b/index/schema_id_types.go new file mode 100644 index 00000000..ea118eff --- /dev/null +++ b/index/schema_id_types.go @@ -0,0 +1,73 @@ +// Copyright 2022-2025 Princess Beef Heavy Industries / Dave Shanley +// SPDX-License-Identifier: MIT + +package index + +import ( + "go.yaml.in/yaml/v4" +) + +// SchemaIdEntry represents a schema registered by its JSON Schema 2020-12 $id. +// This enables $ref resolution against $id values per JSON Schema specification. +type SchemaIdEntry struct { + Id string // The $id value as declared in the schema + ResolvedUri string // Fully resolved absolute URI after applying base URI resolution + SchemaNode *yaml.Node // The YAML node containing the schema with this $id + ParentId string // The $id of the parent scope (for nested schemas with $id) + Index *SpecIndex // Reference to the SpecIndex containing this schema + DefinitionPath string // JSON pointer path to this schema (e.g., #/components/schemas/Pet) + Line int // Line number where $id was declared (for error reporting) + Column int // Column number where $id was declared (for error reporting) +} + +// GetKey returns the registry key for this entry. +// Uses ResolvedUri if available, otherwise falls back to Id. +func (e *SchemaIdEntry) GetKey() string { + if e.ResolvedUri != "" { + return e.ResolvedUri + } + return e.Id +} + +// SchemaIdScope tracks the resolution context during tree walking. +// Used to maintain the base URI hierarchy when extracting $id values. +type SchemaIdScope struct { + BaseUri string // Current base URI for relative $id and $ref resolution + Chain []string // Stack of $id URIs from root to current location +} + +// NewSchemaIdScope initializes scope tracking for base URI resolution during schema tree traversal. +func NewSchemaIdScope(baseUri string) *SchemaIdScope { + return &SchemaIdScope{ + BaseUri: baseUri, + Chain: make([]string, 0), + } +} + +// PushId updates the base URI context when entering a schema with $id. +// The new $id becomes the base for resolving relative references in child schemas. +func (s *SchemaIdScope) PushId(id string) { + s.Chain = append(s.Chain, id) + s.BaseUri = id +} + +// PopId restores the previous base URI when exiting a schema scope. +func (s *SchemaIdScope) PopId() { + if len(s.Chain) > 0 { + s.Chain = s.Chain[:len(s.Chain)-1] + if len(s.Chain) > 0 { + s.BaseUri = s.Chain[len(s.Chain)-1] + } + } +} + +// Copy creates an independent scope for exploring alternative branches without +// affecting the parent scope's state (used in anyOf/oneOf/allOf traversal). +func (s *SchemaIdScope) Copy() *SchemaIdScope { + chainCopy := make([]string, len(s.Chain)) + copy(chainCopy, s.Chain) + return &SchemaIdScope{ + BaseUri: s.BaseUri, + Chain: chainCopy, + } +} diff --git a/index/search_index.go b/index/search_index.go index f3e0199a..ca2db2f0 100644 --- a/index/search_index.go +++ b/index/search_index.go @@ -102,6 +102,15 @@ func (index *SpecIndex) SearchIndexForReferenceByReferenceWithContext(ctx contex } } + // Try to resolve via JSON Schema 2020-12 $id registry first + // This handles refs like "a.json" resolving to schemas with $id: "https://example.com/a.json" + if resolved := index.ResolveRefViaSchemaId(searchRef.FullDefinition); resolved != nil { + if index.cache != nil { + index.cache.Store(searchRef.FullDefinition, resolved) + } + return resolved, resolved.Index, context.WithValue(ctx, CurrentPathKey, resolved.RemoteLocation) + } + ref := searchRef.FullDefinition refAlt := ref absPath := index.specAbsolutePath diff --git a/index/spec_index.go b/index/spec_index.go index d7cf7c01..8bdd69c9 100644 --- a/index/spec_index.go +++ b/index/spec_index.go @@ -1473,3 +1473,35 @@ func (index *SpecIndex) GetAllDescriptionsCount() int { func (index *SpecIndex) GetAllSummariesCount() int { return len(index.allSummaries) } + +// RegisterSchemaId registers a schema by its $id in this index. +// Returns an error if the $id is invalid (e.g., contains a fragment). +func (index *SpecIndex) RegisterSchemaId(entry *SchemaIdEntry) error { + index.schemaIdRegistryLock.Lock() + defer index.schemaIdRegistryLock.Unlock() + + if index.schemaIdRegistry == nil { + index.schemaIdRegistry = make(map[string]*SchemaIdEntry) + } + + _, err := registerSchemaIdToRegistry(index.schemaIdRegistry, entry, index.logger, "local index") + return err +} + +// GetSchemaById looks up a schema by its $id URI. +func (index *SpecIndex) GetSchemaById(uri string) *SchemaIdEntry { + index.schemaIdRegistryLock.RLock() + defer index.schemaIdRegistryLock.RUnlock() + + if index.schemaIdRegistry == nil { + return nil + } + return index.schemaIdRegistry[uri] +} + +// GetAllSchemaIds returns a copy of all registered $id entries in this index. +func (index *SpecIndex) GetAllSchemaIds() map[string]*SchemaIdEntry { + index.schemaIdRegistryLock.RLock() + defer index.schemaIdRegistryLock.RUnlock() + return copySchemaIdRegistry(index.schemaIdRegistry) +} diff --git a/what-changed/model/breaking_rules.go b/what-changed/model/breaking_rules.go index 4f640473..4604880f 100644 --- a/what-changed/model/breaking_rules.go +++ b/what-changed/model/breaking_rules.go @@ -335,6 +335,7 @@ func buildDefaultRules() *BreakingRulesConfig { UnevaluatedProperties: rule(true, true, true), DynamicAnchor: rule(false, true, true), // $dynamicAnchor: modification/removal is breaking DynamicRef: rule(false, true, true), // $dynamicRef: modification/removal is breaking + Id: rule(true, true, true), // $id: all changes are breaking (affects reference resolution) DependentRequired: rule(false, true, true), XML: rule(false, false, true), SchemaDialect: rule(true, true, true), diff --git a/what-changed/model/breaking_rules_constants.go b/what-changed/model/breaking_rules_constants.go index 115e7993..c00a1eb7 100644 --- a/what-changed/model/breaking_rules_constants.go +++ b/what-changed/model/breaking_rules_constants.go @@ -82,6 +82,7 @@ const ( PropDiscriminator = "discriminator" PropDynamicAnchor = "$dynamicAnchor" PropDynamicRef = "$dynamicRef" + PropId = "$id" PropElse = "else" PropEmail = "email" PropEnum = "enum" diff --git a/what-changed/model/breaking_rules_model.go b/what-changed/model/breaking_rules_model.go index b1e96654..8c716dc3 100644 --- a/what-changed/model/breaking_rules_model.go +++ b/what-changed/model/breaking_rules_model.go @@ -192,6 +192,7 @@ type SchemaRules struct { UnevaluatedProperties *BreakingChangeRule `json:"unevaluatedProperties,omitempty" yaml:"unevaluatedProperties,omitempty"` DynamicAnchor *BreakingChangeRule `json:"$dynamicAnchor,omitempty" yaml:"$dynamicAnchor,omitempty"` DynamicRef *BreakingChangeRule `json:"$dynamicRef,omitempty" yaml:"$dynamicRef,omitempty"` + Id *BreakingChangeRule `json:"$id,omitempty" yaml:"$id,omitempty"` DependentRequired *BreakingChangeRule `json:"dependentRequired,omitempty" yaml:"dependentRequired,omitempty"` XML *BreakingChangeRule `json:"xml,omitempty" yaml:"xml,omitempty"` SchemaDialect *BreakingChangeRule `json:"schemaDialect,omitempty" yaml:"schemaDialect,omitempty"` diff --git a/what-changed/model/schema.go b/what-changed/model/schema.go index 65efcf4d..09c7d4d1 100644 --- a/what-changed/model/schema.go +++ b/what-changed/model/schema.go @@ -1635,6 +1635,27 @@ func checkSchemaPropertyChanges( lnv = nil rnv = nil + // $id (JSON Schema 2020-12) + if lSchema != nil && lSchema.Id.ValueNode != nil { + lnv = lSchema.Id.ValueNode + } + if rSchema != nil && rSchema.Id.ValueNode != nil { + rnv = rSchema.Id.ValueNode + } + props = append(props, &PropertyCheck{ + LeftNode: lnv, + RightNode: rnv, + Label: base.IdLabel, + Changes: changes, + Breaking: BreakingModified(CompSchema, PropId), + Component: CompSchema, + Property: PropId, + Original: lSchema, + New: rSchema, + }) + lnv = nil + rnv = nil + // check extensions var lext *orderedmap.Map[low.KeyReference[string], low.ValueReference[*yaml.Node]] var rext *orderedmap.Map[low.KeyReference[string], low.ValueReference[*yaml.Node]] From 28a01eb11e545ced2dc12c494619c232e8c97ee2 Mon Sep 17 00:00:00 2001 From: quobix Date: Sun, 21 Dec 2025 20:17:54 -0500 Subject: [PATCH 3/8] bumped coverage --- datamodel/high/base/schema_test.go | 37 ++++ datamodel/low/base/schema_test.go | 78 ++++++++ index/schema_id_test.go | 297 +++++++++++++++++++++++++++++ what-changed/model/schema_test.go | 194 +++++++++++++++++++ 4 files changed, 606 insertions(+) diff --git a/datamodel/high/base/schema_test.go b/datamodel/high/base/schema_test.go index 4e68a851..d5e270e2 100644 --- a/datamodel/high/base/schema_test.go +++ b/datamodel/high/base/schema_test.go @@ -1870,3 +1870,40 @@ func TestSchema_RenderInlineWithContext_Error(t *testing.T) { assert.Nil(t, result) assert.Contains(t, err.Error(), "circular reference") } + +// TestNewSchema_Id tests that the $id field is correctly mapped from low to high level +func TestNewSchema_Id(t *testing.T) { + yml := `type: object +$id: "https://example.com/schemas/pet.json" +description: A pet schema` + + var idxNode yaml.Node + _ = yaml.Unmarshal([]byte(yml), &idxNode) + + var lowSch lowbase.Schema + _ = low.BuildModel(idxNode.Content[0], &lowSch) + _ = lowSch.Build(context.Background(), idxNode.Content[0], nil) + + highSch := NewSchema(&lowSch) + + assert.Equal(t, "https://example.com/schemas/pet.json", highSch.Id) + assert.Equal(t, "object", highSch.Type[0]) + assert.Equal(t, "A pet schema", highSch.Description) +} + +// TestNewSchema_Id_Empty tests that empty $id results in empty string +func TestNewSchema_Id_Empty(t *testing.T) { + yml := `type: object +description: A schema without $id` + + var idxNode yaml.Node + _ = yaml.Unmarshal([]byte(yml), &idxNode) + + var lowSch lowbase.Schema + _ = low.BuildModel(idxNode.Content[0], &lowSch) + _ = lowSch.Build(context.Background(), idxNode.Content[0], nil) + + highSch := NewSchema(&lowSch) + + assert.Equal(t, "", highSch.Id) +} diff --git a/datamodel/low/base/schema_test.go b/datamodel/low/base/schema_test.go index 3c718d9f..d2992126 100644 --- a/datamodel/low/base/schema_test.go +++ b/datamodel/low/base/schema_test.go @@ -2675,3 +2675,81 @@ func TestSchemaDynamicValue_Hash_IsB(t *testing.T) { assert.False(t, value.IsA()) assert.True(t, value.IsB()) } + +// TestSchema_Id tests that the $id field is correctly extracted and included in the hash +func TestSchema_Id(t *testing.T) { + yml := `type: object +$id: "https://example.com/schemas/pet.json" +description: A pet schema` + + var idxNode yaml.Node + _ = yaml.Unmarshal([]byte(yml), &idxNode) + + var sch Schema + err := low.BuildModel(idxNode.Content[0], &sch) + assert.NoError(t, err) + + err = sch.Build(context.Background(), idxNode.Content[0], nil) + assert.NoError(t, err) + + assert.Equal(t, "https://example.com/schemas/pet.json", sch.Id.Value) + assert.NotNil(t, sch.Id.KeyNode) + assert.NotNil(t, sch.Id.ValueNode) +} + +// TestSchema_Id_Hash tests that $id is included in the schema hash +func TestSchema_Id_Hash(t *testing.T) { + yml1 := `type: object +$id: "https://example.com/schemas/a.json" +description: Schema A` + + yml2 := `type: object +$id: "https://example.com/schemas/b.json" +description: Schema A` + + yml3 := `type: object +description: Schema A` + + var node1, node2, node3 yaml.Node + _ = yaml.Unmarshal([]byte(yml1), &node1) + _ = yaml.Unmarshal([]byte(yml2), &node2) + _ = yaml.Unmarshal([]byte(yml3), &node3) + + var sch1, sch2, sch3 Schema + _ = low.BuildModel(node1.Content[0], &sch1) + _ = sch1.Build(context.Background(), node1.Content[0], nil) + + _ = low.BuildModel(node2.Content[0], &sch2) + _ = sch2.Build(context.Background(), node2.Content[0], nil) + + _ = low.BuildModel(node3.Content[0], &sch3) + _ = sch3.Build(context.Background(), node3.Content[0], nil) + + hash1 := sch1.Hash() + hash2 := sch2.Hash() + hash3 := sch3.Hash() + + // Different $id values should produce different hashes + assert.NotEqual(t, hash1, hash2) + // Schema without $id should differ from schema with $id + assert.NotEqual(t, hash1, hash3) + assert.NotEqual(t, hash2, hash3) +} + +// TestSchema_Id_Empty tests that empty $id is not set +func TestSchema_Id_Empty(t *testing.T) { + yml := `type: object +description: A schema without $id` + + var idxNode yaml.Node + _ = yaml.Unmarshal([]byte(yml), &idxNode) + + var sch Schema + err := low.BuildModel(idxNode.Content[0], &sch) + assert.NoError(t, err) + + err = sch.Build(context.Background(), idxNode.Content[0], nil) + assert.NoError(t, err) + + assert.True(t, sch.Id.IsEmpty()) +} diff --git a/index/schema_id_test.go b/index/schema_id_test.go index 295c967d..1fce5110 100644 --- a/index/schema_id_test.go +++ b/index/schema_id_test.go @@ -5,6 +5,7 @@ package index import ( "context" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -1155,3 +1156,299 @@ components: assert.NotNil(t, level2Entry, "Level 2 should resolve against level 1, not root") assert.Equal(t, "https://example.com/level1/level2.json", level2Entry.ResolvedUri) } + +// Test $id lookup through rolodex global registry +func TestResolveRefViaSchemaId_WithRolodex(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object + properties: + name: + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + rolodex := NewRolodex(config) + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Add index to rolodex (triggers RegisterIdsFromIndex) + rolodex.AddIndex(index) + + // Create a second index that references the first via $id + spec2 := `openapi: "3.1.0" +info: + title: Test API 2 + version: 1.0.0 +` + var rootNode2 yaml.Node + err = yaml.Unmarshal([]byte(spec2), &rootNode2) + assert.NoError(t, err) + + config2 := CreateClosedAPIIndexConfig() + config2.SpecAbsolutePath = "https://example.com/api2.yaml" + index2 := NewSpecIndexWithConfig(&rootNode2, config2) + + // Set rolodex on the second index + index2.rolodex = rolodex + + // ResolveRefViaSchemaId should find the schema via rolodex global registry + resolved := index2.ResolveRefViaSchemaId("https://example.com/schemas/pet.json") + assert.NotNil(t, resolved) + assert.Equal(t, "https://example.com/schemas/pet.json", resolved.FullDefinition) +} + +// Test that findSchemaIdInNode returns empty for non-mapping nodes +func TestFindSchemaIdInNode_NonMapping(t *testing.T) { + // Sequence node + seqNode := &yaml.Node{Kind: yaml.SequenceNode} + assert.Equal(t, "", findSchemaIdInNode(seqNode)) + + // Nil node + assert.Equal(t, "", findSchemaIdInNode(nil)) +} + +// Test that findSchemaIdInNode returns empty when $id is not a string +func TestFindSchemaIdInNode_NonStringId(t *testing.T) { + yml := `$id: 123` + var node yaml.Node + _ = yaml.Unmarshal([]byte(yml), &node) + + assert.Equal(t, "", findSchemaIdInNode(node.Content[0])) +} + +// Test error path when $id contains fragment +func TestSchemaId_ExtractionWithFragmentError(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json#invalid" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Invalid $id should not be registered + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 0) + + // Should have an error recorded + errors := index.GetReferenceIndexErrors() + assert.True(t, len(errors) > 0, "Should have recorded an error for invalid $id") + + found := false + for _, e := range errors { + if e != nil && strings.Contains(e.Error(), "invalid $id") { + found = true + break + } + } + assert.True(t, found, "Should find invalid $id error") +} + +// Test fragment navigation with DocumentNode wrapper +func TestNavigateToFragment_DocumentNode(t *testing.T) { + yamlContent := `type: object +properties: + name: + type: string +` + var node yaml.Node + err := yaml.Unmarshal([]byte(yamlContent), &node) + assert.NoError(t, err) + + // node is a DocumentNode wrapping the actual content + assert.Equal(t, yaml.DocumentNode, node.Kind) + + // Navigate should handle DocumentNode + result := navigateToFragment(&node, "#/type") + assert.NotNil(t, result) + assert.Equal(t, "object", result.Value) + + result = navigateToFragment(&node, "#/properties/name/type") + assert.NotNil(t, result) + assert.Equal(t, "string", result.Value) +} + +// Test fragment navigation with invalid array index format +func TestNavigateToFragment_InvalidArrayIndex(t *testing.T) { + yamlContent := `items: + - first + - second +` + var node yaml.Node + err := yaml.Unmarshal([]byte(yamlContent), &node) + assert.NoError(t, err) + + root := node.Content[0] + + // Non-numeric index + result := navigateToFragment(root, "#/items/abc") + assert.Nil(t, result) + + // Negative-like index (actually invalid format) + result = navigateToFragment(root, "#/items/-1") + assert.Nil(t, result) +} + +// Test ResolveRefViaSchemaId caches results +func TestResolveRefViaSchemaId_Caching(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // First resolution + resolved1 := index.ResolveRefViaSchemaId("https://example.com/schemas/pet.json") + assert.NotNil(t, resolved1) + + // Second resolution should use cache + resolved2 := index.ResolveRefViaSchemaId("https://example.com/schemas/pet.json") + assert.NotNil(t, resolved2) + + // Results should be equivalent + assert.Equal(t, resolved1.FullDefinition, resolved2.FullDefinition) +} + +// Test $id extraction uses document base when no scope exists +func TestSchemaId_ExtractionWithDocumentBase(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "schemas/pet.json" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/api/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 1) + + // Should be resolved relative to document base + entry := allIds["https://example.com/api/schemas/pet.json"] + assert.NotNil(t, entry) + assert.Equal(t, "schemas/pet.json", entry.Id) + assert.Equal(t, "https://example.com/api/schemas/pet.json", entry.ResolvedUri) +} + +// Test that SearchIndexForReferenceByReferenceWithContext uses $id lookup +func TestSearchIndexForReferenceByReferenceWithContext_ViaSchemaId(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object + properties: + name: + type: string + Owner: + type: object + properties: + pet: + $ref: "https://example.com/schemas/pet.json" +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // Search for the reference using the $id + ref, foundIdx, ctx := index.SearchIndexForReferenceWithContext(context.Background(), "https://example.com/schemas/pet.json") + + assert.NotNil(t, ref, "Should find reference via $id") + assert.NotNil(t, foundIdx) + assert.NotNil(t, ctx) + assert.Equal(t, "https://example.com/schemas/pet.json", ref.FullDefinition) +} + +// Test SchemaIdEntry GetKey with empty ResolvedUri falls back to Id +func TestSchemaIdEntry_GetKey_FallbackToId(t *testing.T) { + entry := &SchemaIdEntry{ + Id: "schema.json", + ResolvedUri: "", // Empty + } + + assert.Equal(t, "schema.json", entry.GetKey()) +} + +// Test copySchemaIdRegistry with nil registry +func TestCopySchemaIdRegistry_Nil(t *testing.T) { + result := copySchemaIdRegistry(nil) + assert.NotNil(t, result) + assert.Empty(t, result) +} + +// Test copySchemaIdRegistry creates independent copy +func TestCopySchemaIdRegistry_IndependentCopy(t *testing.T) { + original := make(map[string]*SchemaIdEntry) + original["key1"] = &SchemaIdEntry{Id: "a.json"} + + copied := copySchemaIdRegistry(original) + + // Should be equal initially + assert.Len(t, copied, 1) + assert.NotNil(t, copied["key1"]) + + // Modify original + original["key2"] = &SchemaIdEntry{Id: "b.json"} + + // Copy should not be affected + assert.Len(t, copied, 1) + _, exists := copied["key2"] + assert.False(t, exists) +} diff --git a/what-changed/model/schema_test.go b/what-changed/model/schema_test.go index 75a2382b..aacafaff 100644 --- a/what-changed/model/schema_test.go +++ b/what-changed/model/schema_test.go @@ -4962,3 +4962,197 @@ components: assert.NotNil(t, changes2) assert.Equal(t, 0, changes2.TotalBreakingChanges(), "With custom config, modifying $dynamicRef should not be breaking") } + +// TestCompareSchemas_Id_Added tests detection of $id being added +func TestCompareSchemas_Id_Added(t *testing.T) { + ResetDefaultBreakingRules() + ResetActiveBreakingRulesConfig() + low.ClearHashCache() + defer func() { + ResetActiveBreakingRulesConfig() + ResetDefaultBreakingRules() + }() + + left := `openapi: "3.1.0" +info: + title: left + version: "1.0" +components: + schemas: + Pet: + type: object` + + right := `openapi: "3.1.0" +info: + title: right + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object` + + leftDoc, rightDoc := test_BuildDoc(left, right) + + lSchemaProxy := leftDoc.Components.Value.FindSchema("Pet").Value + rSchemaProxy := rightDoc.Components.Value.FindSchema("Pet").Value + + changes := CompareSchemas(lSchemaProxy, rSchemaProxy) + assert.NotNil(t, changes) + assert.Equal(t, 1, changes.TotalChanges()) + + // Find the $id change + found := false + for _, change := range changes.Changes { + if change.Property == PropId { + found = true + assert.Equal(t, PropertyAdded, change.ChangeType) + assert.Equal(t, "https://example.com/schemas/pet.json", change.New) + break + } + } + assert.True(t, found, "Should find $id property change") +} + +// TestCompareSchemas_Id_Removed tests detection of $id being removed +func TestCompareSchemas_Id_Removed(t *testing.T) { + ResetDefaultBreakingRules() + ResetActiveBreakingRulesConfig() + low.ClearHashCache() + defer func() { + ResetActiveBreakingRulesConfig() + ResetDefaultBreakingRules() + }() + + left := `openapi: "3.1.0" +info: + title: left + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object` + + right := `openapi: "3.1.0" +info: + title: right + version: "1.0" +components: + schemas: + Pet: + type: object` + + leftDoc, rightDoc := test_BuildDoc(left, right) + + lSchemaProxy := leftDoc.Components.Value.FindSchema("Pet").Value + rSchemaProxy := rightDoc.Components.Value.FindSchema("Pet").Value + + changes := CompareSchemas(lSchemaProxy, rSchemaProxy) + assert.NotNil(t, changes) + assert.Equal(t, 1, changes.TotalChanges()) + + // Find the $id change + found := false + for _, change := range changes.Changes { + if change.Property == PropId { + found = true + assert.Equal(t, PropertyRemoved, change.ChangeType) + assert.Equal(t, "https://example.com/schemas/pet.json", change.Original) + break + } + } + assert.True(t, found, "Should find $id property change") +} + +// TestCompareSchemas_Id_Modified tests detection of $id being modified +func TestCompareSchemas_Id_Modified(t *testing.T) { + ResetDefaultBreakingRules() + ResetActiveBreakingRulesConfig() + low.ClearHashCache() + defer func() { + ResetActiveBreakingRulesConfig() + ResetDefaultBreakingRules() + }() + + left := `openapi: "3.1.0" +info: + title: left + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet-v1.json" + type: object` + + right := `openapi: "3.1.0" +info: + title: right + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet-v2.json" + type: object` + + leftDoc, rightDoc := test_BuildDoc(left, right) + + lSchemaProxy := leftDoc.Components.Value.FindSchema("Pet").Value + rSchemaProxy := rightDoc.Components.Value.FindSchema("Pet").Value + + changes := CompareSchemas(lSchemaProxy, rSchemaProxy) + assert.NotNil(t, changes) + assert.Equal(t, 1, changes.TotalChanges()) + + // Find the $id change + found := false + for _, change := range changes.Changes { + if change.Property == PropId { + found = true + assert.Equal(t, Modified, change.ChangeType) + assert.Equal(t, "https://example.com/schemas/pet-v1.json", change.Original) + assert.Equal(t, "https://example.com/schemas/pet-v2.json", change.New) + break + } + } + assert.True(t, found, "Should find $id property change") +} + +// TestCompareSchemas_Id_NoChange tests that identical $id produces no changes +func TestCompareSchemas_Id_NoChange(t *testing.T) { + ResetDefaultBreakingRules() + ResetActiveBreakingRulesConfig() + low.ClearHashCache() + defer func() { + ResetActiveBreakingRulesConfig() + ResetDefaultBreakingRules() + }() + + left := `openapi: "3.1.0" +info: + title: left + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object` + + right := `openapi: "3.1.0" +info: + title: right + version: "1.0" +components: + schemas: + Pet: + $id: "https://example.com/schemas/pet.json" + type: object` + + leftDoc, rightDoc := test_BuildDoc(left, right) + + lSchemaProxy := leftDoc.Components.Value.FindSchema("Pet").Value + rSchemaProxy := rightDoc.Components.Value.FindSchema("Pet").Value + + changes := CompareSchemas(lSchemaProxy, rSchemaProxy) + assert.Nil(t, changes) +} From 0cb4d3c552524a54cfc945db0b7d7c021d787611 Mon Sep 17 00:00:00 2001 From: quobix Date: Mon, 22 Dec 2025 07:16:13 -0500 Subject: [PATCH 4/8] removing unreachable code and adding more coverage. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit some code can’t be reached, therefore it should not exist. --- index/extract_refs.go | 13 +---- index/schema_id_test.go | 104 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 11 deletions(-) diff --git a/index/extract_refs.go b/index/extract_refs.go index d195f74b..8395eeae 100644 --- a/index/extract_refs.go +++ b/index/extract_refs.go @@ -616,17 +616,8 @@ func (index *SpecIndex) ExtractRefs(ctx context.Context, node, parent *yaml.Node Column: idNode.Column, } - // Register in the index - propagate errors - if regErr := index.RegisterSchemaId(entry); regErr != nil { - index.errorLock.Lock() - index.refErrors = append(index.refErrors, &IndexingError{ - Err: fmt.Errorf("failed to register $id '%s': %w", idValue, regErr), - Node: idNode, - KeyNode: node.Content[i], - Path: definitionPath, - }) - index.errorLock.Unlock() - } + // Register in the index (validation already done above) + _ = index.RegisterSchemaId(entry) } } diff --git a/index/schema_id_test.go b/index/schema_id_test.go index 1fce5110..e894b54f 100644 --- a/index/schema_id_test.go +++ b/index/schema_id_test.go @@ -1452,3 +1452,107 @@ func TestCopySchemaIdRegistry_IndependentCopy(t *testing.T) { _, exists := copied["key2"] assert.False(t, exists) } + +// Test $id at document root level (definitionPath = "#") +func TestSchemaId_RootLevelId(t *testing.T) { + // A schema with $id at the root level (not nested under components/schemas) + spec := `$id: "https://example.com/root-schema.json" +type: object +properties: + name: + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 1) + + entry := allIds["https://example.com/root-schema.json"] + assert.NotNil(t, entry) + assert.Equal(t, "https://example.com/root-schema.json", entry.Id) + // Root level should have definitionPath of "#" + assert.Equal(t, "#", entry.DefinitionPath) +} + +// Test malformed $id URL that causes ResolveSchemaId to fail +// This tests the fallback path where resolvedNodeId == "" or resolveErr != nil +func TestSchemaId_MalformedUrlFallback(t *testing.T) { + // A $id with a malformed URL that url.Parse will reject + // "://missing-scheme" should cause a parse error + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Pet: + $id: "://missing-scheme" + type: object +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + // The $id should still be registered using the original value as fallback + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 1) + + // Should be registered with original value since resolution failed + entry := allIds["://missing-scheme"] + assert.NotNil(t, entry, "Malformed $id should still be registered with original value") + assert.Equal(t, "://missing-scheme", entry.Id) + assert.Equal(t, "://missing-scheme", entry.ResolvedUri) // Falls back to original +} + +// Test malformed $id URL in nested context (tests scope update fallback) +func TestSchemaId_MalformedUrlInNestedContext(t *testing.T) { + spec := `openapi: "3.1.0" +info: + title: Test API + version: 1.0.0 +components: + schemas: + Parent: + $id: "https://example.com/parent.json" + type: object + properties: + child: + $id: "://bad-child-url" + type: string +` + + var rootNode yaml.Node + err := yaml.Unmarshal([]byte(spec), &rootNode) + assert.NoError(t, err) + + config := CreateClosedAPIIndexConfig() + config.SpecAbsolutePath = "https://example.com/openapi.yaml" + index := NewSpecIndexWithConfig(&rootNode, config) + assert.NotNil(t, index) + + allIds := index.GetAllSchemaIds() + assert.Len(t, allIds, 2) + + // Parent should resolve normally + parentEntry := allIds["https://example.com/parent.json"] + assert.NotNil(t, parentEntry) + + // Child has malformed URL, should fall back to original value + childEntry := allIds["://bad-child-url"] + assert.NotNil(t, childEntry, "Malformed nested $id should still be registered") + assert.Equal(t, "://bad-child-url", childEntry.Id) +} From 31203ce967bd52885d4d079d786b0fe59d10d6bb Mon Sep 17 00:00:00 2001 From: quobix Date: Mon, 22 Dec 2025 07:31:45 -0500 Subject: [PATCH 5/8] fix co-pilot issue the rest are trash, but this is legit. --- index/extract_refs.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index/extract_refs.go b/index/extract_refs.go index 8395eeae..4fc482f8 100644 --- a/index/extract_refs.go +++ b/index/extract_refs.go @@ -621,7 +621,8 @@ func (index *SpecIndex) ExtractRefs(ctx context.Context, node, parent *yaml.Node } } - if i%2 == 0 && n.Value != "$ref" && n.Value != "" { + // Skip $ref and $id from path building - they are keywords, not schema properties + if i%2 == 0 && n.Value != "$ref" && n.Value != "$id" && n.Value != "" { v := n.Value if strings.HasPrefix(v, "/") { From 411aa135c676afa6b054543e34eeab495d2f4cf2 Mon Sep 17 00:00:00 2001 From: quobix Date: Mon, 22 Dec 2025 08:37:06 -0500 Subject: [PATCH 6/8] cover invalid ID url --- index/schema_id_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/index/schema_id_test.go b/index/schema_id_test.go index e894b54f..31b11512 100644 --- a/index/schema_id_test.go +++ b/index/schema_id_test.go @@ -337,6 +337,14 @@ func TestResolveRefAgainstSchemaId_InvalidBaseInScope(t *testing.T) { assert.Contains(t, err.Error(), "invalid base URI in scope") } +func TestResolveRefAgainstSchemaId_InvalidRefUri(t *testing.T) { + // Test invalid ref URI with bad percent-encoding + scope := NewSchemaIdScope("https://example.com/base.json") + _, err := ResolveRefAgainstSchemaId("schema%zz.json", scope) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid $ref URI") +} + func TestSchemaIdScope_NestedScopes(t *testing.T) { // Test a realistic nested $id scenario // Document base: https://example.com/openapi.yaml From 9b14381339663f8a191c6a60e96e5b28fb72172b Mon Sep 17 00:00:00 2001 From: quobix Date: Mon, 22 Dec 2025 08:52:15 -0500 Subject: [PATCH 7/8] coverage increase --- index/schema_id_test.go | 5 +++++ index/search_index_test.go | 21 +++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/index/schema_id_test.go b/index/schema_id_test.go index 31b11512..32630302 100644 --- a/index/schema_id_test.go +++ b/index/schema_id_test.go @@ -975,6 +975,11 @@ items: fragment: "#/items/99", wantNil: true, }, + { + name: "empty segments skipped (double slash)", + fragment: "#//properties//name", + wantNil: false, + }, } for _, tt := range tests { diff --git a/index/search_index_test.go b/index/search_index_test.go index b15f870b..c8dcf790 100644 --- a/index/search_index_test.go +++ b/index/search_index_test.go @@ -139,3 +139,24 @@ paths: {}`), &rootNode) assert.True(t, found.IsRemote) assert.Equal(t, "external.yaml", filepath.Base(found.FullDefinition)) } + + +func TestIsFileBeingIndexed_HTTPPathMatch(t *testing.T) { + // Test that HTTP paths match when the path portion is the same + ctx := context.Background() + + // Add an HTTP file to the indexing context + files := make(map[string]bool) + files["https://example.com/schemas/pet.yaml"] = true + ctx = context.WithValue(ctx, IndexingFilesKey, files) + + // Same path, different host - should match + assert.True(t, IsFileBeingIndexed(ctx, "https://different-host.com/schemas/pet.yaml")) + + // Same exact URL - should match + assert.True(t, IsFileBeingIndexed(ctx, "https://example.com/schemas/pet.yaml")) + + // Different path - should not match + assert.False(t, IsFileBeingIndexed(ctx, "https://example.com/other/file.yaml")) +} + From a89430f2022841dc8a0d19500604e861ff69f1e5 Mon Sep 17 00:00:00 2001 From: quobix Date: Mon, 22 Dec 2025 09:14:30 -0500 Subject: [PATCH 8/8] Update docs to deal with #309 I am not going to re-build the functions myself, there is little gain, however I have documented the behavior clearly. --- index/spec_index.go | 45 +++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/index/spec_index.go b/index/spec_index.go index 8bdd69c9..1ef24fb6 100644 --- a/index/spec_index.go +++ b/index/spec_index.go @@ -340,11 +340,17 @@ func (index *SpecIndex) GetOperationParameterReferences() map[string]map[string] return index.paramOpRefs } -// GetAllSchemas will return references to all schemas found in the document both inline and those under components -// The first elements of at the top of the slice, are all the inline references (using GetAllInlineSchemas), -// and then following on are all the references extracted from the components section (using GetAllComponentSchemas). -// finally all the references that are not inline, but marked as $ref in the document are returned (using GetAllReferenceSchemas). -// the results are sorted by line number. +// GetAllSchemas returns references to ALL schemas found in the document: +// - Inline schemas (defined directly in operations, parameters, etc.) +// - Component schemas (defined under components/schemas or definitions) +// - Reference schemas ($ref pointers to schemas) +// +// Results are sorted by line number in the source document. +// +// Note: This is the only GetAll* function that returns inline and $ref variants. +// Other GetAll* functions (GetAllRequestBodies, GetAllResponses, etc.) only return +// items defined in the components section. Use GetAllInlineSchemas, GetAllComponentSchemas, +// and GetAllReferenceSchemas for more granular access. func (index *SpecIndex) GetAllSchemas() []*Reference { componentSchemas := index.GetAllComponentSchemas() inlineSchemas := index.GetAllInlineSchemas() @@ -413,12 +419,14 @@ func (index *SpecIndex) GetAllComponentSchemas() map[string]*Reference { return index.allComponentSchemas } -// GetAllSecuritySchemes will return all security schemes / definitions found in the document. +// GetAllSecuritySchemes returns all security schemes defined in the components section +// (components/securitySchemes in OpenAPI 3.x, or securityDefinitions in Swagger 2.0). func (index *SpecIndex) GetAllSecuritySchemes() map[string]*Reference { return syncMapToMap[string, *Reference](index.allSecuritySchemes) } -// GetAllHeaders will return all headers found in the document (under components) +// GetAllHeaders returns all headers defined in the components section (components/headers). +// This does not include inline headers defined directly in operations or $ref pointers. func (index *SpecIndex) GetAllHeaders() map[string]*Reference { return index.allHeaders } @@ -428,7 +436,8 @@ func (index *SpecIndex) GetAllExternalDocuments() map[string]*Reference { return index.allExternalDocuments } -// GetAllExamples will return all examples found in the document (under components) +// GetAllExamples returns all examples defined in the components section (components/examples). +// This does not include inline examples defined directly in operations or $ref pointers. func (index *SpecIndex) GetAllExamples() map[string]*Reference { return index.allExamples } @@ -453,32 +462,40 @@ func (index *SpecIndex) GetAllSummaries() []*DescriptionReference { return index.allSummaries } -// GetAllRequestBodies will return all requestBodies found in the document (under components) +// GetAllRequestBodies returns all request bodies defined in the components section (components/requestBodies). +// This does not include inline request bodies defined directly in operations or $ref pointers. func (index *SpecIndex) GetAllRequestBodies() map[string]*Reference { return index.allRequestBodies } -// GetAllLinks will return all links found in the document (under components) +// GetAllLinks returns all links defined in the components section (components/links). +// This does not include inline links defined directly in responses or $ref pointers. func (index *SpecIndex) GetAllLinks() map[string]*Reference { return index.allLinks } -// GetAllParameters will return all parameters found in the document (under components) +// GetAllParameters returns all parameters defined in the components section (components/parameters). +// This does not include inline parameters defined directly in operations or path items. +// For operation-level parameters, use GetOperationParameterReferences. func (index *SpecIndex) GetAllParameters() map[string]*Reference { return index.allParameters } -// GetAllResponses will return all responses found in the document (under components) +// GetAllResponses returns all responses defined in the components section (components/responses). +// This does not include inline responses defined directly in operations or $ref pointers. func (index *SpecIndex) GetAllResponses() map[string]*Reference { return index.allResponses } -// GetAllCallbacks will return all links found in the document (under components) +// GetAllCallbacks returns all callbacks defined in the components section (components/callbacks). +// This does not include inline callbacks defined directly in operations or $ref pointers. func (index *SpecIndex) GetAllCallbacks() map[string]*Reference { return index.allCallbacks } -// GetAllComponentPathItems will return all path items found in the document (under components) +// GetAllComponentPathItems returns all path items defined in the components section (components/pathItems). +// This does not include path items defined directly under the paths object or $ref pointers. +// For paths-level path items, use GetAllPaths. func (index *SpecIndex) GetAllComponentPathItems() map[string]*Reference { return index.allComponentPathItems }