Skip to content

Commit cd775bf

Browse files
authored
GENKGB-452 (#456)
* Add required property support to SchemaFromTextExtractor * add required property support * unit tests * unit tests * add required support to SchemaFromTextExtractor * handle int values for required property
1 parent 9e6b588 commit cd775bf

File tree

3 files changed

+538
-2
lines changed

3 files changed

+538
-2
lines changed

src/neo4j_graphrag/experimental/components/schema.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,92 @@ def _filter_invalid_constraints(
666666
filtered_constraints.append(constraint)
667667
return filtered_constraints
668668

669+
def _filter_properties_required_field(
670+
self, node_types: List[Dict[str, Any]]
671+
) -> List[Dict[str, Any]]:
672+
"""Sanitize the 'required' field in node type properties. Ensures 'required' is a valid boolean.
673+
converts known string values (true, yes, 1, false, no, 0) to booleans and removes unrecognized values.
674+
"""
675+
for node_type in node_types:
676+
properties = node_type.get("properties", [])
677+
if not properties:
678+
continue
679+
for prop in properties:
680+
if not isinstance(prop, dict):
681+
continue
682+
683+
required_value = prop.get("required")
684+
685+
# Not provided - will use Pydantic default (false)
686+
if required_value is None:
687+
continue
688+
689+
# already a valid boolean
690+
if isinstance(required_value, bool):
691+
continue
692+
693+
prop_name = prop.get("name", "unknown")
694+
node_label = node_type.get("label", "unknown")
695+
696+
# Convert to string to handle int values like 1 or 0
697+
required_str = str(required_value).lower()
698+
699+
if required_str in ("true", "yes", "1"):
700+
prop["required"] = True
701+
logging.info(
702+
f"Converted 'required' value '{required_value}' to True "
703+
f"for property '{prop_name}' on node '{node_label}'"
704+
)
705+
elif required_str in ("false", "no", "0"):
706+
prop["required"] = False
707+
logging.info(
708+
f"Converted 'required' value '{required_value}' to False "
709+
f"for property '{prop_name}' on node '{node_label}'"
710+
)
711+
else:
712+
logging.info(
713+
f"Removing unrecognized 'required' value '{required_value}' "
714+
f"for property '{prop_name}' on node '{node_label}'. "
715+
f"Using default (False)."
716+
)
717+
prop.pop("required", None)
718+
719+
return node_types
720+
721+
def _enforce_required_for_constraint_properties(
722+
self,
723+
node_types: List[Dict[str, Any]],
724+
constraints: List[Dict[str, Any]],
725+
) -> None:
726+
"""Ensure properties with UNIQUENESS constraints are marked as required."""
727+
if not constraints:
728+
return
729+
730+
# Build a lookup for property_names and constraints
731+
constraint_props: Dict[str, set[str]] = {}
732+
for c in constraints:
733+
if c.get("type") == "UNIQUENESS":
734+
label = c.get("node_type")
735+
prop = c.get("property_name")
736+
if label and prop:
737+
constraint_props.setdefault(label, set()).add(prop)
738+
739+
# Skip node_types without constraints
740+
for node_type in node_types:
741+
label = node_type.get("label")
742+
if label not in constraint_props:
743+
continue
744+
745+
props_to_fix = constraint_props[label]
746+
for prop in node_type.get("properties", []):
747+
if isinstance(prop, dict) and prop.get("name") in props_to_fix:
748+
if prop.get("required") is not True:
749+
logging.info(
750+
f"Auto-setting 'required' as True for property '{prop.get('name')}' "
751+
f"on node '{label}' (has UNIQUENESS constraint)."
752+
)
753+
prop["required"] = True
754+
669755
def _clean_json_content(self, content: str) -> str:
670756
content = content.strip()
671757

@@ -746,12 +832,22 @@ async def run(self, text: str, examples: str = "", **kwargs: Any) -> GraphSchema
746832
extracted_relationship_types
747833
)
748834

835+
extracted_node_types = self._filter_properties_required_field(
836+
extracted_node_types
837+
)
838+
749839
# Filter out invalid patterns before validation
750840
if extracted_patterns:
751841
extracted_patterns = self._filter_invalid_patterns(
752842
extracted_patterns, extracted_node_types, extracted_relationship_types
753843
)
754844

845+
# Enforce required=true for properties with UNIQUENESS constraints
846+
if extracted_constraints:
847+
self._enforce_required_for_constraint_properties(
848+
extracted_node_types, extracted_constraints
849+
)
850+
755851
# Filter out invalid constraints
756852
if extracted_constraints:
757853
extracted_constraints = self._filter_invalid_constraints(

src/neo4j_graphrag/generation/prompts.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,13 @@ class SchemaExtractionTemplate(PromptTemplate):
223223
8.2 Only use properties that seem to not have too many missing values in the sample.
224224
8.3 Constraints reference node_types by label and specify which property is unique.
225225
8.4 If a property appears in a uniqueness constraint it MUST also appear in the corresponding node_type as a property.
226-
226+
9. REQUIRED PROPERTIES:
227+
9.1 Mark a property as "required": true if every instance of that node/relationship type MUST have this property (non-nullable).
228+
9.2 Mark a property as "required": false if the property is optional and may be absent on some instances.
229+
9.3 Properties that are identifiers, names, or essential characteristics are typically required.
230+
9.4 Properties that are supplementary information (phone numbers, descriptions, metadata) are typically optional.
231+
9.5 When uncertain, default to "required": false.
232+
9.6 If a property has a UNIQUENESS constraint, it MUST be marked as "required": true.
227233
228234
Accepted property types are: BOOLEAN, DATE, DURATION, FLOAT, INTEGER, LIST,
229235
LOCAL_DATETIME, LOCAL_TIME, POINT, STRING, ZONED_DATETIME, ZONED_TIME.
@@ -236,7 +242,13 @@ class SchemaExtractionTemplate(PromptTemplate):
236242
"properties": [
237243
{{
238244
"name": "name",
239-
"type": "STRING"
245+
"type": "STRING",
246+
"required": true
247+
}},
248+
{{
249+
"name": "email",
250+
"type": "STRING",
251+
"required": false
240252
}}
241253
]
242254
}}

0 commit comments

Comments
 (0)