Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion demos/common/export_models/export_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,6 @@ def export_text_generation_model(model_repository_path, source_model, model_name
raise ValueError("max_prompt_len is only supported for NPU target device")
if task_parameters['max_prompt_len'] <= 0:
raise ValueError("max_prompt_len should be a positive integer")
plugin_config['MAX_PROMPT_LEN'] = task_parameters['max_prompt_len']
if task_parameters['ov_cache_dir'] is not None:
plugin_config['CACHE_DIR'] = task_parameters['ov_cache_dir']

Expand All @@ -459,6 +458,17 @@ def export_text_generation_model(model_repository_path, source_model, model_name
if "HETERO" in task_parameters['target_device']:
plugin_config['MODEL_DISTRIBUTION_POLICY'] = 'PIPELINE_PARALLEL'

if task_parameters['target_device'] == 'NPU':
max_prompt_len = task_parameters['max_prompt_len']
Copy link
Collaborator

@dkalinowski dkalinowski Feb 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously we accessed task_parameters['max_prompt_len'] only after checking `if task_parameters['max_prompt_len'] is not None:`.

Now we don't have such a check; won't there be a KeyError?

npu_properties = {}
if max_prompt_len is not None:
npu_properties['MAX_PROMPT_LEN'] = max_prompt_len
if task_parameters['enable_prefix_caching']:
npu_properties['NPUW_LLM_ENABLE_PREFIX_CACHING'] = True
device_properties = { "NPU": npu_properties }
plugin_config['DEVICE_PROPERTIES'] = device_properties
Comment on lines +468 to +469
Copy link

Copilot AI Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable name 'device_properties' is ambiguous since it only contains NPU properties in this context. Consider renaming to 'npu_device_properties' or using a more descriptive name to clarify that this is specifically for NPU configuration.

Suggested change
device_properties = { "NPU": npu_properties }
plugin_config['DEVICE_PROPERTIES'] = device_properties
npu_device_properties = { "NPU": npu_properties }
plugin_config['DEVICE_PROPERTIES'] = npu_device_properties

Copilot uses AI. Check for mistakes.

Copy link

Copilot AI Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an extra blank line here. Consider removing this trailing whitespace to maintain consistent code formatting.

Suggested change

Copilot uses AI. Check for mistakes.

plugin_config_str = json.dumps(plugin_config)
task_parameters['plugin_config'] = plugin_config_str

Expand Down
35 changes: 33 additions & 2 deletions src/graph_export/graph_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,12 +597,43 @@ std::variant<std::optional<std::string>, Status> GraphExport::createPluginString
rapidjson::Value value;
value.SetBool(pluginConfig.useNpuPrefixCaching.value());
auto itr = d.FindMember("NPUW_LLM_ENABLE_PREFIX_CACHING");
if (itr != d.MemberEnd()) {
bool foundInTopLevel = (itr != d.MemberEnd());

bool foundInDeviceProperties = false;
if (!foundInTopLevel) {
auto devicePropsItr = d.FindMember("DEVICE_PROPERTIES");
if (devicePropsItr != d.MemberEnd() && devicePropsItr->value.IsObject()) {
auto npuItr = devicePropsItr->value.FindMember("NPU");
if (npuItr != devicePropsItr->value.MemberEnd() && npuItr->value.IsObject()) {
auto npuPrefixCachingItr = npuItr->value.FindMember("NPUW_LLM_ENABLE_PREFIX_CACHING");
foundInDeviceProperties = (npuPrefixCachingItr != npuItr->value.MemberEnd());
}
}
}

if (foundInTopLevel || foundInDeviceProperties) {
return Status(StatusCode::PLUGIN_CONFIG_CONFLICTING_PARAMETERS, "Doubled NPUW_LLM_ENABLE_PREFIX_CACHING parameter in plugin config.");
}
d.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator());

// Add to nested structure DEVICE_PROPERTIES.NPU
auto devicePropsItr = d.FindMember("DEVICE_PROPERTIES");
if (devicePropsItr == d.MemberEnd()) {
rapidjson::Value deviceProps(rapidjson::kObjectType);
d.AddMember("DEVICE_PROPERTIES", deviceProps, d.GetAllocator());
devicePropsItr = d.FindMember("DEVICE_PROPERTIES");
}

auto npuItr = devicePropsItr->value.FindMember("NPU");
if (npuItr == devicePropsItr->value.MemberEnd()) {
rapidjson::Value npuObj(rapidjson::kObjectType);
devicePropsItr->value.AddMember("NPU", npuObj, d.GetAllocator());
npuItr = devicePropsItr->value.FindMember("NPU");
Comment on lines +623 to +630
Copy link

Copilot AI Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

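After adding the member in line 621, the iterator lookup in line 622 is unnecessary. The AddMember operation returns a reference to the object it was called on (not to the newly added member), and it moves from the value passed in, so the local deviceProps object is left empty afterwards; however, you can populate the new object before adding it, or leverage the fact that the member was just appended as the last member, to avoid the redundant FindMember call. Consider storing a reference or iterator during creation to avoid this second lookup.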

Suggested change
devicePropsItr = d.FindMember("DEVICE_PROPERTIES");
}
auto npuItr = devicePropsItr->value.FindMember("NPU");
if (npuItr == devicePropsItr->value.MemberEnd()) {
rapidjson::Value npuObj(rapidjson::kObjectType);
devicePropsItr->value.AddMember("NPU", npuObj, d.GetAllocator());
npuItr = devicePropsItr->value.FindMember("NPU");
devicePropsItr = d.MemberEnd();
--devicePropsItr;
}
auto npuItr = devicePropsItr->value.FindMember("NPU");
if (npuItr == devicePropsItr->value.MemberEnd()) {
rapidjson::Value npuObj(rapidjson::kObjectType);
devicePropsItr->value.AddMember("NPU", npuObj, d.GetAllocator());
npuItr = devicePropsItr->value.MemberEnd();
--npuItr;

Copilot uses AI. Check for mistakes.
}

npuItr->value.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator());
Comment on lines +629 to +633
Copy link

Copilot AI Jan 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to the DEVICE_PROPERTIES creation, after adding the NPU member in line 628, the FindMember call in line 629 is redundant. Consider refactoring to avoid this unnecessary lookup.

Suggested change
devicePropsItr->value.AddMember("NPU", npuObj, d.GetAllocator());
npuItr = devicePropsItr->value.FindMember("NPU");
}
npuItr->value.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator());
npuObj.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator());
devicePropsItr->value.AddMember("NPU", npuObj, d.GetAllocator());
} else {
npuItr->value.AddMember("NPUW_LLM_ENABLE_PREFIX_CACHING", value, d.GetAllocator());
}

Copilot uses AI. Check for mistakes.
configNotEmpty = true;
}

if (configNotEmpty) {
// Serialize the document to a JSON string
rapidjson::StringBuffer buffer;
Expand Down