From 998b4e3f0b51a253f98638b95ab25f74b13a3dee Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 1 Oct 2025 13:37:50 -0400 Subject: [PATCH 1/8] Preliminary impl --- centml/cli/cserve_recipe.py | 110 ++++++++++++++++++++++++++ centml/cli/main.py | 13 +++ centml/sdk/__init__.py | 7 +- centml/sdk/ops.py | 94 ++++++++++++++++++++++ examples/sdk/manage_cserve_recipes.py | 74 +++++++++++++++++ requirements.txt | 1 + 6 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 centml/cli/cserve_recipe.py create mode 100644 centml/sdk/ops.py create mode 100644 examples/sdk/manage_cserve_recipes.py diff --git a/centml/cli/cserve_recipe.py b/centml/cli/cserve_recipe.py new file mode 100644 index 0000000..708c521 --- /dev/null +++ b/centml/cli/cserve_recipe.py @@ -0,0 +1,110 @@ +import json +import sys +from functools import wraps + +import click +from tabulate import tabulate + +from centml.sdk.ops import get_centml_ops_client + + +def handle_exception(func): + @wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except ImportError as e: + click.echo(f"Error: {e}") + click.echo("Please install platform-api-ops-client to use this feature.") + return None + except Exception as e: + click.echo(f"Error: {e}") + return None + + return wrapper + + +@click.command(help="Update CServe recipes from platform_db.json file") +@click.argument("platform_db_file", type=click.Path(exists=True)) +@click.option( + "--cluster-id", + type=int, + required=True, + help="The cluster ID to associate with hardware instances", +) +@handle_exception +def update(platform_db_file, cluster_id): + """ + Update CServe recipes from platform_db.json performance data. + + This command reads a platform_db.json file containing performance test results + and updates the CServe recipe configurations in the database. + + Example: + centml cserve-recipe update platform_db.json --cluster-id 1001 + """ + # Load platform_db.json file + try: + with open(platform_db_file, "r") as f: + platform_data = json.load(f) + except json.JSONDecodeError: + click.echo(f"Error: Invalid JSON file: {platform_db_file}") + sys.exit(1) + except Exception as e: + click.echo(f"Error reading file: {e}") + sys.exit(1) + + # Validate platform_data structure + if not isinstance(platform_data, dict): + click.echo("Error: platform_db.json should contain a dictionary of models") + sys.exit(1) + + click.echo(f"Processing {len(platform_data)} models from {platform_db_file}...") + click.echo(f"Target cluster ID: {cluster_id}") + + with get_centml_ops_client() as ops_client: + response = ops_client.update_cserve_recipes(cluster_id=cluster_id, platform_data=platform_data) + + # Display results + click.echo("\n" + "=" * 60) + click.echo(click.style("✓ Update Complete", fg="green", bold=True)) + click.echo("=" * 60 + "\n") + + click.echo(click.style(response.message, fg="green")) + + if response.processed_models: + click.echo(f"\nProcessed Models ({len(response.processed_models)}):") + for model in response.processed_models: + click.echo(f" ✓ {model}") + + if response.errors: + click.echo(click.style(f"\nErrors ({len(response.errors)}):", fg="red", bold=True)) + for error in response.errors: + click.echo(click.style(f" ✗ {error}", fg="red")) + sys.exit(1) + + +@click.command(help="Delete CServe recipe for a specific model") +@click.argument("model") +@click.option("--confirm", is_flag=True, help="Skip confirmation prompt") +@handle_exception +def delete(model, confirm): + """ + Delete CServe recipe configurations for a specific model. + + This will remove all recipe configurations (fastest, cheapest, best_value) + for the specified model. + + Example: + centml cserve-recipe delete "meta-llama/Llama-3.3-70B-Instruct" + centml cserve-recipe delete "Qwen/Qwen3-0.6B" --confirm + """ + if not confirm: + if not click.confirm(f"Are you sure you want to delete recipe for model '{model}'?"): + click.echo("Cancelled.") + return + + with get_centml_ops_client() as ops_client: + ops_client.delete_cserve_recipe(model=model) + click.echo(click.style(f"✓ Successfully deleted recipe for model: {model}", fg="green")) + diff --git a/centml/cli/main.py b/centml/cli/main.py index b1ecc73..3227505 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -2,6 +2,7 @@ from centml.cli.login import login, logout from centml.cli.cluster import ls, get, delete, pause, resume +from centml.cli.cserve_recipe import update as recipe_update, delete as recipe_delete @click.group() @@ -50,3 +51,15 @@ def ccluster(): cli.add_command(ccluster, name="cluster") + + +@click.group(help="CentML CServe recipe management CLI tool") +def cserve_recipe(): + pass + + +cserve_recipe.add_command(recipe_update, name="update") +cserve_recipe.add_command(recipe_delete, name="delete") + + +cli.add_command(cserve_recipe, name="cserve-recipe") diff --git a/centml/sdk/__init__.py b/centml/sdk/__init__.py index 2bed9e7..a5b566d 100644 --- a/centml/sdk/__init__.py +++ b/centml/sdk/__init__.py @@ -1,2 +1,7 @@ from platform_api_python_client import * -from . import api, auth +from . import api, auth, ops + +# Export OPS client classes and functions +from .ops import CentMLOpsClient, get_centml_ops_client + +__all__ = ['CentMLOpsClient', 'get_centml_ops_client', 'api', 'auth', 'ops'] diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py new file mode 100644 index 0000000..549e424 --- /dev/null +++ b/centml/sdk/ops.py @@ -0,0 +1,94 @@ +from contextlib import contextmanager +from typing import Dict, Any + +try: + import platform_api_ops_client + from platform_api_ops_client import OPSApi + OPS_CLIENT_AVAILABLE = True +except ImportError: + OPS_CLIENT_AVAILABLE = False + +from centml.sdk import auth +from centml.sdk.config import settings + + +class CentMLOpsClient: + """ + Client for CentML OPS API operations. + Used for administrative tasks like managing CServe recipes. + """ + + def __init__(self, api: "OPSApi"): + self._api = api + + def update_cserve_recipes(self, cluster_id: int, platform_data: Dict[str, Dict[str, Dict[str, Any]]]): + """ + Update CServe recipes from platform_db.json performance data. + + Args: + cluster_id: The cluster ID to associate with hardware instances + platform_data: Platform DB data in the format: + { + "model_name": { + "fastest": {...}, + "cheapest": {...}, # optional + "best_value": {...} # optional + } + } + + Returns: + Response containing processed models and any errors + + Example: + with get_centml_ops_client() as ops_client: + with open('platform_db.json') as f: + platform_data = json.load(f) + response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=platform_data) + print(f"Processed: {response.processed_models}") + """ + return self._api.update_cserve_recipes_ops_cserve_recipes_post( + cluster_id=cluster_id, request_body=platform_data + ) + + def delete_cserve_recipe(self, model: str): + """ + Delete CServe recipe configurations for a specific model. + + Args: + model: The model name to delete (e.g., "meta-llama/Llama-3.3-70B-Instruct") + + Returns: + Success response (200 OK) + + Example: + with get_centml_ops_client() as ops_client: + ops_client.delete_cserve_recipe(model="meta-llama/Llama-3.3-70B-Instruct") + """ + return self._api.delete_cserve_recipe_ops_cserve_recipes_delete(model=model) + + +@contextmanager +def get_centml_ops_client(): + """ + Context manager for CentML OPS API client. + Requires platform-api-ops-client to be installed. + + Usage: + with get_centml_ops_client() as ops_client: + response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=data) + """ + if not OPS_CLIENT_AVAILABLE: + raise ImportError( + "platform-api-ops-client is required for OPS operations. " + "Install it with: pip install platform-api-ops-client" + ) + + configuration = platform_api_ops_client.Configuration( + host=settings.CENTML_PLATFORM_API_URL, access_token=auth.get_centml_token() + ) + + with platform_api_ops_client.ApiClient(configuration) as api_client: + api_instance = OPSApi(api_client) + + yield CentMLOpsClient(api_instance) + diff --git a/examples/sdk/manage_cserve_recipes.py b/examples/sdk/manage_cserve_recipes.py new file mode 100644 index 0000000..9a4ca23 --- /dev/null +++ b/examples/sdk/manage_cserve_recipes.py @@ -0,0 +1,74 @@ +""" +Example demonstrating how to manage CServe recipes using the CentML SDK. + +This example shows how to: +1. Update CServe recipes from platform_db.json +2. Delete CServe recipes for specific models + +Note: This requires platform-api-ops-client to be installed and +requires OPS admin permissions. +""" + +import json +from centml.sdk.ops import get_centml_ops_client + + +def update_recipes_example(): + """Update CServe recipes from platform_db.json file.""" + # Load platform_db.json data + # This file should contain performance data in the format: + # { + # "model_name": { + # "fastest": { "accelerator_type": "...", "accelerator_count": ..., ... }, + # "cheapest": { ... }, # optional + # "best_value": { ... } # optional + # }, + # ... + # } + with open('platform_db.json', 'r') as f: + platform_data = json.load(f) + + cluster_id = 1001 # Replace with your cluster ID + + with get_centml_ops_client() as ops_client: + response = ops_client.update_cserve_recipes( + cluster_id=cluster_id, + platform_data=platform_data + ) + + print(f"Message: {response.message}") + print(f"Processed Models: {response.processed_models}") + if response.errors: + print(f"Errors: {response.errors}") + + +def delete_recipe_example(): + """Delete CServe recipe for a specific model.""" + model_name = "meta-llama/Llama-3.3-70B-Instruct" + + with get_centml_ops_client() as ops_client: + ops_client.delete_cserve_recipe(model=model_name) + print(f"Successfully deleted recipe for model: {model_name}") + + +def main(): + # Example 1: Update recipes from platform_db.json + print("=== Updating CServe Recipes ===") + try: + update_recipes_example() + except FileNotFoundError: + print("platform_db.json not found. Skipping update example.") + except Exception as e: + print(f"Error updating recipes: {e}") + + print("\n=== Deleting CServe Recipe ===") + # Example 2: Delete a specific model's recipe + try: + delete_recipe_example() + except Exception as e: + print(f"Error deleting recipe: {e}") + + +if __name__ == "__main__": + main() + diff --git a/requirements.txt b/requirements.txt index 1fe82b6..98dda09 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 platform-api-python-client==4.1.9 +platform-api-ops-client>=1.0.0 From 5fdcae8c2f3444f4c1523c4de95d7cd6605838f3 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 1 Oct 2025 15:04:35 -0400 Subject: [PATCH 2/8] TEMPL change to local env for now --- centml/sdk/config.py | 6 +++--- requirements.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/centml/sdk/config.py b/centml/sdk/config.py index 3e8b6ea..0b4b215 100644 --- a/centml/sdk/config.py +++ b/centml/sdk/config.py @@ -8,14 +8,14 @@ class Config(BaseSettings): # It is possible to override the default values by setting the environment variables model_config = SettingsConfigDict(env_file=Path(".env")) - CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="https://app.centml.com/") + CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="http://localhost:5173") CENTML_CONFIG_PATH: str = os.getenv("CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml")) CENTML_CRED_FILE: str = os.getenv("CENTML_CRED_FILE", default="credentials.json") CENTML_CRED_FILE_PATH: str = os.path.join(CENTML_CONFIG_PATH, CENTML_CRED_FILE) - CENTML_PLATFORM_API_URL: str = os.getenv("CENTML_PLATFORM_API_URL", default="https://api.centml.com") + CENTML_PLATFORM_API_URL: str = os.getenv("CENTML_PLATFORM_API_URL", default="http://localhost:16000") - CENTML_WORKOS_CLIENT_ID: str = os.getenv("CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWW2997MF8AYQXHJEGYR0") + CENTML_WORKOS_CLIENT_ID: str = os.getenv("CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWVNBMQ5FVC777FQZC661") # Long-term credentials - can be set via environment variables CENTML_SERVICE_ACCOUNT_SECRET: Optional[str] = os.getenv("CENTML_SERVICE_ACCOUNT_SECRET", default=None) diff --git a/requirements.txt b/requirements.txt index 98dda09..63a6224 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 platform-api-python-client==4.1.9 -platform-api-ops-client>=1.0.0 +#platform-api-ops-client>=1.0.0 From 8b43ad751157b6d1b1dffb50b725b84bbe014c78 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 1 Oct 2025 15:15:39 -0400 Subject: [PATCH 3/8] priliminary impl for adding list recipes --- centml/cli/cserve_recipe.py | 90 +++++++++++++++++++++-- centml/cli/main.py | 7 +- centml/sdk/ops.py | 100 +++++++++++++++++++++----- examples/sdk/manage_cserve_recipes.py | 56 ++++++++++++--- requirements.txt | 2 +- 5 files changed, 217 insertions(+), 38 deletions(-) diff --git a/centml/cli/cserve_recipe.py b/centml/cli/cserve_recipe.py index 708c521..6b311e8 100644 --- a/centml/cli/cserve_recipe.py +++ b/centml/cli/cserve_recipe.py @@ -3,7 +3,6 @@ from functools import wraps import click -from tabulate import tabulate from centml.sdk.ops import get_centml_ops_client @@ -63,7 +62,9 @@ def update(platform_db_file, cluster_id): click.echo(f"Target cluster ID: {cluster_id}") with get_centml_ops_client() as ops_client: - response = ops_client.update_cserve_recipes(cluster_id=cluster_id, platform_data=platform_data) + response = ops_client.update_cserve_recipes( + cluster_id=cluster_id, platform_data=platform_data + ) # Display results click.echo("\n" + "=" * 60) @@ -78,12 +79,86 @@ def update(platform_db_file, cluster_id): click.echo(f" ✓ {model}") if response.errors: - click.echo(click.style(f"\nErrors ({len(response.errors)}):", fg="red", bold=True)) + click.echo( + click.style(f"\nErrors ({len(response.errors)}):", fg="red", bold=True) + ) for error in response.errors: click.echo(click.style(f" ✗ {error}", fg="red")) sys.exit(1) +@click.command(help="List CServe recipes") +@click.option( + "--model", help="Filter by model name (e.g., 'meta-llama/Llama-3.3-70B-Instruct')" +) +@click.option("--hf-token", help="HuggingFace token for private models") +@handle_exception +def list_recipes(model, hf_token): + """ + List CServe recipe configurations. + + Example: + # List all recipes + centml cserve-recipe list + + # List recipes for a specific model + centml cserve-recipe list --model "meta-llama/Llama-3.3-70B-Instruct" + """ + with get_centml_ops_client() as ops_client: + recipes = ops_client.get_cserve_recipes(model=model, hf_token=hf_token) + + if not recipes: + click.echo("No recipes found.") + return + + click.echo( + f"\n{click.style('CServe Recipes', bold=True, fg='cyan')} ({len(recipes)} found)\n" + ) + + for recipe in recipes: + click.echo(f"{click.style('Model:', bold=True)} {recipe.model}") + + # Display fastest configuration + if recipe.fastest: + click.echo(f" {click.style('Fastest:', fg='green')}") + click.echo( + f" Hardware Instance ID: {recipe.fastest.hardware_instance_id}" + ) + click.echo(f" Recipe: {recipe.fastest.recipe.model}") + if hasattr(recipe.fastest.recipe, "additional_properties"): + tp_size = recipe.fastest.recipe.additional_properties.get( + "tensor_parallel_size", "N/A" + ) + pp_size = recipe.fastest.recipe.additional_properties.get( + "pipeline_parallel_size", "N/A" + ) + click.echo(f" Parallelism: TP={tp_size}, PP={pp_size}") + + # Display cheapest configuration + if ( + recipe.cheapest + and recipe.cheapest.hardware_instance_id + != recipe.fastest.hardware_instance_id + ): + click.echo(f" {click.style('Cheapest:', fg='yellow')}") + click.echo( + f" Hardware Instance ID: {recipe.cheapest.hardware_instance_id}" + ) + + # Display best_value configuration + if ( + recipe.best_value + and recipe.best_value.hardware_instance_id + != recipe.fastest.hardware_instance_id + ): + click.echo(f" {click.style('Best Value:', fg='blue')}") + click.echo( + f" Hardware Instance ID: {recipe.best_value.hardware_instance_id}" + ) + + click.echo("") # Empty line between recipes + + @click.command(help="Delete CServe recipe for a specific model") @click.argument("model") @click.option("--confirm", is_flag=True, help="Skip confirmation prompt") @@ -100,11 +175,14 @@ def delete(model, confirm): centml cserve-recipe delete "Qwen/Qwen3-0.6B" --confirm """ if not confirm: - if not click.confirm(f"Are you sure you want to delete recipe for model '{model}'?"): + if not click.confirm( + f"Are you sure you want to delete recipe for model '{model}'?" + ): click.echo("Cancelled.") return with get_centml_ops_client() as ops_client: ops_client.delete_cserve_recipe(model=model) - click.echo(click.style(f"✓ Successfully deleted recipe for model: {model}", fg="green")) - + click.echo( + click.style(f"✓ Successfully deleted recipe for model: {model}", fg="green") + ) diff --git a/centml/cli/main.py b/centml/cli/main.py index 3227505..9f1f4e0 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -2,7 +2,11 @@ from centml.cli.login import login, logout from centml.cli.cluster import ls, get, delete, pause, resume -from centml.cli.cserve_recipe import update as recipe_update, delete as recipe_delete +from centml.cli.cserve_recipe import ( + update as recipe_update, + delete as recipe_delete, + list_recipes, +) @click.group() @@ -58,6 +62,7 @@ def cserve_recipe(): pass +cserve_recipe.add_command(list_recipes, name="list") cserve_recipe.add_command(recipe_update, name="update") cserve_recipe.add_command(recipe_delete, name="delete") diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py index 549e424..a6e12af 100644 --- a/centml/sdk/ops.py +++ b/centml/sdk/ops.py @@ -1,13 +1,16 @@ from contextlib import contextmanager -from typing import Dict, Any +from typing import Dict, Any, Optional try: import platform_api_ops_client from platform_api_ops_client import OPSApi + OPS_CLIENT_AVAILABLE = True except ImportError: OPS_CLIENT_AVAILABLE = False +import platform_api_python_client + from centml.sdk import auth from centml.sdk.config import settings @@ -18,10 +21,45 @@ class CentMLOpsClient: Used for administrative tasks like managing CServe recipes. """ - def __init__(self, api: "OPSApi"): - self._api = api + def __init__( + self, + ops_api: Optional["OPSApi"] = None, + external_api: Optional[platform_api_python_client.EXTERNALApi] = None, + ): + self._ops_api = ops_api + self._external_api = external_api + + def get_cserve_recipes( + self, model: Optional[str] = None, hf_token: Optional[str] = None + ): + """ + Get CServe recipe configurations. + + Args: + model: Optional model name to filter recipes (e.g., "meta-llama/Llama-3.3-70B-Instruct") + hf_token: Optional HuggingFace token for private models - def update_cserve_recipes(self, cluster_id: int, platform_data: Dict[str, Dict[str, Dict[str, Any]]]): + Returns: + List of CServe recipe configurations + + Example: + with get_centml_ops_client() as ops_client: + # Get all recipes + all_recipes = ops_client.get_cserve_recipes() + + # Get recipes for a specific model + recipes = ops_client.get_cserve_recipes(model="meta-llama/Llama-3.3-70B-Instruct") + """ + if self._external_api is None: + raise RuntimeError("External API client not available") + + return self._external_api.get_cserve_recipe_deployments_cserve_recipes_get( + model=model, hf_token=hf_token + ).results + + def update_cserve_recipes( + self, cluster_id: int, platform_data: Dict[str, Dict[str, Dict[str, Any]]] + ): """ Update CServe recipes from platform_db.json performance data. @@ -46,7 +84,12 @@ def update_cserve_recipes(self, cluster_id: int, platform_data: Dict[str, Dict[s response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=platform_data) print(f"Processed: {response.processed_models}") """ - return self._api.update_cserve_recipes_ops_cserve_recipes_post( + if self._ops_api is None: + raise RuntimeError( + "OPS API client not available. Install platform-api-ops-client." + ) + + return self._ops_api.update_cserve_recipes_ops_cserve_recipes_post( cluster_id=cluster_id, request_body=platform_data ) @@ -64,31 +107,50 @@ def delete_cserve_recipe(self, model: str): with get_centml_ops_client() as ops_client: ops_client.delete_cserve_recipe(model="meta-llama/Llama-3.3-70B-Instruct") """ - return self._api.delete_cserve_recipe_ops_cserve_recipes_delete(model=model) + if self._ops_api is None: + raise RuntimeError( + "OPS API client not available. Install platform-api-ops-client." + ) + + return self._ops_api.delete_cserve_recipe_ops_cserve_recipes_delete(model=model) @contextmanager def get_centml_ops_client(): """ Context manager for CentML OPS API client. - Requires platform-api-ops-client to be installed. + + This client provides: + - get_cserve_recipes(): Read recipes (uses external API, always available) + - update_cserve_recipes(): Update recipes (requires platform-api-ops-client) + - delete_cserve_recipe(): Delete recipes (requires platform-api-ops-client) Usage: with get_centml_ops_client() as ops_client: + # Get recipes (always works) + recipes = ops_client.get_cserve_recipes(model="meta-llama/Llama-3.3-70B-Instruct") + + # Update/delete requires platform-api-ops-client response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=data) """ - if not OPS_CLIENT_AVAILABLE: - raise ImportError( - "platform-api-ops-client is required for OPS operations. " - "Install it with: pip install platform-api-ops-client" - ) - - configuration = platform_api_ops_client.Configuration( + configuration = platform_api_python_client.Configuration( host=settings.CENTML_PLATFORM_API_URL, access_token=auth.get_centml_token() ) - with platform_api_ops_client.ApiClient(configuration) as api_client: - api_instance = OPSApi(api_client) - - yield CentMLOpsClient(api_instance) - + # Always initialize external API for read operations + with platform_api_python_client.ApiClient(configuration) as external_client: + external_api = platform_api_python_client.EXTERNALApi(external_client) + + # Initialize OPS API if available for write operations + ops_api = None + if OPS_CLIENT_AVAILABLE: + ops_configuration = platform_api_ops_client.Configuration( + host=settings.CENTML_PLATFORM_API_URL, + access_token=auth.get_centml_token(), + ) + with platform_api_ops_client.ApiClient(ops_configuration) as ops_client: + ops_api = OPSApi(ops_client) + yield CentMLOpsClient(ops_api=ops_api, external_api=external_api) + else: + # Still provide read-only functionality even without ops client + yield CentMLOpsClient(ops_api=None, external_api=external_api) diff --git a/examples/sdk/manage_cserve_recipes.py b/examples/sdk/manage_cserve_recipes.py index 9a4ca23..9ce4599 100644 --- a/examples/sdk/manage_cserve_recipes.py +++ b/examples/sdk/manage_cserve_recipes.py @@ -2,17 +2,46 @@ Example demonstrating how to manage CServe recipes using the CentML SDK. This example shows how to: -1. Update CServe recipes from platform_db.json -2. Delete CServe recipes for specific models +1. List/Get CServe recipes (read-only, no special permissions needed) +2. Update CServe recipes from platform_db.json (requires OPS admin) +3. Delete CServe recipes for specific models (requires OPS admin) -Note: This requires platform-api-ops-client to be installed and -requires OPS admin permissions. +Note: Update and delete operations require platform-api-ops-client to be installed +and OPS admin permissions. Get/list operations work with just the base client. """ import json from centml.sdk.ops import get_centml_ops_client +def list_recipes_example(): + """List all CServe recipes or filter by model.""" + with get_centml_ops_client() as ops_client: + # List all recipes + all_recipes = ops_client.get_cserve_recipes() + print(f"Found {len(all_recipes)} recipes") + + for recipe in all_recipes[:3]: # Show first 3 + print(f"\nModel: {recipe.model}") + if recipe.fastest: + print( + f" Fastest - Hardware Instance: {recipe.fastest.hardware_instance_id}" + ) + if recipe.cheapest: + print( + f" Cheapest - Hardware Instance: {recipe.cheapest.hardware_instance_id}" + ) + + # Filter by specific model + model_name = "meta-llama/Llama-3.3-70B-Instruct" + specific_recipes = ops_client.get_cserve_recipes(model=model_name) + if specific_recipes: + print(f"\nRecipe for {model_name}:") + print( + f" Fastest config available: {specific_recipes[0].fastest is not None}" + ) + + def update_recipes_example(): """Update CServe recipes from platform_db.json file.""" # Load platform_db.json data @@ -25,15 +54,14 @@ def update_recipes_example(): # }, # ... # } - with open('platform_db.json', 'r') as f: + with open("platform_db.json", "r") as f: platform_data = json.load(f) cluster_id = 1001 # Replace with your cluster ID with get_centml_ops_client() as ops_client: response = ops_client.update_cserve_recipes( - cluster_id=cluster_id, - platform_data=platform_data + cluster_id=cluster_id, platform_data=platform_data ) print(f"Message: {response.message}") @@ -52,8 +80,15 @@ def delete_recipe_example(): def main(): - # Example 1: Update recipes from platform_db.json - print("=== Updating CServe Recipes ===") + # Example 1: List/Get recipes (read-only, always available) + print("=== Listing CServe Recipes ===") + try: + list_recipes_example() + except Exception as e: + print(f"Error listing recipes: {e}") + + # Example 2: Update recipes from platform_db.json (requires ops client) + print("\n=== Updating CServe Recipes ===") try: update_recipes_example() except FileNotFoundError: @@ -61,8 +96,8 @@ def main(): except Exception as e: print(f"Error updating recipes: {e}") + # Example 3: Delete a specific model's recipe (requires ops client) print("\n=== Deleting CServe Recipe ===") - # Example 2: Delete a specific model's recipe try: delete_recipe_example() except Exception as e: @@ -71,4 +106,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/requirements.txt b/requirements.txt index 63a6224..9787a30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,4 @@ prometheus-client>=0.20.0 scipy>=1.6.0 scikit-learn>=1.5.1 platform-api-python-client==4.1.9 -#platform-api-ops-client>=1.0.0 +# platform-api-ops-client>=1.0.0 # Optional: only needed for update/delete CServe recipes (OPS admin operations) From edb600654985840a8342d073c8d92854458f8b95 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Wed, 1 Oct 2025 16:46:37 -0400 Subject: [PATCH 4/8] implemented get cluster --- centml/cli/cserve_recipe.py | 28 ++++++++++++++++++++++++++++ centml/cli/main.py | 2 ++ centml/sdk/ops.py | 22 ++++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/centml/cli/cserve_recipe.py b/centml/cli/cserve_recipe.py index 6b311e8..e261844 100644 --- a/centml/cli/cserve_recipe.py +++ b/centml/cli/cserve_recipe.py @@ -87,6 +87,34 @@ def update(platform_db_file, cluster_id): sys.exit(1) +@click.command(help="List available clusters") +@handle_exception +def list_clusters(): + """ + List available clusters for the organization. + + Example: + centml cserve-recipe list-clusters + """ + with get_centml_ops_client() as ops_client: + clusters = ops_client.get_clusters() + + if not clusters: + click.echo("No clusters found.") + return + + click.echo( + f"\n{click.style('Available Clusters', bold=True, fg='cyan')} ({len(clusters)} found)\n" + ) + + for cluster in clusters: + click.echo(f"{click.style('Cluster ID:', bold=True)} {cluster.id}") + click.echo(f" Display Name: {cluster.display_name}") + if cluster.region: + click.echo(f" Region: {cluster.region}") + click.echo("") + + @click.command(help="List CServe recipes") @click.option( "--model", help="Filter by model name (e.g., 'meta-llama/Llama-3.3-70B-Instruct')" diff --git a/centml/cli/main.py b/centml/cli/main.py index 9f1f4e0..24ec986 100644 --- a/centml/cli/main.py +++ b/centml/cli/main.py @@ -6,6 +6,7 @@ update as recipe_update, delete as recipe_delete, list_recipes, + list_clusters, ) @@ -62,6 +63,7 @@ def cserve_recipe(): pass +cserve_recipe.add_command(list_clusters, name="list-clusters") cserve_recipe.add_command(list_recipes, name="list") cserve_recipe.add_command(recipe_update, name="update") cserve_recipe.add_command(recipe_delete, name="delete") diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py index a6e12af..f9f2bb7 100644 --- a/centml/sdk/ops.py +++ b/centml/sdk/ops.py @@ -29,6 +29,24 @@ def __init__( self._ops_api = ops_api self._external_api = external_api + def get_clusters(self): + """ + Get available clusters for the organization. + + Returns: + List of cluster configurations with id, display_name, and region + + Example: + with get_centml_ops_client() as ops_client: + clusters = ops_client.get_clusters() + for cluster in clusters: + print(f"Cluster {cluster.id}: {cluster.display_name} ({cluster.region})") + """ + if self._external_api is None: + raise RuntimeError("External API client not available") + + return self._external_api.get_clusters_clusters_get().results + def get_cserve_recipes( self, model: Optional[str] = None, hf_token: Optional[str] = None ): @@ -121,12 +139,16 @@ def get_centml_ops_client(): Context manager for CentML OPS API client. This client provides: + - get_clusters(): Get available clusters (uses external API, always available) - get_cserve_recipes(): Read recipes (uses external API, always available) - update_cserve_recipes(): Update recipes (requires platform-api-ops-client) - delete_cserve_recipe(): Delete recipes (requires platform-api-ops-client) Usage: with get_centml_ops_client() as ops_client: + # Get clusters (always works) + clusters = ops_client.get_clusters() + # Get recipes (always works) recipes = ops_client.get_cserve_recipes(model="meta-llama/Llama-3.3-70B-Instruct") From b3f747bd0f5a46483607c8418594ad7906c7b12b Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 2 Oct 2025 10:01:20 -0400 Subject: [PATCH 5/8] implemented get cluster --- centml/sdk/config.py | 2 +- examples/sdk/get_clusters.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 examples/sdk/get_clusters.py diff --git a/centml/sdk/config.py b/centml/sdk/config.py index 0b4b215..843b285 100644 --- a/centml/sdk/config.py +++ b/centml/sdk/config.py @@ -8,7 +8,7 @@ class Config(BaseSettings): # It is possible to override the default values by setting the environment variables model_config = SettingsConfigDict(env_file=Path(".env")) - CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="http://localhost:5173") + CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="http://localhost:5173/") CENTML_CONFIG_PATH: str = os.getenv("CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml")) CENTML_CRED_FILE: str = os.getenv("CENTML_CRED_FILE", default="credentials.json") CENTML_CRED_FILE_PATH: str = os.path.join(CENTML_CONFIG_PATH, CENTML_CRED_FILE) diff --git a/examples/sdk/get_clusters.py b/examples/sdk/get_clusters.py new file mode 100644 index 0000000..aa4c33e --- /dev/null +++ b/examples/sdk/get_clusters.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +""" +Example: Get available clusters + +This example demonstrates how to retrieve cluster information +using the CentML SDK. +""" + +from centml.sdk.ops import get_centml_ops_client + + +def main(): + """Get and display cluster information""" + print("Retrieving cluster information...") + + with get_centml_ops_client() as ops_client: + # Get all clusters + clusters = ops_client.get_clusters() + + if not clusters: + print("No clusters found.") + return + + print(f"\nFound {len(clusters)} cluster(s):\n") + + for cluster in clusters: + print(f"Cluster ID: {cluster.id}") + print(f" Display Name: {cluster.display_name}") + print(f" Region: {cluster.region or 'N/A'}") + print() + + +if __name__ == "__main__": + main() From 4e8b73577078608dcf28bbd06b81e1e291c27216 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 2 Oct 2025 10:18:11 -0400 Subject: [PATCH 6/8] update endpoints --- centml/cli/cserve_recipe.py | 69 ++++++++++++++++++++++++++++++++---- centml/sdk/ops.py | 59 +++++++++++++++++++++++++++--- examples/sdk/get_clusters.py | 37 +++++++++++++++++-- 3 files changed, 150 insertions(+), 15 deletions(-) diff --git a/centml/cli/cserve_recipe.py b/centml/cli/cserve_recipe.py index e261844..0871890 100644 --- a/centml/cli/cserve_recipe.py +++ b/centml/cli/cserve_recipe.py @@ -88,30 +88,69 @@ def update(platform_db_file, cluster_id): @click.command(help="List available clusters") +@click.option( + "--show-hardware", + is_flag=True, + help="Show hardware instances for each cluster", +) @handle_exception -def list_clusters(): +def list_clusters(show_hardware): """ List available clusters for the organization. Example: centml cserve-recipe list-clusters + centml cserve-recipe list-clusters --show-hardware """ with get_centml_ops_client() as ops_client: - clusters = ops_client.get_clusters() + clusters_data = ops_client.get_clusters( + include_hardware_instances=show_hardware + ) - if not clusters: + if not clusters_data: click.echo("No clusters found.") return + # Handle different return types + if show_hardware: + clusters = [item["cluster"] for item in clusters_data] + else: + clusters = clusters_data + click.echo( f"\n{click.style('Available Clusters', bold=True, fg='cyan')} ({len(clusters)} found)\n" ) - for cluster in clusters: + for i, cluster in enumerate(clusters): click.echo(f"{click.style('Cluster ID:', bold=True)} {cluster.id}") click.echo(f" Display Name: {cluster.display_name}") if cluster.region: click.echo(f" Region: {cluster.region}") + + if show_hardware: + hw_instances = clusters_data[i]["hardware_instances"] + if hw_instances: + click.echo( + f" {click.style('Hardware Instances:', fg='yellow')} ({len(hw_instances)} available)" + ) + for hw in hw_instances: + gpu_info = ( + f"{hw.num_accelerators}x{hw.gpu_type}" + if hw.num_accelerators + else hw.gpu_type + ) + click.echo( + f" • {click.style(hw.name, fg='green')} (ID: {hw.id})" + ) + click.echo(f" GPU: {gpu_info}") + click.echo(f" CPU: {hw.cpu} cores, Memory: {hw.memory} GB") + if hw.cost_per_hr: + click.echo(f" Cost: ${hw.cost_per_hr/100:.2f}/hr") + else: + click.echo( + f" {click.style('Hardware Instances:', fg='yellow')} None" + ) + click.echo("") @@ -139,10 +178,26 @@ def list_recipes(model, hf_token): click.echo("No recipes found.") return + # Get all hardware instances to map IDs to names + hardware_instances = ops_client.get_hardware_instances() + hw_map = {hw.id: hw for hw in hardware_instances} + click.echo( f"\n{click.style('CServe Recipes', bold=True, fg='cyan')} ({len(recipes)} found)\n" ) + def format_hw_info(hw_id): + """Format hardware instance information""" + if hw_id in hw_map: + hw = hw_map[hw_id] + gpu_info = ( + f"{hw.num_accelerators}x{hw.gpu_type}" + if hw.num_accelerators + else hw.gpu_type + ) + return f"{hw.name} (ID: {hw_id}, {gpu_info})" + return f"ID: {hw_id}" + for recipe in recipes: click.echo(f"{click.style('Model:', bold=True)} {recipe.model}") @@ -150,7 +205,7 @@ def list_recipes(model, hf_token): if recipe.fastest: click.echo(f" {click.style('Fastest:', fg='green')}") click.echo( - f" Hardware Instance ID: {recipe.fastest.hardware_instance_id}" + f" Hardware: {format_hw_info(recipe.fastest.hardware_instance_id)}" ) click.echo(f" Recipe: {recipe.fastest.recipe.model}") if hasattr(recipe.fastest.recipe, "additional_properties"): @@ -170,7 +225,7 @@ def list_recipes(model, hf_token): ): click.echo(f" {click.style('Cheapest:', fg='yellow')}") click.echo( - f" Hardware Instance ID: {recipe.cheapest.hardware_instance_id}" + f" Hardware: {format_hw_info(recipe.cheapest.hardware_instance_id)}" ) # Display best_value configuration @@ -181,7 +236,7 @@ def list_recipes(model, hf_token): ): click.echo(f" {click.style('Best Value:', fg='blue')}") click.echo( - f" Hardware Instance ID: {recipe.best_value.hardware_instance_id}" + f" Hardware: {format_hw_info(recipe.best_value.hardware_instance_id)}" ) click.echo("") # Empty line between recipes diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py index f9f2bb7..39d9b8f 100644 --- a/centml/sdk/ops.py +++ b/centml/sdk/ops.py @@ -29,23 +29,72 @@ def __init__( self._ops_api = ops_api self._external_api = external_api - def get_clusters(self): + def get_clusters(self, include_hardware_instances: bool = False): """ Get available clusters for the organization. + Args: + include_hardware_instances: If True, also fetch hardware instances for each cluster + Returns: - List of cluster configurations with id, display_name, and region + If include_hardware_instances=False: List of cluster configurations + If include_hardware_instances=True: List of dicts with 'cluster' and 'hardware_instances' keys Example: with get_centml_ops_client() as ops_client: + # Get clusters only clusters = ops_client.get_clusters() - for cluster in clusters: - print(f"Cluster {cluster.id}: {cluster.display_name} ({cluster.region})") + + # Get clusters with hardware instances + clusters_with_hw = ops_client.get_clusters(include_hardware_instances=True) + for item in clusters_with_hw: + cluster = item['cluster'] + print(f"Cluster {cluster.id}: {cluster.display_name}") + for hw in item['hardware_instances']: + print(f" - {hw.name}: {hw.num_accelerators}x{hw.gpu_type}") + """ + if self._external_api is None: + raise RuntimeError("External API client not available") + + clusters = self._external_api.get_clusters_clusters_get().results + + if include_hardware_instances: + result = [] + for cluster in clusters: + hw_instances = ( + self._external_api.get_hardware_instances_hardware_instances_get( + cluster_id=cluster.id + ).results + ) + result.append({"cluster": cluster, "hardware_instances": hw_instances}) + return result + + return clusters + + def get_hardware_instances(self, cluster_id: Optional[int] = None): + """ + Get hardware instances, optionally filtered by cluster. + + Args: + cluster_id: Optional cluster ID to filter hardware instances + + Returns: + List of hardware instance configurations + + Example: + with get_centml_ops_client() as ops_client: + # Get all hardware instances + all_hw = ops_client.get_hardware_instances() + + # Get hardware instances for specific cluster + cluster_hw = ops_client.get_hardware_instances(cluster_id=1000) """ if self._external_api is None: raise RuntimeError("External API client not available") - return self._external_api.get_clusters_clusters_get().results + return self._external_api.get_hardware_instances_hardware_instances_get( + cluster_id=cluster_id + ).results def get_cserve_recipes( self, model: Optional[str] = None, hf_token: Optional[str] = None diff --git a/examples/sdk/get_clusters.py b/examples/sdk/get_clusters.py index aa4c33e..7421356 100644 --- a/examples/sdk/get_clusters.py +++ b/examples/sdk/get_clusters.py @@ -3,7 +3,7 @@ Example: Get available clusters This example demonstrates how to retrieve cluster information -using the CentML SDK. +using the CentML SDK, with and without hardware instance details. """ from centml.sdk.ops import get_centml_ops_client @@ -11,10 +11,13 @@ def main(): """Get and display cluster information""" - print("Retrieving cluster information...") with get_centml_ops_client() as ops_client: - # Get all clusters + # Example 1: Get clusters (basic information) + print("=" * 60) + print("Example 1: Get Clusters (Basic Info)") + print("=" * 60) + clusters = ops_client.get_clusters() if not clusters: @@ -29,6 +32,34 @@ def main(): print(f" Region: {cluster.region or 'N/A'}") print() + # Example 2: Get clusters with hardware instances + print("\n" + "=" * 60) + print("Example 2: Get Clusters (With Hardware Instances)") + print("=" * 60) + + clusters_with_hw = ops_client.get_clusters(include_hardware_instances=True) + + for item in clusters_with_hw: + cluster = item["cluster"] + hw_instances = item["hardware_instances"] + + print(f"\nCluster: {cluster.display_name} (ID: {cluster.id})") + + if hw_instances: + print(f" Hardware Instances ({len(hw_instances)} available):") + for hw in hw_instances: + gpu_info = ( + f"{hw.num_accelerators}x{hw.gpu_type}" + if hw.num_accelerators + else hw.gpu_type + ) + print(f" • {hw.name} (ID: {hw.id})") + print(f" GPU: {gpu_info}") + print(f" CPU: {hw.cpu} cores, Memory: {hw.memory} GB") + print(f" Cost: ${hw.cost_per_hr/100:.2f}/hr") + else: + print(" No hardware instances available") + if __name__ == "__main__": main() From a8d4e6722ae890f66a6b3da056ade9185b93a4a2 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 2 Oct 2025 10:58:09 -0400 Subject: [PATCH 7/8] add changes, add hang fix --- centml/cli/cserve_recipe.py | 60 ++++++++++++++++++++++++------------- centml/sdk/config.py | 25 +++++++++++----- centml/sdk/ops.py | 7 +++-- 3 files changed, 62 insertions(+), 30 deletions(-) diff --git a/centml/cli/cserve_recipe.py b/centml/cli/cserve_recipe.py index 0871890..fd94f60 100644 --- a/centml/cli/cserve_recipe.py +++ b/centml/cli/cserve_recipe.py @@ -179,8 +179,17 @@ def list_recipes(model, hf_token): return # Get all hardware instances to map IDs to names - hardware_instances = ops_client.get_hardware_instances() - hw_map = {hw.id: hw for hw in hardware_instances} + try: + hardware_instances = ops_client.get_hardware_instances() + hw_map = {hw.id: hw for hw in hardware_instances} + except Exception as e: + click.echo( + click.style( + f"Warning: Could not fetch hardware instance details: {e}", + fg="yellow", + ) + ) + hw_map = {} click.echo( f"\n{click.style('CServe Recipes', bold=True, fg='cyan')} ({len(recipes)} found)\n" @@ -196,7 +205,8 @@ def format_hw_info(hw_id): else hw.gpu_type ) return f"{hw.name} (ID: {hw_id}, {gpu_info})" - return f"ID: {hw_id}" + else: + return f"ID: {hw_id} {click.style('(details not found)', fg='yellow')}" for recipe in recipes: click.echo(f"{click.style('Model:', bold=True)} {recipe.model}") @@ -218,26 +228,34 @@ def format_hw_info(hw_id): click.echo(f" Parallelism: TP={tp_size}, PP={pp_size}") # Display cheapest configuration - if ( - recipe.cheapest - and recipe.cheapest.hardware_instance_id - != recipe.fastest.hardware_instance_id - ): - click.echo(f" {click.style('Cheapest:', fg='yellow')}") - click.echo( - f" Hardware: {format_hw_info(recipe.cheapest.hardware_instance_id)}" - ) + if recipe.cheapest: + if ( + recipe.cheapest.hardware_instance_id + != recipe.fastest.hardware_instance_id + ): + click.echo(f" {click.style('Cheapest:', fg='yellow')}") + click.echo( + f" Hardware: {format_hw_info(recipe.cheapest.hardware_instance_id)}" + ) + else: + click.echo( + f" {click.style('Cheapest:', fg='yellow')} Same as Fastest" + ) # Display best_value configuration - if ( - recipe.best_value - and recipe.best_value.hardware_instance_id - != recipe.fastest.hardware_instance_id - ): - click.echo(f" {click.style('Best Value:', fg='blue')}") - click.echo( - f" Hardware: {format_hw_info(recipe.best_value.hardware_instance_id)}" - ) + if recipe.best_value: + if ( + recipe.best_value.hardware_instance_id + != recipe.fastest.hardware_instance_id + ): + click.echo(f" {click.style('Best Value:', fg='blue')}") + click.echo( + f" Hardware: {format_hw_info(recipe.best_value.hardware_instance_id)}" + ) + else: + click.echo( + f" {click.style('Best Value:', fg='blue')} Same as Fastest" + ) click.echo("") # Empty line between recipes diff --git a/centml/sdk/config.py b/centml/sdk/config.py index 843b285..d732249 100644 --- a/centml/sdk/config.py +++ b/centml/sdk/config.py @@ -8,20 +8,31 @@ class Config(BaseSettings): # It is possible to override the default values by setting the environment variables model_config = SettingsConfigDict(env_file=Path(".env")) - CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="http://localhost:5173/") - CENTML_CONFIG_PATH: str = os.getenv("CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml")) + CENTML_WEB_URL: str = os.getenv("CENTML_WEB_URL", default="https://app.centml.org/") + CENTML_CONFIG_PATH: str = os.getenv( + "CENTML_CONFIG_PATH", default=os.path.expanduser("~/.centml") + ) CENTML_CRED_FILE: str = os.getenv("CENTML_CRED_FILE", default="credentials.json") CENTML_CRED_FILE_PATH: str = os.path.join(CENTML_CONFIG_PATH, CENTML_CRED_FILE) - CENTML_PLATFORM_API_URL: str = os.getenv("CENTML_PLATFORM_API_URL", default="http://localhost:16000") + CENTML_PLATFORM_API_URL: str = os.getenv( + "CENTML_PLATFORM_API_URL", default="https://api.centml.org" + ) - CENTML_WORKOS_CLIENT_ID: str = os.getenv("CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWVNBMQ5FVC777FQZC661") + CENTML_WORKOS_CLIENT_ID: str = os.getenv( + "CENTML_WORKOS_CLIENT_ID", default="client_01JP5TWVNBMQ5FVC777FQZC661" + ) # Long-term credentials - can be set via environment variables - CENTML_SERVICE_ACCOUNT_SECRET: Optional[str] = os.getenv("CENTML_SERVICE_ACCOUNT_SECRET", default=None) - CENTML_SERVICE_ACCOUNT_ID: Optional[str] = os.getenv("CENTML_SERVICE_ACCOUNT_ID", default=None) + CENTML_SERVICE_ACCOUNT_SECRET: Optional[str] = os.getenv( + "CENTML_SERVICE_ACCOUNT_SECRET", default=None + ) + CENTML_SERVICE_ACCOUNT_ID: Optional[str] = os.getenv( + "CENTML_SERVICE_ACCOUNT_ID", default=None + ) CENTML_SERVICE_ACCOUNT_TOKEN_URL: str = os.getenv( - "CENTML_SERVICE_ACCOUNT_TOKEN_URL", default="https://signin.centml.com/oauth2/token" + "CENTML_SERVICE_ACCOUNT_TOKEN_URL", + default="https://signin.centml.com/oauth2/token", ) diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py index 39d9b8f..ac74387 100644 --- a/centml/sdk/ops.py +++ b/centml/sdk/ops.py @@ -204,8 +204,11 @@ def get_centml_ops_client(): # Update/delete requires platform-api-ops-client response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=data) """ + # Get token once and reuse it to avoid potential hanging on second call + access_token = auth.get_centml_token() + configuration = platform_api_python_client.Configuration( - host=settings.CENTML_PLATFORM_API_URL, access_token=auth.get_centml_token() + host=settings.CENTML_PLATFORM_API_URL, access_token=access_token ) # Always initialize external API for read operations @@ -217,7 +220,7 @@ def get_centml_ops_client(): if OPS_CLIENT_AVAILABLE: ops_configuration = platform_api_ops_client.Configuration( host=settings.CENTML_PLATFORM_API_URL, - access_token=auth.get_centml_token(), + access_token=access_token, # Reuse the same token ) with platform_api_ops_client.ApiClient(ops_configuration) as ops_client: ops_api = OPSApi(ops_client) From 074393f4c0ccd42fb295dedce994b0ce7eaa7cc7 Mon Sep 17 00:00:00 2001 From: Honglin Cao Date: Thu, 2 Oct 2025 11:50:59 -0400 Subject: [PATCH 8/8] remove comments --- centml/sdk/ops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/centml/sdk/ops.py b/centml/sdk/ops.py index ac74387..4abb0ae 100644 --- a/centml/sdk/ops.py +++ b/centml/sdk/ops.py @@ -204,7 +204,6 @@ def get_centml_ops_client(): # Update/delete requires platform-api-ops-client response = ops_client.update_cserve_recipes(cluster_id=1001, platform_data=data) """ - # Get token once and reuse it to avoid potential hanging on second call access_token = auth.get_centml_token() configuration = platform_api_python_client.Configuration( @@ -220,7 +219,7 @@ def get_centml_ops_client(): if OPS_CLIENT_AVAILABLE: ops_configuration = platform_api_ops_client.Configuration( host=settings.CENTML_PLATFORM_API_URL, - access_token=access_token, # Reuse the same token + access_token=access_token, ) with platform_api_ops_client.ApiClient(ops_configuration) as ops_client: ops_api = OPSApi(ops_client)