// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.VectorData;

namespace SampleApp;

/// <summary>
/// A <see cref="ChatHistoryProvider"/> that keeps a bounded window of recent messages in session state
/// (via <see cref="InMemoryChatHistoryProvider"/>) and overflows older messages to a vector store
/// (via <see cref="ChatHistoryMemoryProvider"/>). When providing chat history, it searches the vector
/// store for relevant older messages and prepends them as a memory context message.
/// </summary>
/// <remarks>
/// Only non-system messages are counted towards the session state limit and overflow mechanism.
/// System messages are always retained in session state and are not included in the vector store.
/// Function calls and function results are also dropped when truncation happens, both from in-memory
/// state, and they are also not persisted to the vector store.
/// </remarks>
internal sealed class BoundedChatHistoryProvider : ChatHistoryProvider, IDisposable
{
    private readonly InMemoryChatHistoryProvider _inMemoryProvider;
    private readonly ChatHistoryMemoryProvider _memoryProvider;
    private readonly TruncatingChatReducer _reducer;
    private readonly string _contextPrompt;

    /// <summary>
    /// Initializes a new instance of the <see cref="BoundedChatHistoryProvider"/> class.
    /// </summary>
    /// <param name="maxSessionMessages">The maximum number of non-system messages to keep in session state before overflowing to the vector store. Must be greater than zero.</param>
    /// <param name="vectorStore">The vector store to use for storing and retrieving overflow chat history.</param>
    /// <param name="collectionName">The name of the collection for storing overflow chat history in the vector store.</param>
    /// <param name="vectorDimensions">The number of dimensions to use for the chat history vector store embeddings.</param>
    /// <param name="stateInitializer">A delegate that initializes the memory provider state, providing the storage and search scopes.</param>
    /// <param name="contextPrompt">Optional prompt to prefix memory search results. Defaults to a standard memory context prompt.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxSessionMessages"/> is zero or negative.</exception>
    public BoundedChatHistoryProvider(
        int maxSessionMessages,
        VectorStore vectorStore,
        string collectionName,
        int vectorDimensions,
        Func<AgentSession, ChatHistoryMemoryProvider.State> stateInitializer,
        string? contextPrompt = null)
    {
        // FIX: the original guard only rejected negative values ("must be non-negative"), but the
        // TruncatingChatReducer constructed below requires a strictly positive maximum, so a value
        // of 0 slipped past this check and failed later with an exception naming a different
        // parameter ("maxMessages"). Validate the real contract up front.
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maxSessionMessages);

        this._reducer = new TruncatingChatReducer(maxSessionMessages);

        // The reducer runs after every message is added, so session state never grows past the limit,
        // and any truncated messages are immediately available via _reducer.RemovedMessages.
        this._inMemoryProvider = new InMemoryChatHistoryProvider(new InMemoryChatHistoryProviderOptions
        {
            ChatReducer = this._reducer,
            ReducerTriggerEvent = InMemoryChatHistoryProviderOptions.ChatReducerTriggerEvent.AfterMessageAdded,
            StorageInputMessageFilter = msgs => msgs,
        });

        // Identity filters: the memory provider should see exactly the messages we hand it,
        // both when searching and when archiving overflow.
        this._memoryProvider = new ChatHistoryMemoryProvider(
            vectorStore,
            collectionName,
            vectorDimensions,
            stateInitializer,
            options: new ChatHistoryMemoryProviderOptions
            {
                SearchInputMessageFilter = msgs => msgs,
                StorageInputMessageFilter = msgs => msgs,
            });

        this._contextPrompt = contextPrompt
            ?? "The following are memories from earlier in this conversation. Use them to inform your responses:";
    }

    /// <inheritdoc/>
    protected override async ValueTask<IEnumerable<ChatMessage>> ProvideChatHistoryAsync(
        InvokingContext context,
        CancellationToken cancellationToken = default)
    {
        // Delegate to the inner provider's full lifecycle (retrieve, filter, stamp, merge with request messages).
        var allMessages = await this._inMemoryProvider.InvokingAsync(context, cancellationToken).ConfigureAwait(false);

        // Search the vector store for relevant older messages.
        var aiContext = new AIContext { Messages = context.RequestMessages.ToList() };
        var invokingContext = new AIContextProvider.InvokingContext(
            context.Agent, context.Session, aiContext);

        var result = await this._memoryProvider.InvokingAsync(invokingContext, cancellationToken).ConfigureAwait(false);

        // Extract only the messages added by the memory provider (stamped with AIContextProvider source type).
        var memoryMessages = result.Messages?
            .Where(m => m.GetAgentRequestMessageSourceType() == AgentRequestMessageSourceType.AIContextProvider)
            .ToList();

        if (memoryMessages is { Count: > 0 })
        {
            var memoryText = string.Join("\n", memoryMessages.Select(m => m.Text).Where(t => !string.IsNullOrWhiteSpace(t)));

            if (!string.IsNullOrWhiteSpace(memoryText))
            {
                // Collapse the recalled memories into a single user-role context message so the
                // model sees them ahead of the recent session history.
                var contextMessage = new ChatMessage(ChatRole.User, $"{this._contextPrompt}\n{memoryText}");
                return new[] { contextMessage }.Concat(allMessages);
            }
        }

        return allMessages;
    }

    /// <inheritdoc/>
    protected override async ValueTask StoreChatHistoryAsync(
        InvokedContext context,
        CancellationToken cancellationToken = default)
    {
        // Delegate storage to the in-memory provider. Its TruncatingChatReducer (AfterMessageAdded trigger)
        // will automatically truncate to the configured maximum and expose any removed messages.
        // NOTE(review): ResponseMessages is null-forgiven here — confirm InvokedContext tolerates a
        // null response collection (e.g., when a run fails before producing responses).
        var innerContext = new InvokedContext(
            context.Agent, context.Session, context.RequestMessages, context.ResponseMessages!);
        await this._inMemoryProvider.InvokedAsync(innerContext, cancellationToken).ConfigureAwait(false);

        // Archive any messages that the reducer removed to the vector store.
        if (this._reducer.RemovedMessages is { Count: > 0 })
        {
            var overflowContext = new AIContextProvider.InvokedContext(
                context.Agent, context.Session, this._reducer.RemovedMessages, []);
            await this._memoryProvider.InvokedAsync(overflowContext, cancellationToken).ConfigureAwait(false);
        }
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        this._memoryProvider.Dispose();
    }
}
// Copyright (c) Microsoft. All rights reserved.

// This sample shows how to create a bounded chat history provider that keeps a configurable number of
// recent messages in session state and automatically overflows older messages to a vector store.
// When the agent is invoked, it searches the vector store for relevant older messages and
// prepends them as a "memory" context message before the recent session history.

using Azure.AI.OpenAI;
using Azure.Identity;
using Microsoft.Agents.AI;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.VectorData;
using Microsoft.SemanticKernel.Connectors.InMemory;
using OpenAI.Chat;
using SampleApp;

// Resolve the Azure OpenAI configuration from the environment.
string endpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
string deploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
string embeddingDeploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME") ?? "text-embedding-3-large";

// WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
// In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
// latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
DefaultAzureCredential credential = new();

// Create a vector store to store overflow chat messages.
// For demonstration purposes, we are using an in-memory vector store.
// Replace this with a persistent vector store implementation for production scenarios.
VectorStore vectorStore = new InMemoryVectorStore(new InMemoryVectorStoreOptions()
{
    EmbeddingGenerator = new AzureOpenAIClient(new Uri(endpoint), credential)
        .GetEmbeddingClient(embeddingDeploymentName)
        .AsIEmbeddingGenerator()
});

string sessionId = Guid.NewGuid().ToString();

// Create the BoundedChatHistoryProvider with a maximum of 4 non-system messages in session state.
// It internally creates an InMemoryChatHistoryProvider with a TruncatingChatReducer and a
// ChatHistoryMemoryProvider with the correct configuration to ensure overflow messages are
// automatically archived to the vector store and recalled via semantic search.
// NOTE: 3072 matches the embedding width of text-embedding-3-large; change it if you swap models.
var historyProvider = new BoundedChatHistoryProvider(
    maxSessionMessages: 4,
    vectorStore,
    collectionName: "chathistory-overflow",
    vectorDimensions: 3072,
    session => new ChatHistoryMemoryProvider.State(
        storageScope: new() { UserId = "UID1", SessionId = sessionId },
        searchScope: new() { UserId = "UID1" }));

// Create the agent with the bounded chat history provider.
AIAgent agent = new AzureOpenAIClient(new Uri(endpoint), credential)
    .GetChatClient(deploymentName)
    .AsAIAgent(new ChatClientAgentOptions
    {
        ChatOptions = new() { Instructions = "You are a helpful assistant. Answer questions concisely." },
        Name = "Assistant",
        ChatHistoryProvider = historyProvider,
    });

// Start a conversation. The first several exchanges will fill up the session state window.
AgentSession session = await agent.CreateSessionAsync();

Console.WriteLine("--- Filling the session window (4 messages max) ---\n");

Console.WriteLine(await agent.RunAsync("My favorite color is blue.", session));
Console.WriteLine(await agent.RunAsync("I have a dog named Max.", session));

// At this point the session state holds 4 messages (2 user + 2 assistant).
// The next exchange will push the oldest messages into the vector store.
Console.WriteLine("\n--- Next exchange will trigger overflow to vector store ---\n");

Console.WriteLine(await agent.RunAsync("What is the capital of France?", session));

// The oldest messages about favorite color have now been archived to the vector store.
// Ask the agent something that requires recalling the overflowed information.
Console.WriteLine("\n--- Asking about overflowed information (should recall from vector store) ---\n");

Console.WriteLine(await agent.RunAsync("What is my favorite color?", session));
- **`BoundedChatHistoryProvider`**: A custom `ChatHistoryProvider` that composes:
  - `InMemoryChatHistoryProvider` for fast session-state storage (bounded by the reducer)
  - `ChatHistoryMemoryProvider` for vector-store overflow and semantic search of older messages

## Prerequisites

- [.NET 10 SDK](https://dotnet.microsoft.com/download/dotnet/10.0)
- An Azure OpenAI resource with:
  - A chat deployment (e.g., `gpt-4o-mini`)
  - An embedding deployment (e.g., `text-embedding-3-large`)

## Configuration

Set the following environment variables:

| Variable | Description | Default |
|---|---|---|
| `AZURE_OPENAI_ENDPOINT` | Your Azure OpenAI endpoint URL | *(required)* |
| `AZURE_OPENAI_DEPLOYMENT_NAME` | Chat model deployment name | `gpt-4o-mini` |
| `AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME` | Embedding model deployment name | `text-embedding-3-large` |

## Running the Sample

```bash
dotnet run
```

## How it Works

1. The agent starts a conversation with a bounded session window of 4 non-system, non-function messages (i.e., user/assistant turns). System messages are always preserved, while function call/result messages are dropped during truncation and are not archived.
2. As messages accumulate beyond the limit, the `TruncatingChatReducer` removes the oldest messages.
3. The `BoundedChatHistoryProvider` detects the removed messages and stores them in a vector store via `ChatHistoryMemoryProvider`.
4. On subsequent invocations, the provider searches the vector store for relevant older messages and prepends them as memory context, allowing the agent to recall information from earlier in the conversation.
// Copyright (c) Microsoft. All rights reserved.

using Microsoft.Extensions.AI;

namespace SampleApp;

/// <summary>
/// A truncating chat reducer that keeps the most recent messages up to a configured maximum,
/// preserving all system messages. Removed messages are exposed via
/// <see cref="RemovedMessages"/> so that a caller can archive them (e.g. to a vector store).
/// </summary>
/// <remarks>
/// Messages containing function calls or function results are intentionally dropped entirely:
/// they are neither retained in the window nor reported in <see cref="RemovedMessages"/>.
/// </remarks>
internal sealed class TruncatingChatReducer : IChatReducer
{
    private readonly int _maxMessages;

    /// <summary>
    /// Initializes a new instance of the <see cref="TruncatingChatReducer"/> class.
    /// </summary>
    /// <param name="maxMessages">The maximum number of non-system messages to retain. Must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxMessages"/> is zero or negative.</exception>
    public TruncatingChatReducer(int maxMessages)
    {
        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maxMessages);
        this._maxMessages = maxMessages;
    }

    /// <summary>
    /// Gets the messages that were removed during the most recent call to <see cref="ReduceAsync"/>.
    /// </summary>
    public IReadOnlyList<ChatMessage> RemovedMessages { get; private set; } = [];

    /// <inheritdoc/>
    public Task<IEnumerable<ChatMessage>> ReduceAsync(IEnumerable<ChatMessage> messages, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(messages);

        List<ChatMessage> systemMessages = [];
        Queue<ChatMessage> retained = new(capacity: this._maxMessages);
        List<ChatMessage> removed = [];

        foreach (var message in messages)
        {
            if (message.Role == ChatRole.System)
            {
                // FIX: the original kept only the FIRST system message (systemMessage ??= message)
                // and silently discarded any later ones — they appeared neither in the reduced
                // output nor in RemovedMessages, contradicting the documented contract that system
                // messages are always retained. Preserve every system message, in order.
                systemMessages.Add(message);
            }
            else if (!message.Contents.Any(c => c is FunctionCallContent or FunctionResultContent))
            {
                // Sliding window: once full, the oldest message falls out and is recorded so the
                // caller can archive it.
                if (retained.Count >= this._maxMessages)
                {
                    removed.Add(retained.Dequeue());
                }

                retained.Enqueue(message);
            }
        }

        this.RemovedMessages = removed;

        // System messages are hoisted to the front, followed by the most recent
        // non-system, non-function messages in their original order.
        return Task.FromResult(systemMessages.Concat(retained));
    }
}