-
Notifications
You must be signed in to change notification settings - Fork 617
Use ReadOnlyMemory<byte> for binary data to eliminate UTF-16 transcoding #1070
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
7dca602
4287694
8e6fcf0
e405dfc
ebe3eef
1d76c11
39213fb
a4ba4a9
b4e4eaf
61f0dfa
5de847c
dc01608
2fc35d9
14fedca
b3eacff
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -281,9 +281,9 @@ public static IList<PromptMessage> ToPromptMessages(this ChatMessage chatMessage | |
| { | ||
| TextContentBlock textContent => new TextContent(textContent.Text), | ||
|
|
||
| ImageContentBlock imageContent => new DataContent(Convert.FromBase64String(imageContent.Data), imageContent.MimeType), | ||
| ImageContentBlock imageContent => new DataContent(imageContent.DecodedData, imageContent.MimeType), | ||
|
|
||
| AudioContentBlock audioContent => new DataContent(Convert.FromBase64String(audioContent.Data), audioContent.MimeType), | ||
| AudioContentBlock audioContent => new DataContent(audioContent.DecodedData, audioContent.MimeType), | ||
|
|
||
| EmbeddedResourceBlock resourceContent => resourceContent.Resource.ToAIContent(), | ||
|
|
||
|
|
@@ -324,7 +324,7 @@ public static AIContent ToAIContent(this ResourceContents content) | |
|
|
||
| AIContent ac = content switch | ||
| { | ||
| BlobResourceContents blobResource => new DataContent(Convert.FromBase64String(blobResource.Blob), blobResource.MimeType ?? "application/octet-stream"), | ||
| BlobResourceContents blobResource => new DataContent(blobResource.Data, blobResource.MimeType ?? "application/octet-stream"), | ||
| TextResourceContents textResource => new TextContent(textResource.Text), | ||
| _ => throw new NotSupportedException($"Resource type '{content.GetType().Name}' is not supported.") | ||
| }; | ||
|
|
@@ -401,21 +401,21 @@ public static ContentBlock ToContentBlock(this AIContent content, JsonSerializer | |
|
|
||
| DataContent dataContent when dataContent.HasTopLevelMediaType("image") => new ImageContentBlock | ||
| { | ||
| Data = dataContent.Base64Data.ToString(), | ||
| Data = GetUtf8Bytes(dataContent.Base64Data.Span), | ||
| MimeType = dataContent.MediaType, | ||
| }, | ||
|
|
||
| DataContent dataContent when dataContent.HasTopLevelMediaType("audio") => new AudioContentBlock | ||
| { | ||
| Data = dataContent.Base64Data.ToString(), | ||
| Data = GetUtf8Bytes(dataContent.Base64Data.Span), | ||
| MimeType = dataContent.MediaType, | ||
| }, | ||
|
|
||
| DataContent dataContent => new EmbeddedResourceBlock | ||
| { | ||
| Resource = new BlobResourceContents | ||
| { | ||
| Blob = dataContent.Base64Data.ToString(), | ||
| Blob = GetUtf8Bytes(dataContent.Base64Data.Span), | ||
| MimeType = dataContent.MediaType, | ||
| Uri = string.Empty, | ||
| } | ||
|
|
@@ -448,6 +448,22 @@ public static ContentBlock ToContentBlock(this AIContent content, JsonSerializer | |
| contentBlock.Meta = content.AdditionalProperties?.ToJsonObject(options); | ||
|
|
||
| return contentBlock; | ||
|
|
||
| unsafe byte[] GetUtf8Bytes(ReadOnlySpan<char> utf16) | ||
| { | ||
| // gets UTF-8 bytes from UTF-16 chars without intermediate string allocations | ||
| fixed (char* pChars = utf16) | ||
| { | ||
| var byteCount = System.Text.Encoding.UTF8.GetByteCount(pChars, utf16.Length); | ||
| var bytes = new byte[byteCount]; | ||
|
|
||
| fixed (byte* pBytes = bytes) | ||
| { | ||
| System.Text.Encoding.UTF8.GetBytes(pChars, utf16.Length, pBytes, byteCount); | ||
| } | ||
| return bytes; | ||
| } | ||
|
Comment on lines
+454
to
+465
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @copilot, this shouldn't require unsafe code on .NET Core... there are overloads that work with spans, so this can be e.g. byte[] bytes = new byte[Encoding.UTF8.GetByteCount(utf16)]
Encoding.UTF8.GetBytes(utf16, bytes);
return bytes;For the netstandard2.0 build, you can add polyfills for these methods in https://github.com/modelcontextprotocol/csharp-sdk/tree/main/src/Common/Polyfills/System
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had used the unsafe code to avoid the ifdef, I agree a polyfill approach should work. Thanks for suggestion. |
||
| } | ||
| } | ||
|
|
||
| private sealed class ToolAIFunctionDeclaration(Tool tool) : AIFunctionDeclaration | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -1,4 +1,7 @@ | ||||||||
| using System.Buffers; | ||||||||
| using System.Buffers.Text; | ||||||||
| using System.Diagnostics; | ||||||||
| using System.Runtime.InteropServices; | ||||||||
| using System.Text.Json.Serialization; | ||||||||
|
|
||||||||
| namespace ModelContextProtocol.Protocol; | ||||||||
|
|
@@ -9,8 +12,8 @@ namespace ModelContextProtocol.Protocol; | |||||||
| /// <remarks> | ||||||||
| /// <para> | ||||||||
| /// <see cref="BlobResourceContents"/> is used when binary data needs to be exchanged through | ||||||||
| /// the Model Context Protocol. The binary data is represented as a base64-encoded string | ||||||||
| /// in the <see cref="Blob"/> property. | ||||||||
| /// the Model Context Protocol. The binary data is represented as base64-encoded UTF-8 bytes | ||||||||
| /// in the <see cref="Blob"/> property, providing a zero-copy representation of the wire payload. | ||||||||
| /// </para> | ||||||||
| /// <para> | ||||||||
| /// This class inherits from <see cref="ResourceContents"/>, which also has a sibling implementation | ||||||||
|
|
@@ -24,18 +27,101 @@ namespace ModelContextProtocol.Protocol; | |||||||
| [DebuggerDisplay("{DebuggerDisplay,nq}")] | ||||||||
| public sealed class BlobResourceContents : ResourceContents | ||||||||
| { | ||||||||
| private ReadOnlyMemory<byte>? _decodedData; | ||||||||
| private ReadOnlyMemory<byte> _blob; | ||||||||
|
|
||||||||
| /// <summary> | ||||||||
| /// Creates an <see cref="BlobResourceContents"/> from raw data. | ||||||||
| /// </summary> | ||||||||
| /// <param name="data">The raw data.</param> | ||||||||
| /// <param name="uri">The URI of the data.</param> | ||||||||
| /// <param name="mimeType">The optional MIME type of the data.</param> | ||||||||
| /// <returns>A new <see cref="BlobResourceContents"/> instance.</returns> | ||||||||
| /// <exception cref="InvalidOperationException"></exception> | ||||||||
| public static BlobResourceContents FromData(ReadOnlyMemory<byte> data, string uri, string? mimeType = null) | ||||||||
| { | ||||||||
| ReadOnlyMemory<byte> blob; | ||||||||
|
|
||||||||
| // Encode directly to UTF-8 base64 bytes without string intermediate | ||||||||
| int maxLength = Base64.GetMaxEncodedToUtf8Length(data.Length); | ||||||||
| byte[] buffer = new byte[maxLength]; | ||||||||
| if (Base64.EncodeToUtf8(data.Span, buffer, out _, out int bytesWritten) == OperationStatus.Done) | ||||||||
| { | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We expect bytesWritten to always equal buffer.Length, right? Should we assert that? |
||||||||
| blob = buffer.AsMemory(0, bytesWritten); | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
| throw new InvalidOperationException("Failed to encode binary data to base64"); | ||||||||
| } | ||||||||
|
|
||||||||
| return new() | ||||||||
| { | ||||||||
| _decodedData = data, | ||||||||
| Blob = blob, | ||||||||
| MimeType = mimeType, | ||||||||
| Uri = uri | ||||||||
| }; | ||||||||
| } | ||||||||
|
|
||||||||
| /// <summary> | ||||||||
| /// Gets or sets the base64-encoded string representing the binary data of the item. | ||||||||
| /// Gets or sets the base64-encoded UTF-8 bytes representing the binary data of the item. | ||||||||
| /// </summary> | ||||||||
| /// <remarks> | ||||||||
| /// This is a zero-copy representation of the wire payload of this item. Setting this value will invalidate any cached value of <see cref="Data"/>. | ||||||||
| /// </remarks> | ||||||||
| [JsonPropertyName("blob")] | ||||||||
| public required string Blob { get; set; } | ||||||||
| public required ReadOnlyMemory<byte> Blob | ||||||||
| { | ||||||||
| get => _blob; | ||||||||
| set | ||||||||
| { | ||||||||
| _blob = value; | ||||||||
| _decodedData = null; // Invalidate cache | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| /// <summary> | ||||||||
| /// Gets or sets the decoded data represented by <see cref="Blob"/>. | ||||||||
| /// </summary> | ||||||||
| /// <remarks> | ||||||||
| /// <para> | ||||||||
| /// When getting, this member will decode the value in <see cref="Blob"/> and cache the result. | ||||||||
| /// Subsequent accesses return the cached value unless <see cref="Blob"/> is modified. | ||||||||
| /// </para> | ||||||||
| /// <para> | ||||||||
| /// When setting, the binary data is stored without copying and <see cref="Blob"/> is updated | ||||||||
| /// with the base64-encoded UTF-8 representation. | ||||||||
| /// </para> | ||||||||
| /// </remarks> | ||||||||
| [JsonIgnore] | ||||||||
| public ReadOnlyMemory<byte> Data | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ericstj why do you call this Data here but DecodedData elsewhere like on ContentBlock?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can change it. The change was made here 4287694#diff-9da29ad32a17c90e9b28c79ad3ab9c4b370a757e26657c736020ea47314a60ba I had assumed we would have liked to use I agree that consistency is better here - lets use
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think there's value in keeping the names of these C# properties aligned with their spec / JSON schema counterparts. I don't know why they differ in the MCP spec, but alas. |
||||||||
| { | ||||||||
| get | ||||||||
| { | ||||||||
| if (_decodedData is null) | ||||||||
| { | ||||||||
| // Decode directly from UTF-8 base64 bytes without string intermediate | ||||||||
| int maxLength = Base64.GetMaxDecodedFromUtf8Length(Blob.Length); | ||||||||
| byte[] buffer = new byte[maxLength]; | ||||||||
| if (Base64.DecodeFromUtf8(Blob.Span, buffer, out _, out int bytesWritten) == System.Buffers.OperationStatus.Done) | ||||||||
| { | ||||||||
| _decodedData = buffer.AsMemory(0, bytesWritten); | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
| throw new FormatException("Invalid base64 data"); | ||||||||
| } | ||||||||
| } | ||||||||
| return _decodedData.Value; | ||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| [DebuggerBrowsable(DebuggerBrowsableState.Never)] | ||||||||
| private string DebuggerDisplay | ||||||||
| { | ||||||||
| get | ||||||||
| { | ||||||||
| string lengthDisplay = DebuggerDisplayHelper.GetBase64LengthDisplay(Blob); | ||||||||
| string lengthDisplay = _decodedData is null ? DebuggerDisplayHelper.GetBase64LengthDisplay(Blob) : $"{Data.Length} bytes"; | ||||||||
| string mimeInfo = MimeType is not null ? $", MimeType = {MimeType}" : ""; | ||||||||
| return $"Uri = \"{Uri}\"{mimeInfo}, Length = {lengthDisplay}"; | ||||||||
| } | ||||||||
|
|
||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot, we prefer to use simple rather than fully-qualified names. Can these "System.Text"s be moved instead to usings at the beginning?