Skip to content

Commit 03f1be9

Browse files
committed
Reworked the rune operations to avoid allocating a char[1] or char[2] array per rune and to leverage SIMD for BMP-only strings
1 parent 0fd8306 commit 03f1be9

File tree

4 files changed

+84
-30
lines changed

4 files changed

+84
-30
lines changed

src/FirebirdSql.Data.FirebirdClient/Client/Managed/Version10/GdsStatement.cs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,16 +1533,7 @@ protected object ReadRawValue(IXdrReader xdr, DbField field)
15331533
else
15341534
{
15351535
var s = xdr.ReadString(innerCharset, field.Length);
1536-
var runes = s.EnumerateRunesToChars().ToList();
1537-
if ((field.Length % field.Charset.BytesPerCharacter) == 0 &&
1538-
runes.Count > field.CharCount)
1539-
{
1540-
return new string([.. runes.Take(field.CharCount).SelectMany(x => x)]);
1541-
}
1542-
else
1543-
{
1544-
return s;
1545-
}
1536+
return TruncateStringByRuneCount(s, field);
15461537
}
15471538

15481539
case DbDataType.VarChar:
@@ -1631,16 +1622,7 @@ protected async ValueTask<object> ReadRawValueAsync(IXdrReader xdr, DbField fiel
16311622
else
16321623
{
16331624
var s = await xdr.ReadStringAsync(innerCharset, field.Length, cancellationToken).ConfigureAwait(false);
1634-
var runes = s.EnumerateRunesToChars().ToList();
1635-
if ((field.Length % field.Charset.BytesPerCharacter) == 0 &&
1636-
runes.Count > field.CharCount)
1637-
{
1638-
return new string([.. runes.Take(field.CharCount).SelectMany(x => x)]);
1639-
}
1640-
else
1641-
{
1642-
return s;
1643-
}
1625+
return TruncateStringByRuneCount(s, field);
16441626
}
16451627

16461628
case DbDataType.VarChar:
@@ -1797,6 +1779,22 @@ protected virtual async ValueTask<DbValue[]> ReadRowAsync(CancellationToken canc
17971779
return row;
17981780
}
17991781

1782+
/// <summary>
/// Returns <paramref name="s"/> limited to at most <c>field.CharCount</c> runes.
/// A surrogate pair counts as a single rune, so a pair is never split.
/// </summary>
/// <param name="s">The decoded string value.</param>
/// <param name="field">The field supplying <c>Length</c>, <c>Charset.BytesPerCharacter</c> and <c>CharCount</c>.</param>
/// <returns>
/// <paramref name="s"/> itself when no truncation applies; otherwise a new string holding
/// the first <c>field.CharCount</c> runes of <paramref name="s"/>.
/// </returns>
private static string TruncateStringByRuneCount(string s, DbField field)
{
    // Truncation is only applied when the field byte length is a whole multiple
    // of the charset's bytes-per-character; otherwise the value is returned as read.
    if ((field.Length % field.Charset.BytesPerCharacter) != 0)
    {
        return s;
    }

    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    // A single bounded pass: the truncation helper stops after CharCount runes,
    // so there is no need to count the runes of the whole string first.
    // AsSpan() is explicit so the span extension binds on every C# language version.
    var truncated = s.AsSpan().TruncateStringToRuneCount(field.CharCount);

    // Same length means nothing was cut off: hand back the original instance
    // instead of allocating a copy.
    return truncated.Length == s.Length ? s : new string(truncated);
}
1797+
18001798
#endregion
18011799

18021800
#region Protected Internal Methods

src/FirebirdSql.Data.FirebirdClient/Common/DbField.cs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -325,12 +325,12 @@ public void SetValue(byte[] buffer)
325325
else
326326
{
327327
var s = Charset.GetString(buffer, 0, buffer.Length);
328-
329-
var runes = s.EnumerateRunesToChars().ToList();
330-
if ((Length % Charset.BytesPerCharacter) == 0 &&
331-
runes.Count > CharCount)
332-
{
333-
s = new string([.. runes.Take(CharCount).SelectMany(x => x)]);
328+
if((Length % Charset.BytesPerCharacter) == 0)
329+
{
330+
var runes = s.CountRunes();
331+
if(runes > CharCount) {
332+
s = new string(s.TruncateStringToRuneCount(CharCount));
333+
}
334334
}
335335

336336
DbValue.SetValue(s);

src/FirebirdSql.Data.FirebirdClient/Common/DbValue.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ public byte[] GetBytes()
424424
else
425425
{
426426
var svalue = GetString();
427-
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.EnumerateRunesToChars().Count() > Field.CharCount)
427+
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.CountRunes() > Field.CharCount)
428428
{
429429
throw IscException.ForErrorCodes(new[] { IscCodes.isc_arith_except, IscCodes.isc_string_truncation });
430430
}
@@ -460,7 +460,7 @@ public byte[] GetBytes()
460460
else
461461
{
462462
var svalue = GetString();
463-
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.EnumerateRunesToChars().Count() > Field.CharCount)
463+
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.CountRunes() > Field.CharCount)
464464
{
465465
throw IscException.ForErrorCodes(new[] { IscCodes.isc_arith_except, IscCodes.isc_string_truncation });
466466
}
@@ -639,7 +639,7 @@ public async ValueTask<byte[]> GetBytesAsync(CancellationToken cancellationToken
639639
else
640640
{
641641
var svalue = await GetStringAsync(cancellationToken).ConfigureAwait(false);
642-
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.EnumerateRunesToChars().Count() > Field.CharCount)
642+
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.CountRunes() > Field.CharCount)
643643
{
644644
throw IscException.ForErrorCodes(new[] { IscCodes.isc_arith_except, IscCodes.isc_string_truncation });
645645
}
@@ -675,7 +675,7 @@ public async ValueTask<byte[]> GetBytesAsync(CancellationToken cancellationToken
675675
else
676676
{
677677
var svalue = await GetStringAsync(cancellationToken).ConfigureAwait(false);
678-
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.EnumerateRunesToChars().Count() > Field.CharCount)
678+
if ((Field.Length % Field.Charset.BytesPerCharacter) == 0 && svalue.CountRunes() > Field.CharCount)
679679
{
680680
throw IscException.ForErrorCodes(new[] { IscCodes.isc_arith_except, IscCodes.isc_string_truncation });
681681
}

src/FirebirdSql.Data.FirebirdClient/Common/Extensions.cs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,60 @@ public static Encoding GetANSIEncoding()
102102
}
103103
}
104104
}
105+
106+
/// <summary>
/// Counts the runes in a UTF-16 span. A high surrogate immediately followed by a
/// low surrogate counts as one rune; every other code unit (including an unpaired
/// surrogate) counts as one rune on its own.
/// </summary>
/// <param name="text">The UTF-16 text to inspect.</param>
/// <returns>The number of runes in <paramref name="text"/>.</returns>
public static int CountRunes(this ReadOnlySpan<char> text)
{
    if (text.IsEmpty)
    {
        return 0;
    }

    // Without any high surrogate there can be no surrogate pair,
    // so every code unit is its own rune.
    var firstHigh = text.IndexOfAnyInRange('\uD800', '\uDBFF');
    if (firstHigh < 0)
    {
        return text.Length;
    }

    // Walk from the first high surrogate and count well-formed pairs.
    // Each pair occupies two code units but contributes a single rune.
    var pairs = 0;
    for (var pos = firstHigh; pos + 1 < text.Length; pos++)
    {
        if (char.IsHighSurrogate(text[pos]) && char.IsLowSurrogate(text[pos + 1]))
        {
            pairs++;
            pos++; // step over the low surrogate that belongs to this pair
        }
    }

    return text.Length - pairs;
}
131+
132+
/// <summary>
/// Returns the leading part of <paramref name="text"/> that holds at most
/// <paramref name="maxRuneCount"/> runes. A high surrogate immediately followed by a
/// low surrogate is treated as one rune and is kept or dropped as a unit.
/// </summary>
/// <param name="text">The UTF-16 text to truncate.</param>
/// <param name="maxRuneCount">The maximum number of runes to keep.</param>
/// <returns>
/// An empty span when <paramref name="text"/> is empty or <paramref name="maxRuneCount"/>
/// is not positive; otherwise a slice of <paramref name="text"/> starting at index 0.
/// </returns>
public static ReadOnlySpan<char> TruncateStringToRuneCount(this ReadOnlySpan<char> text, int maxRuneCount)
{
    if (text.IsEmpty || maxRuneCount <= 0)
    {
        return ReadOnlySpan<char>.Empty;
    }

    // A rune is at least one code unit, so text this short cannot exceed the limit.
    if (text.Length <= maxRuneCount)
    {
        return text;
    }

    // If the first maxRuneCount code units contain no high surrogate,
    // they are exactly maxRuneCount runes.
    var head = text.Slice(0, maxRuneCount);
    var cursor = head.IndexOfAnyInRange('\uD800', '\uDBFF');
    if (cursor < 0)
    {
        return head;
    }

    // Everything before the first high surrogate is one rune per code unit;
    // consume the remaining budget rune by rune from there.
    for (var runesLeft = maxRuneCount - cursor; runesLeft > 0 && cursor < text.Length; runesLeft--)
    {
        var isPair = char.IsHighSurrogate(text[cursor])
            && cursor + 1 < text.Length
            && char.IsLowSurrogate(text[cursor + 1]);
        cursor += isPair ? 2 : 1;
    }

    return text.Slice(0, cursor);
}
105161
}

0 commit comments

Comments
 (0)