@@ -98,15 +98,18 @@ const sharedTextEncoder = new TextEncoder();
9898
9999// This threshold should be determined by benchmarking, which might vary in engines and input data.
100100// Run `npx ts-node benchmark/encode-string.ts` for details.
101+ // For mixed content (ASCII + CJK + emoji), JS wins while strLength is below roughly 30-50.
102+ // After that, WASM or TextEncoder is faster depending on content type.
101103const TEXT_ENCODER_THRESHOLD = 50 ;
102104
/**
 * Encodes `str` as UTF-8 using the module's shared `TextEncoder`.
 *
 * @param str - The string to encode.
 * @param output - Destination buffer that receives the encoded bytes.
 * @param outputOffset - Index in `output` at which writing begins.
 */
export function utf8EncodeTE(str: string, output: Uint8Array, outputOffset: number): void {
  // `subarray` yields a view onto the same underlying buffer, so writes through
  // it land in `output` starting at `outputOffset`.
  const destination = output.subarray(outputOffset);
  // The { read, written } result of `encodeInto` is intentionally not inspected here.
  sharedTextEncoder.encodeInto(str, destination);
}
106108
107- // Wasm threshold: use wasm for medium strings, TextEncoder for large strings
108- // These thresholds should be determined by benchmarking.
109- // Run `npx ts-node benchmark/encode-string.ts` for details.
109+ // Wasm threshold: use wasm for medium strings, TextEncoder for large strings.
110+ // For pure ASCII, TextEncoder is ~1.7x faster at 100+ strLength.
111+ // For CJK/emoji, WASM is ~1.4-1.6x faster than TextEncoder at all sizes.
112+ // 1000 is a compromise for mixed content.
110113const WASM_ENCODE_MAX = 1000 ;
111114
112115function utf8EncodeWithWasm ( str : string , output : Uint8Array , outputOffset : number ) : void {
@@ -187,14 +190,19 @@ const sharedTextDecoder = new TextDecoder();
187190
188191// This threshold should be determined by benchmarking, which might vary in engines and input data.
189192// Run `npx ts-node benchmark/decode-string.ts` for details.
190- const TEXT_DECODER_THRESHOLD = 200 ;
193+ // For mixed content (ASCII + CJK + emoji), JS wins for very short strings only.
194+ // WASM becomes superior at ~30-50 bytes for non-ASCII content.
195+ const TEXT_DECODER_THRESHOLD = 50 ;
191196
/**
 * Decodes a window of `bytes` into a string using the module's shared `TextDecoder`.
 *
 * @param bytes - Buffer containing the encoded data.
 * @param inputOffset - Index of the first byte to decode.
 * @param byteLength - Number of bytes to decode starting at `inputOffset`.
 * @returns The decoded string.
 */
export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
  const endOffset = inputOffset + byteLength;
  // `subarray` creates a view over [inputOffset, endOffset) without copying the bytes.
  return sharedTextDecoder.decode(bytes.subarray(inputOffset, endOffset));
}
196201
197- // Wasm decode threshold: use wasm for medium strings, TextDecoder for large strings
202+ // Wasm decode threshold: use wasm for medium strings, TextDecoder for large strings.
203+ // For pure ASCII, TextDecoder is ~5x faster at 1000+ bytes.
204+ // For CJK/emoji, WASM is ~5-6x faster than TextDecoder at all sizes.
205+ // 1000 is a compromise for mixed content.
198206const WASM_DECODE_MAX = 1000 ;
199207
200208function utf8DecodeWithWasm ( bytes : Uint8Array , inputOffset : number , byteLength : number ) : string {
0 commit comments