From edb80054094baa869edd87187b755f3f73f4b920 Mon Sep 17 00:00:00 2001 From: Simon Cropp Date: Sat, 27 Jun 2026 11:23:54 +1000 Subject: [PATCH 1/2] Align stream wrappers with the dotnet 11 implementation (follow-up to #562) StringStream and ReadOnlySequenceStream diverged from dotnet/runtime#126669 in ways that hurt performance on the cases those types exist for. Bring them in line and port the PR's tests. StringStream - Encode incrementally via a stateful Encoder (fast-path single-shot, spillover buffer for sub-scalar reads, final flush) instead of encoding the whole text into one buffer on first read. Peak memory is now bounded by the caller's read buffer rather than the full encoded length, matching the BCL. - Span-based Encoder.Convert/Encoding.GetBytes on net core 2.1 / netstandard 2.1+, array-based equivalents on older TFMs. ReadOnlySequenceStream - Track a SequencePosition cursor alongside the absolute position so Read/ReadByte advance from the current segment instead of re-slicing from the start every call (was O(segments^2) for multi-segment sequences). Seek/Position reposition the cursor forward from the current spot, walking from the start only for back jumps. - Override CopyTo/CopyToAsync to write segments directly to the destination. CopyTo is gated to net core 2.1 / netstandard 2.1+ (Stream.CopyTo(Stream, int) is not virtual on older TFMs); CopyToAsync applies everywhere. ReadOnlyMemoryStream / WritableMemoryStream are unchanged: they are already at parity for array-backed memory, and matching the BCL for native (non-array) memory would require MemoryStream internals unavailable outside CoreLib. Tests - Port the explicit unit tests from the PR (StringStream encodings, surrogate pairs, chunk boundaries, GetMaxByteCount overflow guard, memory-slice/char-array ctors; WritableMemoryStream capacity/seek/overwrite cases) into the TUnit suite, plus ReadOnlySequenceStream CopyTo/CopyToAsync coverage. Regenerated Split + assemblySize; public API surface unchanged. Verified across all 22 TFMs (Consume) and passing on net10.0 and net48. --- assemblySize.include.md | 84 +++---- src/Polyfill/ReadOnlySequenceStream.cs | 162 +++++++++++-- src/Polyfill/StringStream.cs | 192 +++++++++++++--- src/Split/net10.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net10.0/StringStream.cs | 139 +++++++++--- src/Split/net11.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net11.0/StringStream.cs | 139 +++++++++--- src/Split/net461/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net461/StringStream.cs | 141 +++++++++--- src/Split/net462/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net462/StringStream.cs | 141 +++++++++--- src/Split/net47/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net47/StringStream.cs | 141 +++++++++--- src/Split/net471/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net471/StringStream.cs | 141 +++++++++--- src/Split/net472/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net472/StringStream.cs | 141 +++++++++--- src/Split/net48/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net48/StringStream.cs | 141 +++++++++--- src/Split/net481/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/net481/StringStream.cs | 141 +++++++++--- src/Split/net5.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net5.0/StringStream.cs | 139 +++++++++--- src/Split/net6.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net6.0/StringStream.cs | 139 +++++++++--- src/Split/net7.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net7.0/StringStream.cs | 139 +++++++++--- src/Split/net8.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net8.0/StringStream.cs | 139 +++++++++--- src/Split/net9.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/net9.0/StringStream.cs | 139 +++++++++--- .../netcoreapp2.0/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/netcoreapp2.0/StringStream.cs | 141 +++++++++--- .../netcoreapp2.1/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/netcoreapp2.1/StringStream.cs | 139 +++++++++--- .../netcoreapp2.2/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/netcoreapp2.2/StringStream.cs | 139 +++++++++--- .../netcoreapp3.0/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/netcoreapp3.0/StringStream.cs | 139 +++++++++--- .../netcoreapp3.1/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/netcoreapp3.1/StringStream.cs | 139 +++++++++--- .../netstandard2.0/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/netstandard2.0/StringStream.cs | 141 +++++++++--- .../netstandard2.1/ReadOnlySequenceStream.cs | 112 +++++++-- src/Split/netstandard2.1/StringStream.cs | 139 +++++++++--- src/Split/uap10.0/ReadOnlySequenceStream.cs | 111 +++++++-- src/Split/uap10.0/StringStream.cs | 141 +++++++++--- .../PolyfillTests_ReadOnlyMemoryStream.cs | 11 + .../PolyfillTests_ReadOnlySequenceStream.cs | 80 +++++++ src/Tests/PolyfillTests_StringStream.cs | 214 ++++++++++++++++++ .../PolyfillTests_WritableMemoryStream.cs | 72 ++++++ 51 files changed, 5314 insertions(+), 1033 deletions(-) diff --git a/assemblySize.include.md b/assemblySize.include.md index 27bcf822..82bb6d98 100644 --- a/assemblySize.include.md +++ b/assemblySize.include.md @@ -2,51 +2,51 @@ | | Empty Assembly | With Polyfill | Diff | Ensure | ArgumentExceptions | StringInterpolation | Nullability | |----------------|----------------|---------------|-----------|-----------|--------------------|---------------------|-------------| -| netstandard2.0 | 8.0KB | 347.5KB | +339.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| netstandard2.1 | 8.5KB | 302.0KB | +293.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| net461 | 8.5KB | 346.5KB | +338.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net462 | 7.0KB | 350.0KB | +343.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net47 | 7.0KB | 349.5KB | +342.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | -| net471 | 8.5KB | 349.0KB | +340.5KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | -| net472 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net48 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net481 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.0 | 9.0KB | 323.5KB | +314.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.1 | 9.0KB | 304.5KB | +295.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.2 | 9.0KB | 304.5KB | +295.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp3.0 | 9.5KB | 297.0KB | +287.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| netcoreapp3.1 | 9.5KB | 295.5KB | +286.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net5.0 | 9.5KB | 259.0KB | +249.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | -| net6.0 | 10.0KB | 201.0KB | +191.0KB | +10.0KB | +6.5KB | +512bytes | +3.5KB | -| net7.0 | 10.0KB | 163.5KB | +153.5KB | +9.0KB | +5.5KB | +512bytes | +3.0KB | -| net8.0 | 9.5KB | 135.0KB | +125.5KB | +8.0KB | | +512bytes | +3.0KB | -| net9.0 | 9.5KB | 88.5KB | +79.0KB | +8.5KB | | +512bytes | +3.5KB | -| net10.0 | 10.0KB | 66.0KB | +56.0KB | +9.0KB | | +512bytes | +3.5KB | -| net11.0 | 10.0KB | 27.0KB | +17.0KB | +9.0KB | | +1.0KB | +3.5KB | +| netstandard2.0 | 8.0KB | 351.5KB | +343.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netstandard2.1 | 8.5KB | 306.0KB | +297.5KB | +8.5KB | +6.0KB | +9.0KB | +13.5KB | +| net461 | 8.5KB | 350.0KB | +341.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | +| net462 | 7.0KB | 353.5KB | +346.5KB | +9.0KB | +6.5KB | +9.5KB | +13.5KB | +| net47 | 7.0KB | 353.5KB | +346.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| net471 | 8.5KB | 352.5KB | +344.0KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | +| net472 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| net48 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| net481 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| netcoreapp2.0 | 9.0KB | 327.5KB | +318.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netcoreapp2.1 | 9.0KB | 308.0KB | +299.0KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| netcoreapp2.2 | 9.0KB | 308.0KB | +299.0KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| netcoreapp3.0 | 9.5KB | 301.0KB | +291.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netcoreapp3.1 | 9.5KB | 299.0KB | +289.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| net5.0 | 9.5KB | 263.0KB | +253.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| net6.0 | 10.0KB | 205.0KB | +195.0KB | +9.5KB | +6.5KB | +512bytes | +3.0KB | +| net7.0 | 10.0KB | 167.0KB | +157.0KB | +9.5KB | +5.5KB | +1.0KB | +3.5KB | +| net8.0 | 9.5KB | 138.5KB | +129.0KB | +8.5KB | +512bytes | +512bytes | +3.5KB | +| net9.0 | 9.5KB | 92.5KB | +83.0KB | +8.5KB | | +512bytes | +3.5KB | +| net10.0 | 10.0KB | 70.0KB | +60.0KB | +9.0KB | | +512bytes | +3.5KB | +| net11.0 | 10.0KB | 31.5KB | +21.5KB | +9.0KB | | +512bytes | +3.5KB | ### Assembly Sizes with EmbedUntrackedSources | | Empty Assembly | With Polyfill | Diff | Ensure | ArgumentExceptions | StringInterpolation | Nullability | |----------------|----------------|---------------|-----------|-----------|--------------------|---------------------|-------------| -| netstandard2.0 | 8.0KB | 507.8KB | +499.8KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| netstandard2.1 | 8.5KB | 436.6KB | +428.1KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| net461 | 8.5KB | 507.8KB | +499.3KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net462 | 7.0KB | 511.3KB | +504.3KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net47 | 7.0KB | 510.6KB | +503.6KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | -| net471 | 8.5KB | 509.7KB | +501.2KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | -| net472 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net48 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net481 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.0 | 9.0KB | 473.2KB | +464.2KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.1 | 9.0KB | 442.8KB | +433.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.2 | 9.0KB | 442.8KB | +433.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp3.0 | 9.5KB | 426.5KB | +417.0KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| netcoreapp3.1 | 9.5KB | 425.0KB | +415.5KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net5.0 | 9.5KB | 370.3KB | +360.8KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | -| net6.0 | 10.0KB | 292.3KB | +282.3KB | +17.7KB | +8.2KB | +1.1KB | +4.2KB | -| net7.0 | 10.0KB | 236.2KB | +226.2KB | +16.6KB | +6.9KB | +1.1KB | +3.7KB | -| net8.0 | 9.5KB | 192.7KB | +183.2KB | +15.5KB | +299bytes | +1.1KB | +3.7KB | -| net9.0 | 9.5KB | 125.2KB | +115.7KB | +16.0KB | | +1.1KB | +4.2KB | -| net10.0 | 10.0KB | 94.4KB | +84.4KB | +16.5KB | | +1.1KB | +4.2KB | -| net11.0 | 10.0KB | 41.3KB | +31.3KB | +16.5KB | | +1.6KB | +4.2KB | +| netstandard2.0 | 8.0KB | 513.0KB | +505.0KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netstandard2.1 | 8.5KB | 441.7KB | +433.2KB | +16.2KB | +7.7KB | +13.9KB | +18.9KB | +| net461 | 8.5KB | 512.5KB | +504.0KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | +| net462 | 7.0KB | 516.0KB | +509.0KB | +16.7KB | +8.2KB | +14.4KB | +18.9KB | +| net47 | 7.0KB | 515.8KB | +508.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| net471 | 8.5KB | 514.4KB | +505.9KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | +| net472 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| net48 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| net481 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| netcoreapp2.0 | 9.0KB | 478.4KB | +469.4KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netcoreapp2.1 | 9.0KB | 447.4KB | +438.4KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| netcoreapp2.2 | 9.0KB | 447.4KB | +438.4KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| netcoreapp3.0 | 9.5KB | 431.6KB | +422.1KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netcoreapp3.1 | 9.5KB | 429.6KB | +420.1KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| net5.0 | 9.5KB | 375.4KB | +365.9KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| net6.0 | 10.0KB | 297.4KB | +287.4KB | +17.2KB | +8.2KB | +1.1KB | +3.7KB | +| net7.0 | 10.0KB | 240.8KB | +230.8KB | +17.1KB | +6.9KB | +1.6KB | +4.2KB | +| net8.0 | 9.5KB | 197.3KB | +187.8KB | +16.0KB | +811bytes | +1.1KB | +4.2KB | +| net9.0 | 9.5KB | 130.3KB | +120.8KB | +16.0KB | | +1.1KB | +4.2KB | +| net10.0 | 10.0KB | 99.5KB | +89.5KB | +16.5KB | | +1.1KB | +4.2KB | +| net11.0 | 10.0KB | 46.9KB | +36.9KB | +16.5KB | | +1.1KB | +4.2KB | diff --git a/src/Polyfill/ReadOnlySequenceStream.cs b/src/Polyfill/ReadOnlySequenceStream.cs index be82092c..b2a2bbc6 100644 --- a/src/Polyfill/ReadOnlySequenceStream.cs +++ b/src/Polyfill/ReadOnlySequenceStream.cs @@ -23,6 +23,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -46,6 +47,9 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + // Incremental cursor into the sequence's segments, kept in sync with the absolute position. + // Advancing from this cursor avoids re-walking the segment list from the start on every read. + SequencePosition cursor; long position; bool disposed; @@ -53,8 +57,12 @@ sealed class ReadOnlySequenceStream : /// Initializes a new instance of the class over the specified . /// //Link: https://learn.microsoft.com/en-us/dotnet/api/system.buffers.readonlysequencestream.-ctor?view=net-11.0 - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; @@ -91,7 +99,7 @@ public override long Position throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } @@ -106,7 +114,7 @@ public override int Read(byte[] buffer, int offset, int count) return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { @@ -114,6 +122,7 @@ public override int Read(byte[] buffer, int offset, int count) } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -128,7 +137,8 @@ public override int ReadByte() return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -137,20 +147,14 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// @@ -166,16 +170,147 @@ public override long Seek(long offset, SeekOrigin origin) _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + // Repositions the segment cursor to the given absolute position, advancing forward from the + // current cursor when possible and only walking from the start for backward jumps. + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + + position = value; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + // Stream.CopyTo(Stream, int) only became virtual in netcoreapp2.1/netstandard2.1. On older + // targets it cannot be overridden, so the base implementation (which routes through the + // cursor-based Read above) is used instead. + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + + if (position >= sequence.Length) + { + return; + } + + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + + cursor = sequence.End; + position = sequence.Length; + } +#endif + + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + + return CopyToAsyncCore(destination, cancellationToken); + } + + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + + cursor = sequence.End; + position = sequence.Length; + } + + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return destination.WriteAsync(segment, cancellationToken).AsTask(); +#else + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); +#endif + } + +#if !(NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER) + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } +#endif + + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } + /// public override void Flush() { @@ -198,6 +333,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } diff --git a/src/Polyfill/StringStream.cs b/src/Polyfill/StringStream.cs index 87cd6090..095d19c5 100644 --- a/src/Polyfill/StringStream.cs +++ b/src/Polyfill/StringStream.cs @@ -32,8 +32,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -49,10 +51,23 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + // Lazily created on the encoder slow path. The single-shot fast path in ReadCore + // uses stateless Encoding.GetBytes and never touches this field. + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + // Spillover buffer for multi-byte encodings: when the caller's buffer is too small to hold even + // one encoded scalar (for example ReadByte with UTF-16), the bytes are encoded into this buffer + // and served across subsequent reads. Also holds final encoder flush bytes when the caller's + // buffer had no room left. + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; + /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -71,6 +86,7 @@ public StringStream(string text, Encoding encoding) this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// @@ -86,6 +102,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// @@ -113,49 +130,156 @@ public override long Position set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() + /// + public override int Read(byte[] buffer, int offset, int count) { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - - return encoded; + GuardRange(buffer, offset, count); + return ReadCore(buffer, offset, count); } /// - public override int Read(byte[] buffer, int offset, int count) + public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + + int ReadCore(byte[] buffer, int offset, int count) { - GuardRange(buffer, offset, count); ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) { return 0; } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; - } + // Fast path: nothing emitted yet and the caller's buffer is guaranteed large enough to hold + // the entire encoded payload in a single shot. Encoding.GetBytes is stateless and emits any + // reset/shift sequences required by stateful encodings for a complete conversion, so the + // encoder can be marked flushed without ever being allocated. The overflow guard keeps + // Encoding.GetMaxByteCount from overflowing int for very large inputs. + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) + { + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; + } - /// - public override int ReadByte() - { - ThrowIfDisposed(); + var totalBytesWritten = 0; + + // Drain any pending bytes left over from a previous partial read. + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + + // If the caller's buffer may be too small for even one encoded scalar, encode into the + // spillover buffer first, then copy what fits. The array based Encoder.Convert throws + // when the output cannot hold a single complete encoded character. + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + // Encode directly into the caller's buffer. Only flush on the final block so encoder + // state is preserved for stateful encodings. + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } - var all = GetEncoded(); - if (position < all.Length) + // All input chars are consumed but the encoder has not been flushed: emit any remaining + // encoder state (for example stateful reset sequences). Flush into the spillover buffer, + // which is always large enough, then copy what fits into whatever room the caller has left. + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) { - return all[position++]; + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } } - return -1; + return totalBytesWritten; + } + + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + + // Stateless single-shot encode of the whole text into the caller's buffer at byteIndex. + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); +#else + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); +#endif + } + + // Stateful incremental encode of chars into bytes[byteIndex..byteIndex+byteCount]. + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); +#else + // The array based Encoder.Convert overload (available on every TFM) requires a char[]. Cap the + // slice at byteCount chars: a complete char never encodes to fewer than one byte, so the + // encoder can never consume more chars than there are output bytes, and bounding the copy this + // way keeps a streamed read linear rather than quadratic. + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); +#endif } /// @@ -170,7 +294,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net10.0/ReadOnlySequenceStream.cs b/src/Split/net10.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net10.0/ReadOnlySequenceStream.cs +++ b/src/Split/net10.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net10.0/StringStream.cs b/src/Split/net10.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net10.0/StringStream.cs +++ b/src/Split/net10.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net11.0/ReadOnlySequenceStream.cs b/src/Split/net11.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net11.0/ReadOnlySequenceStream.cs +++ b/src/Split/net11.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net11.0/StringStream.cs b/src/Split/net11.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net11.0/StringStream.cs +++ b/src/Split/net11.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net461/ReadOnlySequenceStream.cs b/src/Split/net461/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net461/ReadOnlySequenceStream.cs +++ b/src/Split/net461/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net461/StringStream.cs b/src/Split/net461/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net461/StringStream.cs +++ b/src/Split/net461/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net462/ReadOnlySequenceStream.cs b/src/Split/net462/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net462/ReadOnlySequenceStream.cs +++ b/src/Split/net462/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net462/StringStream.cs b/src/Split/net462/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net462/StringStream.cs +++ b/src/Split/net462/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net47/ReadOnlySequenceStream.cs b/src/Split/net47/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net47/ReadOnlySequenceStream.cs +++ b/src/Split/net47/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net47/StringStream.cs b/src/Split/net47/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net47/StringStream.cs +++ b/src/Split/net47/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net471/ReadOnlySequenceStream.cs b/src/Split/net471/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net471/ReadOnlySequenceStream.cs +++ b/src/Split/net471/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net471/StringStream.cs b/src/Split/net471/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net471/StringStream.cs +++ b/src/Split/net471/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net472/ReadOnlySequenceStream.cs b/src/Split/net472/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net472/ReadOnlySequenceStream.cs +++ b/src/Split/net472/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net472/StringStream.cs b/src/Split/net472/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net472/StringStream.cs +++ b/src/Split/net472/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net48/ReadOnlySequenceStream.cs b/src/Split/net48/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net48/ReadOnlySequenceStream.cs +++ b/src/Split/net48/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net48/StringStream.cs b/src/Split/net48/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net48/StringStream.cs +++ b/src/Split/net48/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net481/ReadOnlySequenceStream.cs b/src/Split/net481/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net481/ReadOnlySequenceStream.cs +++ b/src/Split/net481/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net481/StringStream.cs b/src/Split/net481/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net481/StringStream.cs +++ b/src/Split/net481/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net5.0/ReadOnlySequenceStream.cs b/src/Split/net5.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net5.0/ReadOnlySequenceStream.cs +++ b/src/Split/net5.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net5.0/StringStream.cs b/src/Split/net5.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net5.0/StringStream.cs +++ b/src/Split/net5.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net6.0/ReadOnlySequenceStream.cs b/src/Split/net6.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net6.0/ReadOnlySequenceStream.cs +++ b/src/Split/net6.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net6.0/StringStream.cs b/src/Split/net6.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net6.0/StringStream.cs +++ b/src/Split/net6.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net7.0/ReadOnlySequenceStream.cs b/src/Split/net7.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net7.0/ReadOnlySequenceStream.cs +++ b/src/Split/net7.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net7.0/StringStream.cs b/src/Split/net7.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net7.0/StringStream.cs +++ b/src/Split/net7.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net8.0/ReadOnlySequenceStream.cs b/src/Split/net8.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net8.0/ReadOnlySequenceStream.cs +++ b/src/Split/net8.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net8.0/StringStream.cs b/src/Split/net8.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net8.0/StringStream.cs +++ b/src/Split/net8.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net9.0/ReadOnlySequenceStream.cs b/src/Split/net9.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net9.0/ReadOnlySequenceStream.cs +++ b/src/Split/net9.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net9.0/StringStream.cs b/src/Split/net9.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net9.0/StringStream.cs +++ b/src/Split/net9.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.0/StringStream.cs b/src/Split/netcoreapp2.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/netcoreapp2.0/StringStream.cs +++ b/src/Split/netcoreapp2.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.1/StringStream.cs b/src/Split/netcoreapp2.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp2.1/StringStream.cs +++ b/src/Split/netcoreapp2.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.2/StringStream.cs b/src/Split/netcoreapp2.2/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp2.2/StringStream.cs +++ b/src/Split/netcoreapp2.2/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs b/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp3.0/StringStream.cs b/src/Split/netcoreapp3.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp3.0/StringStream.cs +++ b/src/Split/netcoreapp3.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs b/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp3.1/StringStream.cs b/src/Split/netcoreapp3.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp3.1/StringStream.cs +++ b/src/Split/netcoreapp3.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netstandard2.0/ReadOnlySequenceStream.cs b/src/Split/netstandard2.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/netstandard2.0/ReadOnlySequenceStream.cs +++ b/src/Split/netstandard2.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netstandard2.0/StringStream.cs b/src/Split/netstandard2.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/netstandard2.0/StringStream.cs +++ b/src/Split/netstandard2.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netstandard2.1/ReadOnlySequenceStream.cs b/src/Split/netstandard2.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netstandard2.1/ReadOnlySequenceStream.cs +++ b/src/Split/netstandard2.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netstandard2.1/StringStream.cs b/src/Split/netstandard2.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netstandard2.1/StringStream.cs +++ b/src/Split/netstandard2.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/uap10.0/ReadOnlySequenceStream.cs b/src/Split/uap10.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/uap10.0/ReadOnlySequenceStream.cs +++ b/src/Split/uap10.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/uap10.0/StringStream.cs b/src/Split/uap10.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/uap10.0/StringStream.cs +++ b/src/Split/uap10.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs b/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs index 6f6a2035..a196bce7 100644 --- a/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs +++ b/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs @@ -101,6 +101,17 @@ public async Task ReadOnlyMemoryStream_CopyTo() await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4 }); } + [Test] + public async Task ReadOnlyMemoryStream_FromMemoryViaImplicitConversion() + { + var buffer = new byte[] { 1, 2, 3, 4, 5 }; + Memory memory = buffer; + using var stream = new ReadOnlyMemoryStream(memory); + + await Assert.That(stream.Length).IsEqualTo(5L); + await Assert.That(stream.CanRead).IsTrue(); + } + [Test] public async Task ReadOnlyMemoryStream_Empty() { diff --git a/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs b/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs index 66cc44d8..d3c1c7f5 100644 --- a/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs +++ b/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs @@ -73,6 +73,44 @@ public async Task ReadOnlySequenceStream_Seek() await Assert.That(stream.ReadByte()).IsEqualTo(5); } + [Test] + public async Task ReadOnlySequenceStream_SeekAcrossSegments_RepositionsCursor() + { + // bytes by index: 0..8 -> 1,2,3,4,5,6,7,8,9 across three segments. + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5 }, new byte[] { 6, 7, 8, 9 }); + using var stream = new ReadOnlySequenceStream(sequence); + + // Forward from the start into the third segment. + stream.Seek(6, SeekOrigin.Begin); + await Assert.That(stream.Position).IsEqualTo(6L); + await Assert.That(stream.ReadByte()).IsEqualTo(7); + + // Backward into the first segment (walk-from-start branch). + stream.Position = 1; + await Assert.That(stream.ReadByte()).IsEqualTo(2); + + // Forward relative to the current cursor into the second segment. + stream.Seek(2, SeekOrigin.Current); + await Assert.That(stream.ReadByte()).IsEqualTo(5); + + // Clamp to the end via End origin. + stream.Seek(0, SeekOrigin.End); + await Assert.That(stream.Position).IsEqualTo(9L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + } + + [Test] + public async Task ReadOnlySequenceStream_SeekBeyondLength_ReadsZero() + { + using var stream = new ReadOnlySequenceStream(new ReadOnlySequence(new byte[] { 1, 2, 3 })); + + stream.Position = 10; + + await Assert.That(stream.Position).IsEqualTo(10L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + await Assert.That(stream.Read(new byte[4], 0, 4)).IsEqualTo(0); + } + [Test] public async Task ReadOnlySequenceStream_SeekBeforeBeginThrows() { @@ -111,6 +149,48 @@ public async Task ReadOnlySequenceStream_Empty() await Assert.That(stream.Read(new byte[4], 0, 4)).IsEqualTo(0); } + [Test] + public async Task ReadOnlySequenceStream_CopyTo_MultiSegment() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5 }, new byte[] { 6, 7, 8, 9 }); + using var stream = new ReadOnlySequenceStream(sequence); + using var target = new MemoryStream(); + + stream.CopyTo(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + await Assert.That(stream.Position).IsEqualTo(9L); + } + + [Test] + public async Task ReadOnlySequenceStream_CopyTo_AfterPartialRead_CopiesRemainder() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5, 6 }); + using var stream = new ReadOnlySequenceStream(sequence); + + var head = new byte[2]; + stream.Read(head, 0, 2); + + using var target = new MemoryStream(); + stream.CopyTo(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 3, 4, 5, 6 }); + await Assert.That(stream.Position).IsEqualTo(6L); + } + + [Test] + public async Task ReadOnlySequenceStream_CopyToAsync_MultiSegment() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2 }, new byte[] { 3, 4, 5 }); + using var stream = new ReadOnlySequenceStream(sequence); + using var target = new MemoryStream(); + + await stream.CopyToAsync(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4, 5 }); + await Assert.That(stream.Position).IsEqualTo(5L); + } + static ReadOnlySequence CreateMultiSegment(params byte[][] parts) { var first = new ReadOnlySequenceStreamSegment(parts[0]); diff --git a/src/Tests/PolyfillTests_StringStream.cs b/src/Tests/PolyfillTests_StringStream.cs index c75e79b7..80d4b390 100644 --- a/src/Tests/PolyfillTests_StringStream.cs +++ b/src/Tests/PolyfillTests_StringStream.cs @@ -55,6 +55,45 @@ public async Task StringStream_ReadInSmallChunks_ReconstructsOutput() await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); } + [Test] + public async Task StringStream_MultiByteReadByteByByte_MatchesGetBytes() + { + // Reading one byte at a time forces the spillover path: each multi-byte character + // (and the surrogate pair) is encoded into the internal buffer and drained byte by byte. + const string input = "Unicode: 你好世界 🌍 café"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + using var accumulator = new MemoryStream(); + int value; + while ((value = stream.ReadByte()) != -1) + { + accumulator.WriteByte((byte) value); + } + + await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_MultiByteBoundarySplit_MatchesGetBytes() + { + // A 7-byte buffer is not a multiple of the 3-byte CJK width, so the encoder must carry + // conversion state across reads without splitting or duplicating a character. + const string input = "你好世界你好世界你好世界"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + using var accumulator = new MemoryStream(); + var buffer = new byte[7]; + int read; + while ((read = stream.Read(buffer, 0, buffer.Length)) > 0) + { + accumulator.Write(buffer, 0, read); + } + + await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); + } + [Test] public async Task StringStream_ReadByte_ReturnsBytesThenMinusOne() { @@ -157,6 +196,152 @@ public async Task StringStream_AfterDispose_CanReadIsFalse() await Assert.That(stream.CanRead).IsFalse(); } + [Test] + [Arguments("Hello, World! ")] + [Arguments("Unicode: 你好世界 🌍")] + [Arguments("Multi\nLine\r\nText")] + public async Task StringStream_DifferentStrings_MatchGetBytes(string input) + { + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + [Arguments("ASCII text")] + [Arguments("Ñoño español")] + public async Task StringStream_DifferentEncodings_MatchGetBytes(string input) + { + foreach (var encoding in new[] { Encoding.UTF8, Encoding.Unicode, Encoding.UTF32 }) + { + var expected = encoding.GetBytes(input); + using var stream = new StringStream(input, encoding); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + } + + [Test] + public async Task StringStream_SurrogatePairs_MatchGetBytes() + { + const string input = "😀😁😂🤣😃😄"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_MultiByteAcrossChunkBoundary_MatchGetBytes() + { + var input = new string('A', 1023) + "你"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_ChunkedReading_LargeInput() + { + var input = new string('A', 10000); + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + var actual = new byte[expected.Length]; + var total = 0; + const int chunkSize = 512; + int read; + while ((read = stream.Read(actual, total, Math.Min(chunkSize, expected.Length - total))) > 0) + { + total += read; + } + + await Assert.That(total).IsEqualTo(expected.Length); + await Assert.That(actual).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_FastPathGuard_DoesNotOverflow() + { + // GetMaxByteCount(text.Length) would overflow int for this encoding; the fast-path guard + // must short-circuit before calling it rather than throwing OverflowException. + using var stream = new StringStream("hello", new OverflowingEncoding()); + + var threw = false; + try + { + stream.Read(new byte[16], 0, 16); + } + catch (OverflowException) + { + threw = true; + } + + await Assert.That(threw).IsFalse(); + } + + [Test] + public async Task StringStream_MemorySlice_MatchGetBytes() + { + const string source = "0123456789ABCDEFGHIJ"; + var slice = source.AsMemory(5, 10); + var expected = Encoding.UTF8.GetBytes("56789ABCDE"); + using var stream = new StringStream(slice, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_CharArrayMemory_MatchGetBytes() + { + var chars = new[] { 'H', 'e', 'l', 'l', 'o' }; + var expected = Encoding.UTF8.GetBytes("Hello"); + using var stream = new StringStream(new ReadOnlyMemory(chars), Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + [Arguments("Hello")] + [Arguments("Unicode: 你好")] + [Arguments("Emoji: 😀")] + public async Task StringStream_MemoryOverloadMatchesStringOverload(string input) + { + using var fromMemory = new StringStream(input.AsMemory(), Encoding.UTF8); + using var fromString = new StringStream(input, Encoding.UTF8); + + var memoryBytes = ReadToEnd(fromMemory); + var stringBytes = ReadToEnd(fromString); + + await Assert.That(memoryBytes).IsEquivalentTo(stringBytes); + } + + [Test] + public async Task StringStream_TruncatedSurrogate_ProducesReplacementChar() + { + // "A" plus the high surrogate of U+1F30D with no trailing low surrogate. + const string emoji = "A\U0001F30D"; + var truncated = emoji.AsMemory(0, 2); + var expected = Encoding.UTF8.GetBytes("A�"); + using var stream = new StringStream(truncated, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_CopyToAsync_HonorsCancellation() + { + using var stream = new StringStream("hello", Encoding.UTF8); + using var destination = new MemoryStream(); + using var cancelSource = new CancelSource(); + cancelSource.Cancel(); + + await Assert.That(async () => await stream.CopyToAsync(destination, 81920, cancelSource.Token)) + .Throws(); + } + static byte[] ReadToEnd(Stream stream) { using var memory = new MemoryStream(); @@ -169,5 +354,34 @@ static byte[] ReadToEnd(Stream stream) return memory.ToArray(); } + + // An encoding whose GetMaxByteCount overflows int for larger char counts, used to verify the + // StringStream fast-path guard skips GetMaxByteCount rather than throwing OverflowException. + sealed class OverflowingEncoding : + Encoding + { + public override int GetByteCount(char[] chars, int index, int count) => + UTF8.GetByteCount(chars, index, count); + + public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) => + UTF8.GetBytes(chars, charIndex, charCount, bytes, byteIndex); + + public override int GetMaxByteCount(int charCount) => + charCount switch + { + 1 => int.MaxValue, + 2 => 8, + _ => checked((charCount + 1) * int.MaxValue) + }; + + public override int GetCharCount(byte[] bytes, int index, int count) => + throw new NotImplementedException(); + + public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) => + throw new NotImplementedException(); + + public override int GetMaxCharCount(int byteCount) => + throw new NotImplementedException(); + } } #endif diff --git a/src/Tests/PolyfillTests_WritableMemoryStream.cs b/src/Tests/PolyfillTests_WritableMemoryStream.cs index c0f14262..3f420578 100644 --- a/src/Tests/PolyfillTests_WritableMemoryStream.cs +++ b/src/Tests/PolyfillTests_WritableMemoryStream.cs @@ -96,5 +96,77 @@ public async Task WritableMemoryStream_ToArrayReturnsWrittenContent() await Assert.That(stream.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3 }); } + + [Test] + public async Task WritableMemoryStream_WriteByteBeyondCapacityThrows() + { + using var stream = new WritableMemoryStream(new byte[3]); + + stream.WriteByte(1); + stream.WriteByte(2); + stream.WriteByte(3); + + await Assert.That(() => stream.WriteByte(4)).Throws(); + } + + [Test] + public async Task WritableMemoryStream_WriteUpToExactCapacitySucceeds() + { + using var stream = new WritableMemoryStream(new byte[10]); + var data = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + + stream.Write(data, 0, data.Length); + + await Assert.That(stream.Position).IsEqualTo(10L); + await Assert.That(stream.Length).IsEqualTo(10L); + + stream.Position = 0; + var readBack = new byte[10]; + var read = stream.Read(readBack, 0, 10); + + await Assert.That(read).IsEqualTo(10); + await Assert.That(readBack).IsEquivalentTo(data); + } + + [Test] + public async Task WritableMemoryStream_WritePastCapacityLeavesPositionUnchanged() + { + using var stream = new WritableMemoryStream(new byte[10]); + stream.Write(new byte[8], 0, 8); + + await Assert.That(stream.Position).IsEqualTo(8L); + await Assert.That(() => stream.Write(new byte[5], 0, 5)).Throws(); + await Assert.That(stream.Position).IsEqualTo(8L); + } + + [Test] + public async Task WritableMemoryStream_SeekPastCapacity() + { + using var stream = new WritableMemoryStream(new byte[10]); + + stream.Seek(100, SeekOrigin.Begin); + + await Assert.That(stream.Position).IsEqualTo(100L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + await Assert.That(() => stream.WriteByte(42)).Throws(); + } + + [Test] + public async Task WritableMemoryStream_WriteOverExistingDataReplacesData() + { + var backing = new byte[10]; + using var stream = new WritableMemoryStream(backing); + stream.Write(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 0, 10); + + stream.Position = 3; + stream.Write(new byte[] { 100, 101, 102 }, 0, 3); + + stream.Position = 0; + var result = new byte[10]; + var read = stream.Read(result, 0, 10); + + await Assert.That(read).IsEqualTo(10); + await Assert.That(result).IsEquivalentTo(new byte[] { 1, 2, 3, 100, 101, 102, 7, 8, 9, 10 }); + } } #endif From 894697ac4177708b2523ec66051cca08f83aff42 Mon Sep 17 00:00:00 2001 From: Simon Cropp Date: Sat, 27 Jun 2026 11:36:46 +1000 Subject: [PATCH 2/2] Update Directory.Build.props --- src/Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Directory.Build.props b/src/Directory.Build.props index b7e87696..810bbd9c 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -2,7 +2,7 @@ CS1591;NETSDK1138;NU1901;NU1902;NU1903;CA1822;CA1847;CA1861;NU1510;NU1608;NU1109 - 10.11.0 + 10.11.1 1.0.0 Polyfill true