diff --git a/assemblySize.include.md b/assemblySize.include.md index 27bcf822..82bb6d98 100644 --- a/assemblySize.include.md +++ b/assemblySize.include.md @@ -2,51 +2,51 @@ | | Empty Assembly | With Polyfill | Diff | Ensure | ArgumentExceptions | StringInterpolation | Nullability | |----------------|----------------|---------------|-----------|-----------|--------------------|---------------------|-------------| -| netstandard2.0 | 8.0KB | 347.5KB | +339.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| netstandard2.1 | 8.5KB | 302.0KB | +293.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| net461 | 8.5KB | 346.5KB | +338.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net462 | 7.0KB | 350.0KB | +343.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net47 | 7.0KB | 349.5KB | +342.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | -| net471 | 8.5KB | 349.0KB | +340.5KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | -| net472 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net48 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net481 | 8.5KB | 347.5KB | +339.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.0 | 9.0KB | 323.5KB | +314.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.1 | 9.0KB | 304.5KB | +295.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp2.2 | 9.0KB | 304.5KB | +295.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| netcoreapp3.0 | 9.5KB | 297.0KB | +287.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | -| netcoreapp3.1 | 9.5KB | 295.5KB | +286.0KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | -| net5.0 | 9.5KB | 259.0KB | +249.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | -| net6.0 | 10.0KB | 201.0KB | +191.0KB | +10.0KB | +6.5KB | +512bytes | +3.5KB | -| net7.0 | 10.0KB | 163.5KB | +153.5KB | +9.0KB | +5.5KB | +512bytes | +3.0KB | -| net8.0 | 9.5KB | 135.0KB | +125.5KB | +8.0KB | | +512bytes | +3.0KB | -| net9.0 | 9.5KB | 88.5KB | +79.0KB | +8.5KB | | +512bytes | +3.5KB | -| net10.0 | 10.0KB | 66.0KB | +56.0KB | +9.0KB | | +512bytes | +3.5KB | -| net11.0 | 10.0KB | 27.0KB | +17.0KB | +9.0KB | | +1.0KB | +3.5KB | +| netstandard2.0 | 8.0KB | 351.5KB | +343.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netstandard2.1 | 8.5KB | 306.0KB | +297.5KB | +8.5KB | +6.0KB | +9.0KB | +13.5KB | +| net461 | 8.5KB | 350.0KB | +341.5KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | +| net462 | 7.0KB | 353.5KB | +346.5KB | +9.0KB | +6.5KB | +9.5KB | +13.5KB | +| net47 | 7.0KB | 353.5KB | +346.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| net471 | 8.5KB | 352.5KB | +344.0KB | +9.0KB | +6.5KB | +9.5KB | +14.0KB | +| net472 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| net48 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| net481 | 8.5KB | 351.5KB | +343.0KB | +9.0KB | +6.0KB | +9.0KB | +13.5KB | +| netcoreapp2.0 | 9.0KB | 327.5KB | +318.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netcoreapp2.1 | 9.0KB | 308.0KB | +299.0KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| netcoreapp2.2 | 9.0KB | 308.0KB | +299.0KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| netcoreapp3.0 | 9.5KB | 301.0KB | +291.5KB | +9.0KB | +6.5KB | +9.0KB | +13.5KB | +| netcoreapp3.1 | 9.5KB | 299.0KB | +289.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| net5.0 | 9.5KB | 263.0KB | +253.5KB | +9.0KB | +6.5KB | +9.0KB | +14.0KB | +| net6.0 | 10.0KB | 205.0KB | +195.0KB | +9.5KB | +6.5KB | +512bytes | +3.0KB | +| net7.0 | 10.0KB | 167.0KB | +157.0KB | +9.5KB | +5.5KB | +1.0KB | +3.5KB | +| net8.0 | 9.5KB | 138.5KB | +129.0KB | +8.5KB | +512bytes | +512bytes | +3.5KB | +| net9.0 | 9.5KB | 92.5KB | +83.0KB | +8.5KB | | +512bytes | +3.5KB | +| net10.0 | 10.0KB | 70.0KB | +60.0KB | +9.0KB | | +512bytes | +3.5KB | +| net11.0 | 10.0KB | 31.5KB | +21.5KB | +9.0KB | | +512bytes | +3.5KB | ### Assembly Sizes with EmbedUntrackedSources | | Empty Assembly | With Polyfill | Diff | Ensure | ArgumentExceptions | StringInterpolation | Nullability | |----------------|----------------|---------------|-----------|-----------|--------------------|---------------------|-------------| -| netstandard2.0 | 8.0KB | 507.8KB | +499.8KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| netstandard2.1 | 8.5KB | 436.6KB | +428.1KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| net461 | 8.5KB | 507.8KB | +499.3KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net462 | 7.0KB | 511.3KB | +504.3KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net47 | 7.0KB | 510.6KB | +503.6KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | -| net471 | 8.5KB | 509.7KB | +501.2KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | -| net472 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net48 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net481 | 8.5KB | 507.2KB | +498.7KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.0 | 9.0KB | 473.2KB | +464.2KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.1 | 9.0KB | 442.8KB | +433.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp2.2 | 9.0KB | 442.8KB | +433.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| netcoreapp3.0 | 9.5KB | 426.5KB | +417.0KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | -| netcoreapp3.1 | 9.5KB | 425.0KB | +415.5KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | -| net5.0 | 9.5KB | 370.3KB | +360.8KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | -| net6.0 | 10.0KB | 292.3KB | +282.3KB | +17.7KB | +8.2KB | +1.1KB | +4.2KB | -| net7.0 | 10.0KB | 236.2KB | +226.2KB | +16.6KB | +6.9KB | +1.1KB | +3.7KB | -| net8.0 | 9.5KB | 192.7KB | +183.2KB | +15.5KB | +299bytes | +1.1KB | +3.7KB | -| net9.0 | 9.5KB | 125.2KB | +115.7KB | +16.0KB | | +1.1KB | +4.2KB | -| net10.0 | 10.0KB | 94.4KB | +84.4KB | +16.5KB | | +1.1KB | +4.2KB | -| net11.0 | 10.0KB | 41.3KB | +31.3KB | +16.5KB | | +1.6KB | +4.2KB | +| netstandard2.0 | 8.0KB | 513.0KB | +505.0KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netstandard2.1 | 8.5KB | 441.7KB | +433.2KB | +16.2KB | +7.7KB | +13.9KB | +18.9KB | +| net461 | 8.5KB | 512.5KB | +504.0KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | +| net462 | 7.0KB | 516.0KB | +509.0KB | +16.7KB | +8.2KB | +14.4KB | +18.9KB | +| net47 | 7.0KB | 515.8KB | +508.8KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| net471 | 8.5KB | 514.4KB | +505.9KB | +16.7KB | +8.2KB | +14.4KB | +19.4KB | +| net472 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| net48 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| net481 | 8.5KB | 512.4KB | +503.9KB | +16.7KB | +7.7KB | +13.9KB | +18.9KB | +| netcoreapp2.0 | 9.0KB | 478.4KB | +469.4KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netcoreapp2.1 | 9.0KB | 447.4KB | +438.4KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| netcoreapp2.2 | 9.0KB | 447.4KB | +438.4KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| netcoreapp3.0 | 9.5KB | 431.6KB | +422.1KB | +16.7KB | +8.2KB | +13.9KB | +18.9KB | +| netcoreapp3.1 | 9.5KB | 429.6KB | +420.1KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| net5.0 | 9.5KB | 375.4KB | +365.9KB | +16.7KB | +8.2KB | +13.9KB | +19.4KB | +| net6.0 | 10.0KB | 297.4KB | +287.4KB | +17.2KB | +8.2KB | +1.1KB | +3.7KB | +| net7.0 | 10.0KB | 240.8KB | +230.8KB | +17.1KB | +6.9KB | +1.6KB | +4.2KB | +| net8.0 | 9.5KB | 197.3KB | +187.8KB | +16.0KB | +811bytes | +1.1KB | +4.2KB | +| net9.0 | 9.5KB | 130.3KB | +120.8KB | +16.0KB | | +1.1KB | +4.2KB | +| net10.0 | 10.0KB | 99.5KB | +89.5KB | +16.5KB | | +1.1KB | +4.2KB | +| net11.0 | 10.0KB | 46.9KB | +36.9KB | +16.5KB | | +1.1KB | +4.2KB | diff --git a/src/Directory.Build.props b/src/Directory.Build.props index b7e87696..810bbd9c 100644 --- a/src/Directory.Build.props +++ b/src/Directory.Build.props @@ -2,7 +2,7 @@ CS1591;NETSDK1138;NU1901;NU1902;NU1903;CA1822;CA1847;CA1861;NU1510;NU1608;NU1109 - 10.11.0 + 10.11.1 1.0.0 Polyfill true diff --git a/src/Polyfill/ReadOnlySequenceStream.cs b/src/Polyfill/ReadOnlySequenceStream.cs index be82092c..b2a2bbc6 100644 --- a/src/Polyfill/ReadOnlySequenceStream.cs +++ b/src/Polyfill/ReadOnlySequenceStream.cs @@ -23,6 +23,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; @@ -46,6 +47,9 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + // Incremental cursor into the sequence's segments, kept in sync with the absolute position. + // Advancing from this cursor avoids re-walking the segment list from the start on every read. + SequencePosition cursor; long position; bool disposed; @@ -53,8 +57,12 @@ sealed class ReadOnlySequenceStream : /// Initializes a new instance of the class over the specified . /// //Link: https://learn.microsoft.com/en-us/dotnet/api/system.buffers.readonlysequencestream.-ctor?view=net-11.0 - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; @@ -91,7 +99,7 @@ public override long Position throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } @@ -106,7 +114,7 @@ public override int Read(byte[] buffer, int offset, int count) return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { @@ -114,6 +122,7 @@ public override int Read(byte[] buffer, int offset, int count) } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -128,7 +137,8 @@ public override int ReadByte() return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -137,20 +147,14 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// @@ -166,16 +170,147 @@ public override long Seek(long offset, SeekOrigin origin) _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + // Repositions the segment cursor to the given absolute position, advancing forward from the + // current cursor when possible and only walking from the start for backward jumps. + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + + position = value; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + // Stream.CopyTo(Stream, int) only became virtual in netcoreapp2.1/netstandard2.1. On older + // targets it cannot be overridden, so the base implementation (which routes through the + // cursor-based Read above) is used instead. + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + + if (position >= sequence.Length) + { + return; + } + + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + + cursor = sequence.End; + position = sequence.Length; + } +#endif + + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + + return CopyToAsyncCore(destination, cancellationToken); + } + + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + + cursor = sequence.End; + position = sequence.Length; + } + + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return destination.WriteAsync(segment, cancellationToken).AsTask(); +#else + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); +#endif + } + +#if !(NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER) + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } +#endif + + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } + /// public override void Flush() { @@ -198,6 +333,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } diff --git a/src/Polyfill/StringStream.cs b/src/Polyfill/StringStream.cs index 87cd6090..095d19c5 100644 --- a/src/Polyfill/StringStream.cs +++ b/src/Polyfill/StringStream.cs @@ -32,8 +32,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -49,10 +51,23 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + // Lazily created on the encoder slow path. The single-shot fast path in ReadCore + // uses stateless Encoding.GetBytes and never touches this field. + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + // Spillover buffer for multi-byte encodings: when the caller's buffer is too small to hold even + // one encoded scalar (for example ReadByte with UTF-16), the bytes are encoded into this buffer + // and served across subsequent reads. Also holds final encoder flush bytes when the caller's + // buffer had no room left. + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; + /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -71,6 +86,7 @@ public StringStream(string text, Encoding encoding) this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// @@ -86,6 +102,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// @@ -113,49 +130,156 @@ public override long Position set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() + /// + public override int Read(byte[] buffer, int offset, int count) { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - - return encoded; + GuardRange(buffer, offset, count); + return ReadCore(buffer, offset, count); } /// - public override int Read(byte[] buffer, int offset, int count) + public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + + int ReadCore(byte[] buffer, int offset, int count) { - GuardRange(buffer, offset, count); ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) { return 0; } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; - } + // Fast path: nothing emitted yet and the caller's buffer is guaranteed large enough to hold + // the entire encoded payload in a single shot. Encoding.GetBytes is stateless and emits any + // reset/shift sequences required by stateful encodings for a complete conversion, so the + // encoder can be marked flushed without ever being allocated. The overflow guard keeps + // Encoding.GetMaxByteCount from overflowing int for very large inputs. + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) + { + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; + } - /// - public override int ReadByte() - { - ThrowIfDisposed(); + var totalBytesWritten = 0; + + // Drain any pending bytes left over from a previous partial read. + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + + // If the caller's buffer may be too small for even one encoded scalar, encode into the + // spillover buffer first, then copy what fits. The array based Encoder.Convert throws + // when the output cannot hold a single complete encoded character. + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + // Encode directly into the caller's buffer. Only flush on the final block so encoder + // state is preserved for stateful encodings. + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } - var all = GetEncoded(); - if (position < all.Length) + // All input chars are consumed but the encoder has not been flushed: emit any remaining + // encoder state (for example stateful reset sequences). Flush into the spillover buffer, + // which is always large enough, then copy what fits into whatever room the caller has left. + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) { - return all[position++]; + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } } - return -1; + return totalBytesWritten; + } + + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + + // Stateless single-shot encode of the whole text into the caller's buffer at byteIndex. + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); +#else + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); +#endif + } + + // Stateful incremental encode of chars into bytes[byteIndex..byteIndex+byteCount]. + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); +#else + // The array based Encoder.Convert overload (available on every TFM) requires a char[]. Cap the + // slice at byteCount chars: a complete char never encodes to fewer than one byte, so the + // encoder can never consume more chars than there are output bytes, and bounding the copy this + // way keeps a streamed read linear rather than quadratic. + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); +#endif } /// @@ -170,7 +294,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net10.0/ReadOnlySequenceStream.cs b/src/Split/net10.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net10.0/ReadOnlySequenceStream.cs +++ b/src/Split/net10.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net10.0/StringStream.cs b/src/Split/net10.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net10.0/StringStream.cs +++ b/src/Split/net10.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net11.0/ReadOnlySequenceStream.cs b/src/Split/net11.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net11.0/ReadOnlySequenceStream.cs +++ b/src/Split/net11.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net11.0/StringStream.cs b/src/Split/net11.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net11.0/StringStream.cs +++ b/src/Split/net11.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net461/ReadOnlySequenceStream.cs b/src/Split/net461/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net461/ReadOnlySequenceStream.cs +++ b/src/Split/net461/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net461/StringStream.cs b/src/Split/net461/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net461/StringStream.cs +++ b/src/Split/net461/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net462/ReadOnlySequenceStream.cs b/src/Split/net462/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net462/ReadOnlySequenceStream.cs +++ b/src/Split/net462/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net462/StringStream.cs b/src/Split/net462/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net462/StringStream.cs +++ b/src/Split/net462/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net47/ReadOnlySequenceStream.cs b/src/Split/net47/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net47/ReadOnlySequenceStream.cs +++ b/src/Split/net47/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net47/StringStream.cs b/src/Split/net47/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net47/StringStream.cs +++ b/src/Split/net47/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net471/ReadOnlySequenceStream.cs b/src/Split/net471/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net471/ReadOnlySequenceStream.cs +++ b/src/Split/net471/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net471/StringStream.cs b/src/Split/net471/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net471/StringStream.cs +++ b/src/Split/net471/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net472/ReadOnlySequenceStream.cs b/src/Split/net472/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net472/ReadOnlySequenceStream.cs +++ b/src/Split/net472/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net472/StringStream.cs b/src/Split/net472/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net472/StringStream.cs +++ b/src/Split/net472/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net48/ReadOnlySequenceStream.cs b/src/Split/net48/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net48/ReadOnlySequenceStream.cs +++ b/src/Split/net48/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net48/StringStream.cs b/src/Split/net48/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net48/StringStream.cs +++ b/src/Split/net48/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net481/ReadOnlySequenceStream.cs b/src/Split/net481/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/net481/ReadOnlySequenceStream.cs +++ b/src/Split/net481/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net481/StringStream.cs b/src/Split/net481/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/net481/StringStream.cs +++ b/src/Split/net481/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net5.0/ReadOnlySequenceStream.cs b/src/Split/net5.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net5.0/ReadOnlySequenceStream.cs +++ b/src/Split/net5.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net5.0/StringStream.cs b/src/Split/net5.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net5.0/StringStream.cs +++ b/src/Split/net5.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net6.0/ReadOnlySequenceStream.cs b/src/Split/net6.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net6.0/ReadOnlySequenceStream.cs +++ b/src/Split/net6.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net6.0/StringStream.cs b/src/Split/net6.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net6.0/StringStream.cs +++ b/src/Split/net6.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net7.0/ReadOnlySequenceStream.cs b/src/Split/net7.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net7.0/ReadOnlySequenceStream.cs +++ b/src/Split/net7.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net7.0/StringStream.cs b/src/Split/net7.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net7.0/StringStream.cs +++ b/src/Split/net7.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net8.0/ReadOnlySequenceStream.cs b/src/Split/net8.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net8.0/ReadOnlySequenceStream.cs +++ b/src/Split/net8.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net8.0/StringStream.cs b/src/Split/net8.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net8.0/StringStream.cs +++ b/src/Split/net8.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/net9.0/ReadOnlySequenceStream.cs b/src/Split/net9.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/net9.0/ReadOnlySequenceStream.cs +++ b/src/Split/net9.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/net9.0/StringStream.cs b/src/Split/net9.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/net9.0/StringStream.cs +++ b/src/Split/net9.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.0/StringStream.cs b/src/Split/netcoreapp2.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/netcoreapp2.0/StringStream.cs +++ b/src/Split/netcoreapp2.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.1/StringStream.cs b/src/Split/netcoreapp2.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp2.1/StringStream.cs +++ b/src/Split/netcoreapp2.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs b/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp2.2/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp2.2/StringStream.cs b/src/Split/netcoreapp2.2/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp2.2/StringStream.cs +++ b/src/Split/netcoreapp2.2/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs b/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp3.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp3.0/StringStream.cs b/src/Split/netcoreapp3.0/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp3.0/StringStream.cs +++ b/src/Split/netcoreapp3.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs b/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs +++ b/src/Split/netcoreapp3.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netcoreapp3.1/StringStream.cs b/src/Split/netcoreapp3.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netcoreapp3.1/StringStream.cs +++ b/src/Split/netcoreapp3.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netstandard2.0/ReadOnlySequenceStream.cs b/src/Split/netstandard2.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/netstandard2.0/ReadOnlySequenceStream.cs +++ b/src/Split/netstandard2.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netstandard2.0/StringStream.cs b/src/Split/netstandard2.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/netstandard2.0/StringStream.cs +++ b/src/Split/netstandard2.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/netstandard2.1/ReadOnlySequenceStream.cs b/src/Split/netstandard2.1/ReadOnlySequenceStream.cs index 1b9bd979..c8adc7a4 100644 --- a/src/Split/netstandard2.1/ReadOnlySequenceStream.cs +++ b/src/Split/netstandard2.1/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,97 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override void CopyTo(Stream destination, int bufferSize) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (position >= sequence.Length) + { + return; + } + foreach (var segment in sequence.Slice(cursor)) + { + destination.Write(segment.Span); + } + cursor = sequence.End; + position = sequence.Length; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + return destination.WriteAsync(segment, cancellationToken).AsTask(); + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +238,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/netstandard2.1/StringStream.cs b/src/Split/netstandard2.1/StringStream.cs index b4cd3943..272f4c1e 100644 --- a/src/Split/netstandard2.1/StringStream.cs +++ b/src/Split/netstandard2.1/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,107 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + return encoding.GetBytes(chars, bytes.AsSpan(byteIndex, byteCount)); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + GetEncoder().Convert(chars, bytes.AsSpan(byteIndex, byteCount), flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +202,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Split/uap10.0/ReadOnlySequenceStream.cs b/src/Split/uap10.0/ReadOnlySequenceStream.cs index 1b9bd979..ee49714e 100644 --- a/src/Split/uap10.0/ReadOnlySequenceStream.cs +++ b/src/Split/uap10.0/ReadOnlySequenceStream.cs @@ -7,6 +7,7 @@ namespace System.Buffers; using System.IO; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; /// @@ -28,13 +29,18 @@ sealed class ReadOnlySequenceStream : Stream { ReadOnlySequence sequence; + SequencePosition cursor; long position; bool disposed; /// /// Initializes a new instance of the class over the specified . /// - public ReadOnlySequenceStream(ReadOnlySequence source) => + public ReadOnlySequenceStream(ReadOnlySequence source) + { sequence = source; + cursor = source.Start; + position = 0; + } /// public override bool CanRead => !disposed; /// @@ -65,7 +71,7 @@ public override long Position { throw new ArgumentOutOfRangeException(nameof(value)); } - position = value; + MoveTo(value); } } /// @@ -77,13 +83,14 @@ public override int Read(byte[] buffer, int offset, int count) { return 0; } - var remaining = sequence.Slice(position); + var remaining = sequence.Slice(cursor); var toRead = (int)Math.Min(remaining.Length, count); if (toRead <= 0) { return 0; } remaining.Slice(0, toRead).CopyTo(buffer.AsSpan(offset, toRead)); + cursor = sequence.GetPosition(toRead, cursor); position += toRead; return toRead; } @@ -95,7 +102,8 @@ public override int ReadByte() { return -1; } - var result = sequence.Slice(position, 1).First.Span[0]; + var result = sequence.Slice(cursor, 1).First.Span[0]; + cursor = sequence.GetPosition(1, cursor); position++; return result; } @@ -103,18 +111,12 @@ public override int ReadByte() public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) { GuardRange(buffer, offset, count); + ThrowIfDisposed(); if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - try - { - return Task.FromResult(Read(buffer, offset, count)); - } - catch (Exception exception) - { - return Task.FromException(exception); - } + return Task.FromResult(Read(buffer, offset, count)); } /// public override long Seek(long offset, SeekOrigin origin) @@ -127,14 +129,96 @@ public override long Seek(long offset, SeekOrigin origin) SeekOrigin.End => sequence.Length, _ => throw new ArgumentException("Invalid seek origin.", nameof(origin)) }; + if (offset > long.MaxValue - basePosition) + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } var newPosition = basePosition + offset; if (newPosition < 0) { throw new IOException("An attempt was made to move the position before the beginning of the stream."); } - position = newPosition; + MoveTo(newPosition); return position; } + void MoveTo(long value) + { + if (value >= sequence.Length) + { + cursor = sequence.End; + } + else if (value >= position) + { + cursor = sequence.GetPosition(value - position, cursor); + } + else + { + cursor = sequence.GetPosition(value, sequence.Start); + } + position = value; + } + /// + public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) + { + GuardCopyTo(destination, bufferSize); + ThrowIfDisposed(); + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + if (position >= sequence.Length) + { + return Task.CompletedTask; + } + return CopyToAsyncCore(destination, cancellationToken); + } + async Task CopyToAsyncCore(Stream destination, CancellationToken cancellationToken) + { + foreach (var segment in sequence.Slice(cursor)) + { + await WriteSegmentAsync(destination, segment, cancellationToken).ConfigureAwait(false); + } + cursor = sequence.End; + position = sequence.Length; + } + static Task WriteSegmentAsync(Stream destination, ReadOnlyMemory segment, CancellationToken cancellationToken) + { + var array = GetSegmentArray(segment, out var offset, out var count); + return destination.WriteAsync(array, offset, count, cancellationToken); + } + static byte[] GetSegmentArray(ReadOnlyMemory segment, out int offset, out int count) + { + if (MemoryMarshal.TryGetArray(segment, out var arraySegment) && + arraySegment.Array != null) + { + offset = arraySegment.Offset; + count = arraySegment.Count; + return arraySegment.Array; + } + var array = segment.ToArray(); + offset = 0; + count = array.Length; + return array; + } + static void GuardCopyTo(Stream destination, int bufferSize) + { + if (destination == null) + { + throw new ArgumentNullException(nameof(destination)); + } + if (bufferSize <= 0) + { + throw new ArgumentOutOfRangeException(nameof(bufferSize)); + } + if (!destination.CanWrite) + { + if (destination.CanRead) + { + throw new NotSupportedException("Stream does not support writing."); + } + throw new ObjectDisposedException(null, "Cannot access a closed stream."); + } + } /// public override void Flush() { @@ -153,6 +237,7 @@ protected override void Dispose(bool disposing) { disposed = true; sequence = default; + cursor = default; base.Dispose(disposing); } void ThrowIfDisposed() diff --git a/src/Split/uap10.0/StringStream.cs b/src/Split/uap10.0/StringStream.cs index b4cd3943..6742772d 100644 --- a/src/Split/uap10.0/StringStream.cs +++ b/src/Split/uap10.0/StringStream.cs @@ -15,8 +15,10 @@ namespace System.IO; /// /// /// This stream never emits a byte order mark (BOM). Callers who need a BOM can prepend it themselves. -/// The polyfill encodes the whole text into a single buffer on first read rather than encoding -/// on-the-fly; this is a non-observable performance difference from the BCL implementation. +/// The text is encoded incrementally as the stream is read rather than being buffered up front, so the +/// peak memory cost is bounded by the caller's read buffer rather than the full encoded length. A +/// stateful preserves conversion state across reads, so multi-byte characters +/// (including surrogate pairs) that straddle a buffer boundary are encoded correctly. /// [ExcludeFromCodeCoverage] [DebuggerNonUserCode] @@ -31,9 +33,15 @@ sealed class StringStream : { ReadOnlyMemory text; Encoding encoding; - byte[]? encoded; - int position; + int maxBytesPerChar; + Encoder? encoder; + int charPosition; + bool encoderFlushed; bool disposed; + byte[]? pendingBytes; + int pendingOffset; + int pendingCount; + byte[]? singleByteBuffer; /// /// Initializes a new instance of the class with the specified string and encoding. /// @@ -49,6 +57,7 @@ public StringStream(string text, Encoding encoding) } this.text = text.AsMemory(); this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Initializes a new instance of the class with the specified character memory and encoding. @@ -61,6 +70,7 @@ public StringStream(ReadOnlyMemory text, Encoding encoding) } this.text = text; this.encoding = encoding; + maxBytesPerChar = encoding.GetMaxByteCount(1); } /// /// Gets the encoding used by this stream. @@ -80,42 +90,109 @@ public override long Position get => throw new NotSupportedException("Stream does not support seeking."); set => throw new NotSupportedException("Stream does not support seeking."); } - byte[] GetEncoded() - { - if (encoded == null) - { - var chars = text.ToArray(); - encoded = encoding.GetBytes(chars, 0, chars.Length); - } - return encoded; - } /// public override int Read(byte[] buffer, int offset, int count) { GuardRange(buffer, offset, count); - ThrowIfDisposed(); - var all = GetEncoded(); - var remaining = all.Length - position; - if (remaining <= 0 || - count == 0) - { - return 0; - } - var toRead = Math.Min(remaining, count); - Array.Copy(all, position, buffer, offset, toRead); - position += toRead; - return toRead; + return ReadCore(buffer, offset, count); } /// public override int ReadByte() + { + var single = singleByteBuffer ??= new byte[1]; + return ReadCore(single, 0, 1) > 0 ? single[0] : -1; + } + int ReadCore(byte[] buffer, int offset, int count) { ThrowIfDisposed(); - var all = GetEncoded(); - if (position < all.Length) + if (count == 0 || + (charPosition >= text.Length && pendingCount == 0 && encoderFlushed)) + { + return 0; + } + if (charPosition == 0 && + pendingCount == 0 && + text.Length <= (int.MaxValue / maxBytesPerChar) - 1 && + count >= encoding.GetMaxByteCount(text.Length)) { - return all[position++]; + var written = EncodeAll(text.Span, buffer, offset, count); + charPosition = text.Length; + encoderFlushed = true; + return written; } - return -1; + var totalBytesWritten = 0; + if (pendingCount > 0) + { + var toCopy = Math.Min(pendingCount, count); + Array.Copy(pendingBytes!, pendingOffset, buffer, offset, toCopy); + pendingOffset += toCopy; + pendingCount -= toCopy; + totalBytesWritten += toCopy; + if (totalBytesWritten == count) + { + return totalBytesWritten; + } + } + if (charPosition < text.Length) + { + var remaining = text.Span.Slice(charPosition); + var availableBytes = count - totalBytesWritten; + if (availableBytes < maxBytesPerChar) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + var charsToEncode = Math.Min(2, remaining.Length); + Convert(remaining.Slice(0, charsToEncode), pendingBytes, pendingBytes.Length, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + var toCopy = Math.Min(bytesUsed, availableBytes); + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + pendingOffset = toCopy; + pendingCount = bytesUsed - toCopy; + } + else + { + Convert(remaining, buffer, offset + totalBytesWritten, availableBytes, flush: false, out var charsUsed, out var bytesUsed); + charPosition += charsUsed; + totalBytesWritten += bytesUsed; + } + } + if (charPosition >= text.Length && + !encoderFlushed && + pendingCount == 0) + { + pendingBytes ??= new byte[encoding.GetMaxByteCount(2)]; + Convert(ReadOnlySpan.Empty, pendingBytes, pendingBytes.Length, flush: true, out _, out var flushBytes); + encoderFlushed = true; + if (flushBytes > 0) + { + var available = count - totalBytesWritten; + var toCopy = Math.Min(flushBytes, available); + if (toCopy > 0) + { + Array.Copy(pendingBytes, 0, buffer, offset + totalBytesWritten, toCopy); + totalBytesWritten += toCopy; + } + if (toCopy < flushBytes) + { + pendingOffset = toCopy; + pendingCount = flushBytes - toCopy; + } + } + } + return totalBytesWritten; + } + Encoder GetEncoder() => encoder ??= encoding.GetEncoder(); + int EncodeAll(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount) + { + var charArray = chars.ToArray(); + return encoding.GetBytes(charArray, 0, charArray.Length, bytes, byteIndex); + } + void Convert(ReadOnlySpan chars, byte[] bytes, int byteCount, bool flush, out int charsUsed, out int bytesUsed) => + Convert(chars, bytes, 0, byteCount, flush, out charsUsed, out bytesUsed); + void Convert(ReadOnlySpan chars, byte[] bytes, int byteIndex, int byteCount, bool flush, out int charsUsed, out int bytesUsed) + { + var charArray = chars.Slice(0, Math.Min(chars.Length, byteCount)).ToArray(); + GetEncoder().Convert(charArray, 0, charArray.Length, bytes, byteIndex, byteCount, flush, out charsUsed, out bytesUsed, out _); } /// public override Task ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) @@ -127,7 +204,11 @@ public override Task ReadAsync(byte[] buffer, int offset, int count, Cancel } try { - return Task.FromResult(Read(buffer, offset, count)); + return Task.FromResult(ReadCore(buffer, offset, count)); + } + catch (OperationCanceledException exception) + { + return Task.FromCanceled(exception.CancellationToken); } catch (Exception exception) { diff --git a/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs b/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs index 6f6a2035..a196bce7 100644 --- a/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs +++ b/src/Tests/PolyfillTests_ReadOnlyMemoryStream.cs @@ -101,6 +101,17 @@ public async Task ReadOnlyMemoryStream_CopyTo() await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4 }); } + [Test] + public async Task ReadOnlyMemoryStream_FromMemoryViaImplicitConversion() + { + var buffer = new byte[] { 1, 2, 3, 4, 5 }; + Memory memory = buffer; + using var stream = new ReadOnlyMemoryStream(memory); + + await Assert.That(stream.Length).IsEqualTo(5L); + await Assert.That(stream.CanRead).IsTrue(); + } + [Test] public async Task ReadOnlyMemoryStream_Empty() { diff --git a/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs b/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs index 66cc44d8..d3c1c7f5 100644 --- a/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs +++ b/src/Tests/PolyfillTests_ReadOnlySequenceStream.cs @@ -73,6 +73,44 @@ public async Task ReadOnlySequenceStream_Seek() await Assert.That(stream.ReadByte()).IsEqualTo(5); } + [Test] + public async Task ReadOnlySequenceStream_SeekAcrossSegments_RepositionsCursor() + { + // bytes by index: 0..8 -> 1,2,3,4,5,6,7,8,9 across three segments. + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5 }, new byte[] { 6, 7, 8, 9 }); + using var stream = new ReadOnlySequenceStream(sequence); + + // Forward from the start into the third segment. + stream.Seek(6, SeekOrigin.Begin); + await Assert.That(stream.Position).IsEqualTo(6L); + await Assert.That(stream.ReadByte()).IsEqualTo(7); + + // Backward into the first segment (walk-from-start branch). + stream.Position = 1; + await Assert.That(stream.ReadByte()).IsEqualTo(2); + + // Forward relative to the current cursor into the second segment. + stream.Seek(2, SeekOrigin.Current); + await Assert.That(stream.ReadByte()).IsEqualTo(5); + + // Clamp to the end via End origin. + stream.Seek(0, SeekOrigin.End); + await Assert.That(stream.Position).IsEqualTo(9L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + } + + [Test] + public async Task ReadOnlySequenceStream_SeekBeyondLength_ReadsZero() + { + using var stream = new ReadOnlySequenceStream(new ReadOnlySequence(new byte[] { 1, 2, 3 })); + + stream.Position = 10; + + await Assert.That(stream.Position).IsEqualTo(10L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + await Assert.That(stream.Read(new byte[4], 0, 4)).IsEqualTo(0); + } + [Test] public async Task ReadOnlySequenceStream_SeekBeforeBeginThrows() { @@ -111,6 +149,48 @@ public async Task ReadOnlySequenceStream_Empty() await Assert.That(stream.Read(new byte[4], 0, 4)).IsEqualTo(0); } + [Test] + public async Task ReadOnlySequenceStream_CopyTo_MultiSegment() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5 }, new byte[] { 6, 7, 8, 9 }); + using var stream = new ReadOnlySequenceStream(sequence); + using var target = new MemoryStream(); + + stream.CopyTo(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 }); + await Assert.That(stream.Position).IsEqualTo(9L); + } + + [Test] + public async Task ReadOnlySequenceStream_CopyTo_AfterPartialRead_CopiesRemainder() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2, 3 }, new byte[] { 4, 5, 6 }); + using var stream = new ReadOnlySequenceStream(sequence); + + var head = new byte[2]; + stream.Read(head, 0, 2); + + using var target = new MemoryStream(); + stream.CopyTo(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 3, 4, 5, 6 }); + await Assert.That(stream.Position).IsEqualTo(6L); + } + + [Test] + public async Task ReadOnlySequenceStream_CopyToAsync_MultiSegment() + { + var sequence = CreateMultiSegment(new byte[] { 1, 2 }, new byte[] { 3, 4, 5 }); + using var stream = new ReadOnlySequenceStream(sequence); + using var target = new MemoryStream(); + + await stream.CopyToAsync(target); + + await Assert.That(target.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3, 4, 5 }); + await Assert.That(stream.Position).IsEqualTo(5L); + } + static ReadOnlySequence CreateMultiSegment(params byte[][] parts) { var first = new ReadOnlySequenceStreamSegment(parts[0]); diff --git a/src/Tests/PolyfillTests_StringStream.cs b/src/Tests/PolyfillTests_StringStream.cs index c75e79b7..80d4b390 100644 --- a/src/Tests/PolyfillTests_StringStream.cs +++ b/src/Tests/PolyfillTests_StringStream.cs @@ -55,6 +55,45 @@ public async Task StringStream_ReadInSmallChunks_ReconstructsOutput() await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); } + [Test] + public async Task StringStream_MultiByteReadByteByByte_MatchesGetBytes() + { + // Reading one byte at a time forces the spillover path: each multi-byte character + // (and the surrogate pair) is encoded into the internal buffer and drained byte by byte. + const string input = "Unicode: 你好世界 🌍 café"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + using var accumulator = new MemoryStream(); + int value; + while ((value = stream.ReadByte()) != -1) + { + accumulator.WriteByte((byte) value); + } + + await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_MultiByteBoundarySplit_MatchesGetBytes() + { + // A 7-byte buffer is not a multiple of the 3-byte CJK width, so the encoder must carry + // conversion state across reads without splitting or duplicating a character. + const string input = "你好世界你好世界你好世界"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + using var accumulator = new MemoryStream(); + var buffer = new byte[7]; + int read; + while ((read = stream.Read(buffer, 0, buffer.Length)) > 0) + { + accumulator.Write(buffer, 0, read); + } + + await Assert.That(accumulator.ToArray()).IsEquivalentTo(expected); + } + [Test] public async Task StringStream_ReadByte_ReturnsBytesThenMinusOne() { @@ -157,6 +196,152 @@ public async Task StringStream_AfterDispose_CanReadIsFalse() await Assert.That(stream.CanRead).IsFalse(); } + [Test] + [Arguments("Hello, World! ")] + [Arguments("Unicode: 你好世界 🌍")] + [Arguments("Multi\nLine\r\nText")] + public async Task StringStream_DifferentStrings_MatchGetBytes(string input) + { + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + [Arguments("ASCII text")] + [Arguments("Ñoño español")] + public async Task StringStream_DifferentEncodings_MatchGetBytes(string input) + { + foreach (var encoding in new[] { Encoding.UTF8, Encoding.Unicode, Encoding.UTF32 }) + { + var expected = encoding.GetBytes(input); + using var stream = new StringStream(input, encoding); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + } + + [Test] + public async Task StringStream_SurrogatePairs_MatchGetBytes() + { + const string input = "😀😁😂🤣😃😄"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_MultiByteAcrossChunkBoundary_MatchGetBytes() + { + var input = new string('A', 1023) + "你"; + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_ChunkedReading_LargeInput() + { + var input = new string('A', 10000); + var expected = Encoding.UTF8.GetBytes(input); + using var stream = new StringStream(input, Encoding.UTF8); + + var actual = new byte[expected.Length]; + var total = 0; + const int chunkSize = 512; + int read; + while ((read = stream.Read(actual, total, Math.Min(chunkSize, expected.Length - total))) > 0) + { + total += read; + } + + await Assert.That(total).IsEqualTo(expected.Length); + await Assert.That(actual).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_FastPathGuard_DoesNotOverflow() + { + // GetMaxByteCount(text.Length) would overflow int for this encoding; the fast-path guard + // must short-circuit before calling it rather than throwing OverflowException. + using var stream = new StringStream("hello", new OverflowingEncoding()); + + var threw = false; + try + { + stream.Read(new byte[16], 0, 16); + } + catch (OverflowException) + { + threw = true; + } + + await Assert.That(threw).IsFalse(); + } + + [Test] + public async Task StringStream_MemorySlice_MatchGetBytes() + { + const string source = "0123456789ABCDEFGHIJ"; + var slice = source.AsMemory(5, 10); + var expected = Encoding.UTF8.GetBytes("56789ABCDE"); + using var stream = new StringStream(slice, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_CharArrayMemory_MatchGetBytes() + { + var chars = new[] { 'H', 'e', 'l', 'l', 'o' }; + var expected = Encoding.UTF8.GetBytes("Hello"); + using var stream = new StringStream(new ReadOnlyMemory(chars), Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + [Arguments("Hello")] + [Arguments("Unicode: 你好")] + [Arguments("Emoji: 😀")] + public async Task StringStream_MemoryOverloadMatchesStringOverload(string input) + { + using var fromMemory = new StringStream(input.AsMemory(), Encoding.UTF8); + using var fromString = new StringStream(input, Encoding.UTF8); + + var memoryBytes = ReadToEnd(fromMemory); + var stringBytes = ReadToEnd(fromString); + + await Assert.That(memoryBytes).IsEquivalentTo(stringBytes); + } + + [Test] + public async Task StringStream_TruncatedSurrogate_ProducesReplacementChar() + { + // "A" plus the high surrogate of U+1F30D with no trailing low surrogate. + const string emoji = "A\U0001F30D"; + var truncated = emoji.AsMemory(0, 2); + var expected = Encoding.UTF8.GetBytes("A�"); + using var stream = new StringStream(truncated, Encoding.UTF8); + + await Assert.That(ReadToEnd(stream)).IsEquivalentTo(expected); + } + + [Test] + public async Task StringStream_CopyToAsync_HonorsCancellation() + { + using var stream = new StringStream("hello", Encoding.UTF8); + using var destination = new MemoryStream(); + using var cancelSource = new CancelSource(); + cancelSource.Cancel(); + + await Assert.That(async () => await stream.CopyToAsync(destination, 81920, cancelSource.Token)) + .Throws(); + } + static byte[] ReadToEnd(Stream stream) { using var memory = new MemoryStream(); @@ -169,5 +354,34 @@ static byte[] ReadToEnd(Stream stream) return memory.ToArray(); } + + // An encoding whose GetMaxByteCount overflows int for larger char counts, used to verify the + // StringStream fast-path guard skips GetMaxByteCount rather than throwing OverflowException. + sealed class OverflowingEncoding : + Encoding + { + public override int GetByteCount(char[] chars, int index, int count) => + UTF8.GetByteCount(chars, index, count); + + public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) => + UTF8.GetBytes(chars, charIndex, charCount, bytes, byteIndex); + + public override int GetMaxByteCount(int charCount) => + charCount switch + { + 1 => int.MaxValue, + 2 => 8, + _ => checked((charCount + 1) * int.MaxValue) + }; + + public override int GetCharCount(byte[] bytes, int index, int count) => + throw new NotImplementedException(); + + public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) => + throw new NotImplementedException(); + + public override int GetMaxCharCount(int byteCount) => + throw new NotImplementedException(); + } } #endif diff --git a/src/Tests/PolyfillTests_WritableMemoryStream.cs b/src/Tests/PolyfillTests_WritableMemoryStream.cs index c0f14262..3f420578 100644 --- a/src/Tests/PolyfillTests_WritableMemoryStream.cs +++ b/src/Tests/PolyfillTests_WritableMemoryStream.cs @@ -96,5 +96,77 @@ public async Task WritableMemoryStream_ToArrayReturnsWrittenContent() await Assert.That(stream.ToArray()).IsEquivalentTo(new byte[] { 1, 2, 3 }); } + + [Test] + public async Task WritableMemoryStream_WriteByteBeyondCapacityThrows() + { + using var stream = new WritableMemoryStream(new byte[3]); + + stream.WriteByte(1); + stream.WriteByte(2); + stream.WriteByte(3); + + await Assert.That(() => stream.WriteByte(4)).Throws(); + } + + [Test] + public async Task WritableMemoryStream_WriteUpToExactCapacitySucceeds() + { + using var stream = new WritableMemoryStream(new byte[10]); + var data = new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + + stream.Write(data, 0, data.Length); + + await Assert.That(stream.Position).IsEqualTo(10L); + await Assert.That(stream.Length).IsEqualTo(10L); + + stream.Position = 0; + var readBack = new byte[10]; + var read = stream.Read(readBack, 0, 10); + + await Assert.That(read).IsEqualTo(10); + await Assert.That(readBack).IsEquivalentTo(data); + } + + [Test] + public async Task WritableMemoryStream_WritePastCapacityLeavesPositionUnchanged() + { + using var stream = new WritableMemoryStream(new byte[10]); + stream.Write(new byte[8], 0, 8); + + await Assert.That(stream.Position).IsEqualTo(8L); + await Assert.That(() => stream.Write(new byte[5], 0, 5)).Throws(); + await Assert.That(stream.Position).IsEqualTo(8L); + } + + [Test] + public async Task WritableMemoryStream_SeekPastCapacity() + { + using var stream = new WritableMemoryStream(new byte[10]); + + stream.Seek(100, SeekOrigin.Begin); + + await Assert.That(stream.Position).IsEqualTo(100L); + await Assert.That(stream.ReadByte()).IsEqualTo(-1); + await Assert.That(() => stream.WriteByte(42)).Throws(); + } + + [Test] + public async Task WritableMemoryStream_WriteOverExistingDataReplacesData() + { + var backing = new byte[10]; + using var stream = new WritableMemoryStream(backing); + stream.Write(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 0, 10); + + stream.Position = 3; + stream.Write(new byte[] { 100, 101, 102 }, 0, 3); + + stream.Position = 0; + var result = new byte[10]; + var read = stream.Read(result, 0, 10); + + await Assert.That(read).IsEqualTo(10); + await Assert.That(result).IsEquivalentTo(new byte[] { 1, 2, 3, 100, 101, 102, 7, 8, 9, 10 }); + } } #endif