From f379bfbaf6e9aad2c5ece698cf0b92c12a83d8f4 Mon Sep 17 00:00:00 2001 From: Carlos Sanchez <1175054+carlossanlop@users.noreply.github.com> Date: Thu, 21 Jul 2022 00:34:40 -0700 Subject: [PATCH] Remove code duplication in TarHeader class (#72501) * Missing ConfigureAwait(false) in await using. * Remove argument from TarSizeFieldNegative exception resource string. * TarHeader.TryGetNextHeaderAsync: Reduce duplication. * TarHeader.ReadExtendedAttributesBlockAsync: reduce duplication. * TarHeader.ReadGnuLongPathDataBlockAsync: reduce duplication. * TarHeader.ReadExtendedAttributesFromBuffer: reduce duplication. * TarHeader.WriteAsV7Async: Reduce duplication. * TarHeader.WriteAsUstarAsync: reduce duplication. * TarHeader.*Pax*Async: reduce duplication. * TarHeader.WriteAsGnuAsync: reduce duplication. * Use pretty switch for async writing. Move the sync version to its own method, to keep similar logic close by. * checksum => tmpChecksum for consistency with other similar methods. * Add ExtendedAttributes non-nullable property that initializes private nullable property if needed. * Remove unnecessary buffer cleaning call. 
Co-authored-by: carlossanlop --- .../src/Resources/Strings.resx | 2 +- .../src/System/Formats/Tar/GnuTarEntry.cs | 4 +- .../PaxGlobalExtendedAttributesTarEntry.cs | 11 +- .../src/System/Formats/Tar/PaxTarEntry.cs | 28 +- .../src/System/Formats/Tar/TarEntry.cs | 2 +- .../src/System/Formats/Tar/TarHeader.Read.cs | 396 ++++++++---------- .../src/System/Formats/Tar/TarHeader.Write.cs | 227 +++++----- .../src/System/Formats/Tar/TarHeader.cs | 10 +- .../src/System/Formats/Tar/TarReader.cs | 8 +- .../src/System/Formats/Tar/TarWriter.cs | 119 +++--- 10 files changed, 362 insertions(+), 445 deletions(-) diff --git a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx index 9ce8016d98721..79e3188410c3c 100644 --- a/src/libraries/System.Formats.Tar/src/Resources/Strings.resx +++ b/src/libraries/System.Formats.Tar/src/Resources/Strings.resx @@ -232,7 +232,7 @@ A POSIX format was expected (Ustar or PAX), but could not be reliably determined for entry '{0}'. - The size field is negative in the tar entry '{0}'. + The size field is negative in a tar entry. The value of the size field for the current entry of type '{0}' is beyond the expected length. 
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/GnuTarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/GnuTarEntry.cs index 4c13fddfa9ea6..eda97e2b8a26f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/GnuTarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/GnuTarEntry.cs @@ -54,13 +54,13 @@ public GnuTarEntry(TarEntry other) if (other is PaxTarEntry paxOther) { - changedATime = TarHelpers.TryGetDateTimeOffsetFromTimestampString(paxOther._header._extendedAttributes, TarHeader.PaxEaATime, out DateTimeOffset aTime); + changedATime = TarHelpers.TryGetDateTimeOffsetFromTimestampString(paxOther._header.ExtendedAttributes, TarHeader.PaxEaATime, out DateTimeOffset aTime); if (changedATime) { _header._aTime = aTime; } - changedCTime = TarHelpers.TryGetDateTimeOffsetFromTimestampString(paxOther._header._extendedAttributes, TarHeader.PaxEaCTime, out DateTimeOffset cTime); + changedCTime = TarHelpers.TryGetDateTimeOffsetFromTimestampString(paxOther._header.ExtendedAttributes, TarHeader.PaxEaCTime, out DateTimeOffset cTime); if (changedCTime) { _header._cTime = cTime; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxGlobalExtendedAttributesTarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxGlobalExtendedAttributesTarEntry.cs index 7b7c493b171c4..832996693624f 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxGlobalExtendedAttributesTarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxGlobalExtendedAttributesTarEntry.cs @@ -29,20 +29,13 @@ public PaxGlobalExtendedAttributesTarEntry(IEnumerable(globalExtendedAttributes); + _header.InitializeExtendedAttributesWithExisting(globalExtendedAttributes); } /// /// Returns the global extended attributes stored in this entry. 
/// - public IReadOnlyDictionary GlobalExtendedAttributes - { - get - { - _header._extendedAttributes ??= new Dictionary(); - return _readOnlyGlobalExtendedAttributes ??= _header._extendedAttributes.AsReadOnly(); - } - } + public IReadOnlyDictionary GlobalExtendedAttributes => _readOnlyGlobalExtendedAttributes ??= _header.ExtendedAttributes.AsReadOnly(); // Determines if the current instance's entry type supports setting a data stream. internal override bool IsDataStreamSetterSupported() => false; diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxTarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxTarEntry.cs index 9c80a45e2647e..db317f7a88411 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxTarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/PaxTarEntry.cs @@ -51,7 +51,6 @@ public PaxTarEntry(TarEntryType entryType, string entryName) : base(entryType, entryName, TarEntryFormat.Pax, isGea: false) { _header._prefix = string.Empty; - _header._extendedAttributes = new Dictionary(); Debug.Assert(_header._mTime != default); AddNewAccessAndChangeTimestampsIfNotExist(useMTime: true); @@ -92,7 +91,7 @@ public PaxTarEntry(TarEntryType entryType, string entryName, IEnumerable(extendedAttributes); + _header.InitializeExtendedAttributesWithExisting(extendedAttributes); Debug.Assert(_header._mTime != default); AddNewAccessAndChangeTimestampsIfNotExist(useMTime: true); @@ -111,15 +110,14 @@ public PaxTarEntry(TarEntry other) if (other is PaxTarEntry paxOther) { - _header._extendedAttributes = new Dictionary(paxOther.ExtendedAttributes); + _header.InitializeExtendedAttributesWithExisting(paxOther.ExtendedAttributes); } else { - _header._extendedAttributes = new Dictionary(); if (other is GnuTarEntry gnuOther) { - _header._extendedAttributes[TarHeader.PaxEaATime] = TarHelpers.GetTimestampStringFromDateTimeOffset(gnuOther.AccessTime); - _header._extendedAttributes[TarHeader.PaxEaCTime] = 
TarHelpers.GetTimestampStringFromDateTimeOffset(gnuOther.ChangeTime); + _header.ExtendedAttributes[TarHeader.PaxEaATime] = TarHelpers.GetTimestampStringFromDateTimeOffset(gnuOther.AccessTime); + _header.ExtendedAttributes[TarHeader.PaxEaCTime] = TarHelpers.GetTimestampStringFromDateTimeOffset(gnuOther.ChangeTime); } } @@ -144,14 +142,7 @@ public PaxTarEntry(TarEntry other) /// File length, under the name size, as an , if the string representation of the number is larger than 12 bytes. /// /// - public IReadOnlyDictionary ExtendedAttributes - { - get - { - _header._extendedAttributes ??= new Dictionary(); - return _readOnlyExtendedAttributes ??= _header._extendedAttributes.AsReadOnly(); - } - } + public IReadOnlyDictionary ExtendedAttributes => _readOnlyExtendedAttributes ??= _header.ExtendedAttributes.AsReadOnly(); // Determines if the current instance's entry type supports setting a data stream. internal override bool IsDataStreamSetterSupported() => EntryType == TarEntryType.RegularFile; @@ -162,9 +153,8 @@ public IReadOnlyDictionary ExtendedAttributes private void AddNewAccessAndChangeTimestampsIfNotExist(bool useMTime) { Debug.Assert(!useMTime || (useMTime && _header._mTime != default)); - Debug.Assert(_header._extendedAttributes != null); - bool containsATime = _header._extendedAttributes.ContainsKey(TarHeader.PaxEaATime); - bool containsCTime = _header._extendedAttributes.ContainsKey(TarHeader.PaxEaCTime); + bool containsATime = _header.ExtendedAttributes.ContainsKey(TarHeader.PaxEaATime); + bool containsCTime = _header.ExtendedAttributes.ContainsKey(TarHeader.PaxEaCTime); if (!containsATime || !containsCTime) { @@ -172,12 +162,12 @@ private void AddNewAccessAndChangeTimestampsIfNotExist(bool useMTime) if (!containsATime) { - _header._extendedAttributes[TarHeader.PaxEaATime] = secondsFromEpochString; + _header.ExtendedAttributes[TarHeader.PaxEaATime] = secondsFromEpochString; } if (!containsCTime) { - _header._extendedAttributes[TarHeader.PaxEaCTime] = 
secondsFromEpochString; + _header.ExtendedAttributes[TarHeader.PaxEaCTime] = secondsFromEpochString; } } } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs index b581355a4bf2a..ea965322d7809 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarEntry.cs @@ -529,7 +529,7 @@ private async Task ExtractAsRegularFileAsync(string destinationFileName, Cancell // Rely on FileStream's ctor for further checking destinationFileName parameter FileStream fs = new FileStream(destinationFileName, CreateFileStreamOptions(isAsync: true)); - await using (fs) + await using (fs.ConfigureAwait(false)) { if (DataStream != null) { diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs index 05c5017a16461..e89c9d4579dd3 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs @@ -27,39 +27,12 @@ internal sealed partial class TarHeader archiveStream.ReadExactly(buffer); - TarHeader header = new(initialFormat); - - // Confirms if v7 or pax, or tentatively selects ustar - if (!header.TryReadCommonAttributes(buffer)) - { - return null; - } - - // Confirms if gnu, or tentatively selects ustar - header.ReadMagicAttribute(buffer); - - if (header._format != TarEntryFormat.V7) + TarHeader? 
header = TryReadAttributes(initialFormat, buffer); + if (header != null) { - // Confirms if gnu - header.ReadVersionAttribute(buffer); - - // Fields that ustar, pax and gnu share identically - header.ReadPosixAndGnuSharedAttributes(buffer); - - Debug.Assert(header._format is TarEntryFormat.Ustar or TarEntryFormat.Pax or TarEntryFormat.Gnu); - if (header._format == TarEntryFormat.Ustar) - { - header.ReadUstarAttributes(buffer); - } - else if (header._format == TarEntryFormat.Gnu) - { - header.ReadGnuAttributes(buffer); - } - // In PAX, there is nothing to read in this section (empty space) + header.ProcessDataBlock(archiveStream, copyData); } - header.ProcessDataBlock(archiveStream, copyData); - return header; } @@ -70,118 +43,52 @@ internal sealed partial class TarHeader { cancellationToken.ThrowIfCancellationRequested(); - TarHeader header = new(initialFormat); - // The four supported formats have a header that fits in the default record size byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); Memory buffer = rented.AsMemory(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Span.Clear(); // Rented arrays aren't clean await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - // Confirms if v7 or pax, or tentatively selects ustar - if (!header.TryReadCommonAttributes(buffer.Span)) + TarHeader? 
header = TryReadAttributes(initialFormat, buffer.Span); + if (header != null) { - return null; + await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false); } - // Confirms if gnu, or tentatively selects ustar - header.ReadMagicAttribute(buffer.Span); + ArrayPool.Shared.Return(rented); - if (header._format != TarEntryFormat.V7) - { - // Confirms if gnu - header.ReadVersionAttribute(buffer.Span); + return header; + } - // Fields that ustar, pax and gnu share identically - header.ReadPosixAndGnuSharedAttributes(buffer.Span); + private static TarHeader? TryReadAttributes(TarEntryFormat initialFormat, Span buffer) + { + // Confirms if v7 or pax, or tentatively selects ustar + TarHeader? header = TryReadCommonAttributes(buffer, initialFormat); + if (header != null) + { + // Confirms if gnu, or tentatively selects ustar + header.ReadMagicAttribute(buffer); - Debug.Assert(header._format is TarEntryFormat.Ustar or TarEntryFormat.Pax or TarEntryFormat.Gnu); - if (header._format == TarEntryFormat.Ustar) - { - header.ReadUstarAttributes(buffer.Span); - } - else if (header._format == TarEntryFormat.Gnu) + if (header._format != TarEntryFormat.V7) { - header.ReadGnuAttributes(buffer.Span); - } - // In PAX, there is nothing to read in this section (empty space) - } + // Confirms if gnu + header.ReadVersionAttribute(buffer); - bool skipBlockAlignmentPadding = true; + // Fields that ustar, pax and gnu share identically + header.ReadPosixAndGnuSharedAttributes(buffer); - string? 
longPath; - switch (header._typeFlag) - { - case TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes: - Debug.Assert(header._name != null); - header._extendedAttributes = await ReadExtendedAttributesBlockAsync(archiveStream, header._typeFlag, header._size, header._name, cancellationToken).ConfigureAwait(false); - break; - - case TarEntryType.LongLink: - longPath = await ReadGnuLongPathDataBlockAsync(archiveStream, header._typeFlag, header._size, cancellationToken).ConfigureAwait(false); - if (longPath != null) + Debug.Assert(header._format is TarEntryFormat.Ustar or TarEntryFormat.Pax or TarEntryFormat.Gnu); + if (header._format == TarEntryFormat.Ustar) { - header._linkName = longPath; + header.ReadUstarAttributes(buffer); } - break; - - case TarEntryType.LongPath: - longPath = await ReadGnuLongPathDataBlockAsync(archiveStream, header._typeFlag, header._size, cancellationToken).ConfigureAwait(false); - if (longPath != null) + else if (header._format == TarEntryFormat.Gnu) { - header._name = longPath; + header.ReadGnuAttributes(buffer); } - break; - - case TarEntryType.BlockDevice: - case TarEntryType.CharacterDevice: - case TarEntryType.Directory: - case TarEntryType.Fifo: - case TarEntryType.HardLink: - case TarEntryType.SymbolicLink: - // No data section - break; - - case TarEntryType.RegularFile: - case TarEntryType.V7RegularFile: // Treated as regular file - case TarEntryType.ContiguousFile: // Treated as regular file - case TarEntryType.DirectoryList: // Contains the list of filesystem entries in the data section - case TarEntryType.MultiVolume: // Contains portion of a file - case TarEntryType.RenamedOrSymlinked: // Might contain data - case TarEntryType.SparseFile: // Contains portion of a file - case TarEntryType.TapeVolume: // Might contain data - default: // Unrecognized entry types could potentially have a data section - header._dataStream = await GetDataStreamAsync(archiveStream, copyData, header._size, 
cancellationToken).ConfigureAwait(false); - if (header._dataStream is SeekableSubReadStream) - { - await TarHelpers.AdvanceStreamAsync(archiveStream, header._size, cancellationToken).ConfigureAwait(false); - } - else if (header._dataStream is SubReadStream) - { - // This stream gives the user the chance to optionally read the data section - // when the underlying archive stream is unseekable - skipBlockAlignmentPadding = false; - } - - break; - } - - if (skipBlockAlignmentPadding) - { - if (header._size > 0) - { - await TarHelpers.SkipBlockAlignmentPaddingAsync(archiveStream, header._size, cancellationToken).ConfigureAwait(false); - } - - if (archiveStream.CanSeek) - { - header._endOfHeaderAndDataAndBlockAlignment = archiveStream.Position; + // In PAX, there is nothing to read in this section (empty space) } } - - ArrayPool.Shared.Return(rented); - return header; } @@ -192,80 +99,77 @@ internal sealed partial class TarHeader // Throws if any conversion from string to the expected data type fails. internal void ReplaceNormalAttributesWithExtended(Dictionary? dictionaryFromExtendedAttributesHeader) { - // At this point, the header is being created, so this should be the first time we fill the extended attributes dictionary - Debug.Assert(_extendedAttributes == null); - if (dictionaryFromExtendedAttributesHeader == null || dictionaryFromExtendedAttributesHeader.Count == 0) { return; } - _extendedAttributes = dictionaryFromExtendedAttributesHeader; + InitializeExtendedAttributesWithExisting(dictionaryFromExtendedAttributesHeader); // Find all the extended attributes with known names and save them in the expected standard attribute. // The 'name' header field only fits 100 bytes, so we always store the full name text to the dictionary. - if (_extendedAttributes.TryGetValue(PaxEaName, out string? paxEaName)) + if (ExtendedAttributes.TryGetValue(PaxEaName, out string? 
paxEaName)) { _name = paxEaName; } // The 'linkName' header field only fits 100 bytes, so we always store the full linkName text to the dictionary. - if (_extendedAttributes.TryGetValue(PaxEaLinkName, out string? paxEaLinkName)) + if (ExtendedAttributes.TryGetValue(PaxEaLinkName, out string? paxEaLinkName)) { _linkName = paxEaLinkName; } // The 'mtime' header field only fits 12 bytes, so a more precise timestamp goes in the extended attributes - if (TarHelpers.TryGetDateTimeOffsetFromTimestampString(_extendedAttributes, PaxEaMTime, out DateTimeOffset mTime)) + if (TarHelpers.TryGetDateTimeOffsetFromTimestampString(ExtendedAttributes, PaxEaMTime, out DateTimeOffset mTime)) { _mTime = mTime; } // The user could've stored an override in the extended attributes - if (TarHelpers.TryGetStringAsBaseTenInteger(_extendedAttributes, PaxEaMode, out int mode)) + if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaMode, out int mode)) { _mode = mode; } // The 'size' header field only fits 12 bytes, so the data section length that surpases that limit needs to be retrieved - if (TarHelpers.TryGetStringAsBaseTenLong(_extendedAttributes, PaxEaSize, out long size)) + if (TarHelpers.TryGetStringAsBaseTenLong(ExtendedAttributes, PaxEaSize, out long size)) { _size = size; } // The 'uid' header field only fits 8 bytes, or the user could've stored an override in the extended attributes - if (TarHelpers.TryGetStringAsBaseTenInteger(_extendedAttributes, PaxEaUid, out int uid)) + if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaUid, out int uid)) { _uid = uid; } // The 'gid' header field only fits 8 bytes, or the user could've stored an override in the extended attributes - if (TarHelpers.TryGetStringAsBaseTenInteger(_extendedAttributes, PaxEaGid, out int gid)) + if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaGid, out int gid)) { _gid = gid; } // The 'uname' header field only fits 32 bytes - if 
(_extendedAttributes.TryGetValue(PaxEaUName, out string? paxEaUName)) + if (ExtendedAttributes.TryGetValue(PaxEaUName, out string? paxEaUName)) { _uName = paxEaUName; } // The 'gname' header field only fits 32 bytes - if (_extendedAttributes.TryGetValue(PaxEaGName, out string? paxEaGName)) + if (ExtendedAttributes.TryGetValue(PaxEaGName, out string? paxEaGName)) { _gName = paxEaGName; } // The 'devmajor' header field only fits 8 bytes, or the user could've stored an override in the extended attributes - if (TarHelpers.TryGetStringAsBaseTenInteger(_extendedAttributes, PaxEaDevMajor, out int devMajor)) + if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaDevMajor, out int devMajor)) { _devMajor = devMajor; } // The 'devminor' header field only fits 8 bytes, or the user could've stored an override in the extended attributes - if (TarHelpers.TryGetStringAsBaseTenInteger(_extendedAttributes, PaxEaDevMinor, out int devMinor)) + if (TarHelpers.TryGetStringAsBaseTenInteger(ExtendedAttributes, PaxEaDevMinor, out int devMinor)) { _devMinor = devMinor; } @@ -334,6 +238,64 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData) } } + private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, CancellationToken cancellationToken) + { + bool skipBlockAlignmentPadding = true; + + switch (_typeFlag) + { + case TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes: + await ReadExtendedAttributesBlockAsync(archiveStream, cancellationToken).ConfigureAwait(false); + break; + case TarEntryType.LongLink or TarEntryType.LongPath: + await ReadGnuLongPathDataBlockAsync(archiveStream, cancellationToken).ConfigureAwait(false); + break; + case TarEntryType.BlockDevice: + case TarEntryType.CharacterDevice: + case TarEntryType.Directory: + case TarEntryType.Fifo: + case TarEntryType.HardLink: + case TarEntryType.SymbolicLink: + // No data section + break; + case TarEntryType.RegularFile: + case TarEntryType.V7RegularFile: // 
Treated as regular file + case TarEntryType.ContiguousFile: // Treated as regular file + case TarEntryType.DirectoryList: // Contains the list of filesystem entries in the data section + case TarEntryType.MultiVolume: // Contains portion of a file + case TarEntryType.RenamedOrSymlinked: // Might contain data + case TarEntryType.SparseFile: // Contains portion of a file + case TarEntryType.TapeVolume: // Might contain data + default: // Unrecognized entry types could potentially have a data section + _dataStream = await GetDataStreamAsync(archiveStream, copyData, _size, cancellationToken).ConfigureAwait(false); + if (_dataStream is SeekableSubReadStream) + { + await TarHelpers.AdvanceStreamAsync(archiveStream, _size, cancellationToken).ConfigureAwait(false); + } + else if (_dataStream is SubReadStream) + { + // This stream gives the user the chance to optionally read the data section + // when the underlying archive stream is unseekable + skipBlockAlignmentPadding = false; + } + + break; + } + + if (skipBlockAlignmentPadding) + { + if (_size > 0) + { + await TarHelpers.SkipBlockAlignmentPaddingAsync(archiveStream, _size, cancellationToken).ConfigureAwait(false); + } + + if (archiveStream.CanSeek) + { + _endOfHeaderAndDataAndBlockAlignment = archiveStream.Position; + } + } + } + // Returns a stream that represents the data section of the current header. // If copyData is true, then a total number of _size bytes will be copied to a new MemoryStream, which is then returned. // Otherwise, if the archive stream is seekable, returns a seekable wrapper stream. @@ -385,7 +347,7 @@ private void ProcessDataBlock(Stream archiveStream, bool copyData) // Attempts to read the fields shared by all formats and stores them in their expected data type. // Throws if any data type conversion fails. // Returns true on success, false if checksum is zero. - private bool TryReadCommonAttributes(Span buffer) + private static TarHeader? 
TryReadCommonAttributes(Span buffer, TarEntryFormat initialFormat) { // Start by collecting fields that need special checks that return early when data is wrong @@ -393,35 +355,38 @@ private bool TryReadCommonAttributes(Span buffer) Span spanChecksum = buffer.Slice(FieldLocations.Checksum, FieldLengths.Checksum); if (TarHelpers.IsAllNullBytes(spanChecksum)) { - return false; + return null; } - _checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); + int checksum = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(spanChecksum); // Zero checksum means the whole header is empty - if (_checksum == 0) + if (checksum == 0) { - return false; + return null; } - _size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); - if (_size < 0) + long size = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Size, FieldLengths.Size)); + if (size < 0) { - throw new FormatException(string.Format(SR.TarSizeFieldNegative, _name)); + throw new FormatException(string.Format(SR.TarSizeFieldNegative)); } // Continue with the rest of the fields that require no special checks - - _name = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)); - _mode = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)); - _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)); - _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)); - long mTime = TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)); - _mTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(mTime); - _typeFlag = (TarEntryType)buffer[FieldLocations.TypeFlag]; - _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)); - - if (_format == TarEntryFormat.Unknown) - { - _format 
= _typeFlag switch + TarHeader header = new(initialFormat, + name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)), + mode: TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)), + mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.GetTenBaseLongFromOctalAsciiChars(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))), + typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag]) + { + _checksum = checksum, + _size = size, + _uid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)), + _gid = TarHelpers.GetTenBaseNumberFromOctalAsciiChars(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)), + _linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName)) + }; + + if (header._format == TarEntryFormat.Unknown) + { + header._format = header._typeFlag switch { TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes => TarEntryFormat.Pax, @@ -439,11 +404,11 @@ TarEntryType.SparseFile or // We can quickly determine the *minimum* possible format if the entry type // is the POSIX 'RegularFile', although later we could upgrade it to PAX or GNU - _ => (_typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7 + _ => (header._typeFlag == TarEntryType.RegularFile) ? TarEntryFormat.Ustar : TarEntryFormat.V7 }; } - return true; + return header; } // Reads fields only found in ustar format or above and converts them to their expected data type. @@ -553,74 +518,62 @@ private void ReadUstarAttributes(Span buffer) // Throws if end of stream is reached or if an attribute is malformed. 
private void ReadExtendedAttributesBlock(Stream archiveStream) { - Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); - - // This should be the first time we read the extended attributes directly from the stream block - Debug.Assert(_extendedAttributes == null); - - if (_size == 0) - { - return; - } - - // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // 4096 is a common max path length, and also the size field is 12 bytes long, which is under int.MaxValue. - if (_size > Array.MaxLength) + byte[]? buffer = CreateExtendedAttributesBufferIfSizeIsValid(); + if (buffer != null) { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + archiveStream.ReadExactly(buffer); + ReadExtendedAttributesFromBuffer(buffer, _name); } - - byte[] buffer = new byte[(int)_size]; - archiveStream.ReadExactly(buffer); - - _extendedAttributes = ReadExtendedAttributesFromBuffer(buffer, _name); } // Asynchronously collects the extended attributes found in the data section of a PAX entry of type 'x' or 'g'. // Throws if end of stream is reached or if an attribute is malformed. - private static async ValueTask?> ReadExtendedAttributesBlockAsync(Stream archiveStream, TarEntryType entryType, long size, string name, CancellationToken cancellationToken) + private async ValueTask ReadExtendedAttributesBlockAsync(Stream archiveStream, CancellationToken cancellationToken) { - Debug.Assert(entryType is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); - cancellationToken.ThrowIfCancellationRequested(); + byte[]? 
buffer = CreateExtendedAttributesBufferIfSizeIsValid(); + if (buffer != null) + { + await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); + ReadExtendedAttributesFromBuffer(buffer, _name); + } + } + + // Return a byte array if the size field has a valid value for extended attributes. Otherwise, return null, or throw. + private byte[]? CreateExtendedAttributesBufferIfSizeIsValid() + { + Debug.Assert(_typeFlag is TarEntryType.ExtendedAttributes or TarEntryType.GlobalExtendedAttributes); // It is not expected that the extended attributes data section will be longer than Array.MaxLength, considering - // 4096 is a common max path length, and also the size field is 12 bytes long, which is under int.MaxValue. - if (size > Array.MaxLength) + // the size field is 12 bytes long, which fits a number with a value under int.MaxValue. + if (_size > Array.MaxLength) { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, entryType.ToString())); + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } - if (size == 0) + if (_size == 0) { return null; } - byte[] buffer = new byte[(int)size]; - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - - return ReadExtendedAttributesFromBuffer(buffer, name); + return new byte[(int)_size]; } // Returns a dictionary containing the extended attributes collected from the provided byte buffer. - private static Dictionary ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) + private void ReadExtendedAttributesFromBuffer(ReadOnlySpan buffer, string name) { - Dictionary extendedAttributes = new(); - string dataAsString = TarHelpers.GetTrimmedUtf8String(buffer); using StringReader reader = new(dataAsString); while (TryGetNextExtendedAttribute(reader, out string? key, out string? 
value)) { - if (extendedAttributes.ContainsKey(key)) + if (ExtendedAttributes.ContainsKey(key)) { throw new FormatException(string.Format(SR.TarDuplicateExtendedAttribute, name)); } - extendedAttributes.Add(key, value); + ExtendedAttributes.Add(key, value); } - - return extendedAttributes; } // Reads the long path found in the data section of a GNU entry of type 'K' or 'L' @@ -628,21 +581,31 @@ private static Dictionary ReadExtendedAttributesFromBuffer(ReadO // Throws if end of stream is reached. private void ReadGnuLongPathDataBlock(Stream archiveStream) { - Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - - if (_size > Array.MaxLength) + byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); + if (buffer != null) { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); + archiveStream.ReadExactly(buffer); + ReadGnuLongPathDataFromBuffer(buffer); } + } - if (_size == 0) + // Asynchronously reads the long path found in the data section of a GNU entry of type 'K' or 'L' + // and replaces LinkName or Name, respectively, with the found string. + // Throws if end of stream is reached. + private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, CancellationToken cancellationToken) + { + cancellationToken.ThrowIfCancellationRequested(); + byte[]? buffer = CreateGnuLongDataBufferIfSizeIsValid(); + if (buffer != null) { - return; + await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); + ReadGnuLongPathDataFromBuffer(buffer); } + } - byte[] buffer = new byte[(int)_size]; - archiveStream.ReadExactly(buffer); - + // Collects the GNU long path info from the buffer and sets it in the right field depending on the type flag. 
+ private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan buffer) + { string longPath = TarHelpers.GetTrimmedUtf8String(buffer); if (_typeFlag == TarEntryType.LongLink) @@ -655,29 +618,22 @@ private void ReadGnuLongPathDataBlock(Stream archiveStream) } } - // Asynchronously reads the long path found in the data section of a GNU entry of type 'K' or 'L' - // and replaces Name or LinkName, respectively, with the found string. - // Throws if end of stream is reached. - private static async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, TarEntryType entryType, long size, CancellationToken cancellationToken) + // Return a byte array if the size field has a valid value for GNU long metadata entry data. Otherwise, return null, or throw. + private byte[]? CreateGnuLongDataBufferIfSizeIsValid() { - Debug.Assert(entryType is TarEntryType.LongLink or TarEntryType.LongPath); - - cancellationToken.ThrowIfCancellationRequested(); + Debug.Assert(_typeFlag is TarEntryType.LongLink or TarEntryType.LongPath); - if (size == 0) + if (_size > Array.MaxLength) { - return null; + throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, _typeFlag.ToString())); } - if (size > Array.MaxLength) + if (_size == 0) { - throw new InvalidOperationException(string.Format(SR.TarSizeFieldTooLargeForEntryType, entryType.ToString())); + return null; } - byte[] buffer = new byte[(int)size]; - await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false); - - return TarHelpers.GetTrimmedUtf8String(buffer); + return new byte[(int)_size]; } // Tries to collect the next extended attribute from the string wrapped by the specified reader. 
diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs index a808378c8fddd..ad5cdae22d031 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Write.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.Diagnostics; +using System.Diagnostics.CodeAnalysis; using System.IO; using System.Text; using System.Threading; @@ -29,12 +30,7 @@ internal sealed partial class TarHeader // Writes the current header as a V7 entry into the archive stream. internal void WriteAsV7(Stream archiveStream, Span buffer) { - long actualLength = GetTotalDataBytesToWrite(); - TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - - int checksum = WriteName(buffer, out _); - checksum += WriteCommonFields(buffer, actualLength, actualEntryType); - _checksum = WriteChecksum(checksum, buffer); + long actualLength = WriteV7FieldsToBuffer(buffer); archiveStream.Write(buffer); @@ -45,16 +41,11 @@ internal void WriteAsV7(Stream archiveStream, Span buffer) } // Asynchronously writes the current header as a V7 entry into the archive stream and returns the value of the final checksum. 
- internal async Task WriteAsV7Async(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsV7Async(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - long actualLength = GetTotalDataBytesToWrite(); - TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); - - int tmpChecksum = WriteName(buffer.Span, out _); - tmpChecksum += WriteCommonFields(buffer.Span, actualLength, actualEntryType); - int checksum = WriteChecksum(tmpChecksum, buffer.Span); + long actualLength = WriteV7FieldsToBuffer(buffer.Span); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -62,21 +53,25 @@ internal async Task WriteAsV7Async(Stream archiveStream, Memory buffe { await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false); } + } - return checksum; + // Writes the V7 header fields to the specified buffer, calculates and writes the checksum, then returns the final data length. + private long WriteV7FieldsToBuffer(Span buffer) + { + long actualLength = GetTotalDataBytesToWrite(); + TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.V7, _typeFlag); + + int tmpChecksum = WriteName(buffer, out _); + tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); + _checksum = WriteChecksum(tmpChecksum, buffer); + + return actualLength; } // Writes the current header as a Ustar entry into the archive stream. 
internal void WriteAsUstar(Stream archiveStream, Span buffer) { - long actualLength = GetTotalDataBytesToWrite(); - TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Ustar, _typeFlag); - - int checksum = WritePosixName(buffer); - checksum += WriteCommonFields(buffer, actualLength, actualEntryType); - checksum += WritePosixMagicAndVersion(buffer); - checksum += WritePosixAndGnuSharedFields(buffer); - _checksum = WriteChecksum(checksum, buffer); + long actualLength = WriteUstarFieldsToBuffer(buffer); archiveStream.Write(buffer); @@ -87,18 +82,11 @@ internal void WriteAsUstar(Stream archiveStream, Span buffer) } // Asynchronously rites the current header as a Ustar entry into the archive stream and returns the value of the final checksum. - internal async Task WriteAsUstarAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsUstarAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - long actualLength = GetTotalDataBytesToWrite(); - TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Ustar, _typeFlag); - - int tmpChecksum = WritePosixName(buffer.Span); - tmpChecksum += WriteCommonFields(buffer.Span, actualLength, actualEntryType); - tmpChecksum += WritePosixMagicAndVersion(buffer.Span); - tmpChecksum += WritePosixAndGnuSharedFields(buffer.Span); - int checksum = WriteChecksum(tmpChecksum, buffer.Span); + long actualLength = WriteUstarFieldsToBuffer(buffer.Span); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -106,33 +94,47 @@ internal async Task WriteAsUstarAsync(Stream archiveStream, Memory bu { await WriteDataAsync(archiveStream, _dataStream, actualLength, cancellationToken).ConfigureAwait(false); } + } - return checksum; + // Writes the Ustar header fields to the specified buffer, calculates and writes the checksum, then returns 
the final data length. + private long WriteUstarFieldsToBuffer(Span buffer) + { + long actualLength = GetTotalDataBytesToWrite(); + TarEntryType actualEntryType = TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Ustar, _typeFlag); + + int tmpChecksum = WritePosixName(buffer); + tmpChecksum += WriteCommonFields(buffer, actualLength, actualEntryType); + tmpChecksum += WritePosixMagicAndVersion(buffer); + tmpChecksum += WritePosixAndGnuSharedFields(buffer); + _checksum = WriteChecksum(tmpChecksum, buffer); + + return actualLength; } // Writes the current header as a PAX Global Extended Attributes entry into the archive stream. internal void WriteAsPaxGlobalExtendedAttributes(Stream archiveStream, Span buffer, int globalExtendedAttributesEntryNumber) { - Debug.Assert(_typeFlag is TarEntryType.GlobalExtendedAttributes); - - _name = GenerateGlobalExtendedAttributeName(globalExtendedAttributesEntryNumber); - _extendedAttributes ??= new Dictionary(); - WriteAsPaxExtendedAttributes(archiveStream, buffer, _extendedAttributes, isGea: true); + VerifyGlobalExtendedAttributesDataIsValid(globalExtendedAttributesEntryNumber); + WriteAsPaxExtendedAttributes(archiveStream, buffer, ExtendedAttributes, isGea: true, globalExtendedAttributesEntryNumber); } // Writes the current header as a PAX Global Extended Attributes entry into the archive stream and returns the value of the final checksum. 
- internal Task WriteAsPaxGlobalExtendedAttributesAsync(Stream archiveStream, Memory buffer, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) + internal Task WriteAsPaxGlobalExtendedAttributesAsync(Stream archiveStream, Memory buffer, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { - Debug.Assert(_typeFlag is TarEntryType.GlobalExtendedAttributes); - if (cancellationToken.IsCancellationRequested) { return Task.FromCanceled(cancellationToken); } - _name = GenerateGlobalExtendedAttributeName(globalExtendedAttributesEntryNumber); - _extendedAttributes ??= new Dictionary(); - return WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, _extendedAttributes, isGea: true, cancellationToken); + VerifyGlobalExtendedAttributesDataIsValid(globalExtendedAttributesEntryNumber); + return WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, ExtendedAttributes, isGea: true, globalExtendedAttributesEntryNumber, cancellationToken); + } + + // Verifies the data is valid for writing a Global Extended Attributes entry. + private void VerifyGlobalExtendedAttributesDataIsValid(int globalExtendedAttributesEntryNumber) + { + Debug.Assert(_typeFlag is TarEntryType.GlobalExtendedAttributes); + Debug.Assert(globalExtendedAttributesEntryNumber >= 0); } // Writes the current header as a PAX entry into the archive stream. 
@@ -146,9 +148,7 @@ internal void WriteAsPax(Stream archiveStream, Span buffer) // Fill the current header's dict CollectExtendedAttributesFromStandardFieldsIfNeeded(); // And pass the attributes to the preceding extended attributes header for writing - Debug.Assert(_extendedAttributes != null); - extendedAttributesHeader.WriteAsPaxExtendedAttributes(archiveStream, buffer, _extendedAttributes, isGea: false); - + extendedAttributesHeader.WriteAsPaxExtendedAttributes(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1); buffer.Clear(); // Reset it to reuse it // Second, we write this header as a normal one WriteAsPaxInternal(archiveStream, buffer); @@ -156,23 +156,22 @@ internal void WriteAsPax(Stream archiveStream, Span buffer) // Asynchronously writes the current header as a PAX entry into the archive stream. // Makes sure to add the preceding exteded attributes entry before the actual entry. - internal async Task WriteAsPaxAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsPaxAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { + Debug.Assert(_typeFlag is not TarEntryType.GlobalExtendedAttributes); + cancellationToken.ThrowIfCancellationRequested(); // First, we write the preceding extended attributes header TarHeader extendedAttributesHeader = new(TarEntryFormat.Pax); // Fill the current header's dict CollectExtendedAttributesFromStandardFieldsIfNeeded(); - // And pass them to the extended attributes header for writing - _extendedAttributes ??= new Dictionary(); - - // Don't need to store the returned checksum, we only need it written to the archive in the extended attributes entry header - await extendedAttributesHeader.WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, _extendedAttributes, isGea: false, cancellationToken).ConfigureAwait(false); + // And pass the attributes to the preceding extended attributes header for 
writing + await extendedAttributesHeader.WriteAsPaxExtendedAttributesAsync(archiveStream, buffer, ExtendedAttributes, isGea: false, globalExtendedAttributesEntryNumber: -1, cancellationToken).ConfigureAwait(false); buffer.Span.Clear(); // Reset it to reuse it // Second, we write this header as a normal one - return await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } // Writes the current header as a Gnu entry into the archive stream. @@ -201,7 +200,7 @@ internal void WriteAsGnu(Stream archiveStream, Span buffer) // Writes the current header as a Gnu entry into the archive stream. // Makes sure to add the preceding LongLink and/or LongPath entries if necessary, before the actual entry. - internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); @@ -222,7 +221,7 @@ internal async Task WriteAsGnuAsync(Stream archiveStream, Memory buff } // Third, we write this header as a normal one - return await WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + await WriteAsGnuInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); } // Creates and returns a GNU long metadata header, with the specified long text written into its data stream. @@ -273,7 +272,7 @@ private static TarHeader GetDefaultGnuLongMetadataHeader(int longTextLength, Tar // Writes the current header as a GNU entry into the archive stream. 
internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) { - WriteAsGnuSharedInternal(buffer, out long actualLength, out _checksum); + WriteAsGnuSharedInternal(buffer, out long actualLength); archiveStream.Write(buffer); @@ -284,11 +283,11 @@ internal void WriteAsGnuInternal(Stream archiveStream, Span buffer) } // Asynchronously writes the current header as a GNU entry into the archive stream. - internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - WriteAsGnuSharedInternal(buffer.Span, out long actualLength, out int checksum); + WriteAsGnuSharedInternal(buffer.Span, out long actualLength); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -296,12 +295,10 @@ internal async Task WriteAsGnuInternalAsync(Stream archiveStream, Memory buffer, out long actualLength, out int checksum) + private void WriteAsGnuSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); @@ -310,54 +307,46 @@ private void WriteAsGnuSharedInternal(Span buffer, out long actualLength, tmpChecksum += WriteGnuMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); tmpChecksum += WriteGnuFields(buffer); - checksum = WriteChecksum(tmpChecksum, buffer); + + _checksum = WriteChecksum(tmpChecksum, buffer); } // Writes the current header as a PAX Extended Attributes entry into the archive stream. 
- private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffer, IEnumerable> extendedAttributes, bool isGea) + private void WriteAsPaxExtendedAttributes(Stream archiveStream, Span buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber) { - // The ustar fields (uid, gid, linkName, uname, gname, devmajor, devminor) do not get written. - // The mode gets the default value. - _name = GenerateExtendedAttributeName(); - _mode = TarHelpers.GetDefaultMode(_typeFlag); - _typeFlag = isGea ? TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes; - _linkName = string.Empty; - _magic = string.Empty; - _version = string.Empty; - _gName = string.Empty; - _uName = string.Empty; - + WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); _dataStream = GenerateExtendedAttributesDataStream(extendedAttributes); - WriteAsPaxInternal(archiveStream, buffer); } // Asynchronously writes the current header as a PAX Extended Attributes entry into the archive stream and returns the value of the final checksum. - private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, IEnumerable> extendedAttributes, bool isGea, CancellationToken cancellationToken) + private async Task WriteAsPaxExtendedAttributesAsync(Stream archiveStream, Memory buffer, Dictionary extendedAttributes, bool isGea, int globalExtendedAttributesEntryNumber, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - // The ustar fields (uid, gid, linkName, uname, gname, devmajor, devminor) do not get written. - // The mode gets the default value. - _name = GenerateExtendedAttributeName(); - _mode = TarHelpers.GetDefaultMode(_typeFlag); - _typeFlag = isGea ? 
TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes; - _linkName = string.Empty; - _magic = string.Empty; - _version = string.Empty; - _gName = string.Empty; - _uName = string.Empty; - + WriteAsPaxExtendedAttributesShared(isGea, globalExtendedAttributesEntryNumber); _dataStream = await GenerateExtendedAttributesDataStreamAsync(extendedAttributes, cancellationToken).ConfigureAwait(false); + await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + } + + // Initializes the name, mode and type flag of a PAX extended attributes entry. + private void WriteAsPaxExtendedAttributesShared(bool isGea, int globalExtendedAttributesEntryNumber) + { + Debug.Assert(isGea && globalExtendedAttributesEntryNumber >= 0 || !isGea && globalExtendedAttributesEntryNumber < 0); - return await WriteAsPaxInternalAsync(archiveStream, buffer, cancellationToken).ConfigureAwait(false); + _name = isGea ? + GenerateGlobalExtendedAttributeName(globalExtendedAttributesEntryNumber) : + GenerateExtendedAttributeName(); + + _mode = TarHelpers.GetDefaultMode(_typeFlag); + _typeFlag = isGea ? TarEntryType.GlobalExtendedAttributes : TarEntryType.ExtendedAttributes; } // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes // This method writes an entry as both entries require, using the data from the current header instance. 
private void WriteAsPaxInternal(Stream archiveStream, Span buffer) { - WriteAsPaxSharedInternal(buffer, out long actualLength, out _checksum); + WriteAsPaxSharedInternal(buffer, out long actualLength); archiveStream.Write(buffer); @@ -369,11 +358,11 @@ private void WriteAsPaxInternal(Stream archiveStream, Span buffer) // Both the Extended Attributes and Global Extended Attributes entry headers are written in a similar way, just the data changes // This method asynchronously writes an entry as both entries require, using the data from the current header instance. - private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) + private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); - WriteAsPaxSharedInternal(buffer.Span, out long actualLength, out int checksum); + WriteAsPaxSharedInternal(buffer.Span, out long actualLength); await archiveStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -381,12 +370,10 @@ private async Task WriteAsPaxInternalAsync(Stream archiveStream, Memory buffer, out long actualLength, out int checksum) + private void WriteAsPaxSharedInternal(Span buffer, out long actualLength) { actualLength = GetTotalDataBytesToWrite(); @@ -394,7 +381,8 @@ private void WriteAsPaxSharedInternal(Span buffer, out long actualLength, tmpChecksum += WriteCommonFields(buffer, actualLength, TarHelpers.GetCorrectTypeFlagForFormat(TarEntryFormat.Pax, _typeFlag)); tmpChecksum += WritePosixMagicAndVersion(buffer); tmpChecksum += WritePosixAndGnuSharedFields(buffer); - checksum = WriteChecksum(tmpChecksum, buffer); + + _checksum = WriteChecksum(tmpChecksum, buffer); } // All formats save in the name byte array only the ASCII bytes that fit. The full string is returned in the out byte array. 
@@ -551,36 +539,38 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream } // Dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static Stream? GenerateExtendedAttributesDataStream(IEnumerable> extendedAttributes) + private static Stream? GenerateExtendedAttributesDataStream(Dictionary extendedAttributes) { MemoryStream? dataStream = null; - foreach ((string attribute, string value) in extendedAttributes) + if (extendedAttributes.Count > 0) { - // Need to do this because IEnumerable has no Count property - dataStream ??= new MemoryStream(); - - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - dataStream.Write(entryBytes); + dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) + { + byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); + dataStream.Write(entryBytes); + } + dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning return dataStream; } // Asynchronously dumps into the archive stream an extended attribute entry containing metadata of the entry it precedes. - private static async Task GenerateExtendedAttributesDataStreamAsync(IEnumerable> extendedAttributes, CancellationToken cancellationToken) + private static async Task GenerateExtendedAttributesDataStreamAsync(Dictionary extendedAttributes, CancellationToken cancellationToken) { cancellationToken.ThrowIfCancellationRequested(); MemoryStream? 
dataStream = null; - foreach ((string attribute, string value) in extendedAttributes) + if (extendedAttributes.Count > 0) { - // Need to do this because IEnumerable has no Count property - dataStream ??= new MemoryStream(); - - byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); - await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + dataStream = new MemoryStream(); + foreach ((string attribute, string value) in extendedAttributes) + { + byte[] entryBytes = GenerateExtendedAttributeKeyValuePairAsByteArray(Encoding.UTF8.GetBytes(attribute), Encoding.UTF8.GetBytes(value)); + await dataStream.WriteAsync(entryBytes, cancellationToken).ConfigureAwait(false); + } + dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning } - dataStream?.Seek(0, SeekOrigin.Begin); // Ensure it gets written into the archive from the beginning return dataStream; } @@ -588,30 +578,29 @@ private static async Task WriteDataAsync(Stream archiveStream, Stream dataStream // extended attributes. They get collected and saved in that dictionary, with no restrictions. 
private void CollectExtendedAttributesFromStandardFieldsIfNeeded() { - _extendedAttributes ??= new Dictionary(); - _extendedAttributes.Add(PaxEaName, _name); + ExtendedAttributes.Add(PaxEaName, _name); - if (!_extendedAttributes.ContainsKey(PaxEaMTime)) + if (!ExtendedAttributes.ContainsKey(PaxEaMTime)) { - _extendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); + ExtendedAttributes.Add(PaxEaMTime, TarHelpers.GetTimestampStringFromDateTimeOffset(_mTime)); } if (!string.IsNullOrEmpty(_gName)) { - TryAddStringField(_extendedAttributes, PaxEaGName, _gName, FieldLengths.GName); + TryAddStringField(ExtendedAttributes, PaxEaGName, _gName, FieldLengths.GName); } if (!string.IsNullOrEmpty(_uName)) { - TryAddStringField(_extendedAttributes, PaxEaUName, _uName, FieldLengths.UName); + TryAddStringField(ExtendedAttributes, PaxEaUName, _uName, FieldLengths.UName); } if (!string.IsNullOrEmpty(_linkName)) { - _extendedAttributes.Add(PaxEaLinkName, _linkName); + ExtendedAttributes.Add(PaxEaLinkName, _linkName); } if (_size > 99_999_999) { - _extendedAttributes.Add(PaxEaSize, _size.ToString()); + ExtendedAttributes.Add(PaxEaSize, _size.ToString()); } diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs index bd6700c705519..65fdda022b32b 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; +using System.Diagnostics; using System.IO; namespace System.Formats.Tar @@ -77,7 +78,8 @@ internal sealed partial class TarHeader // PAX attributes - internal Dictionary? _extendedAttributes; + private Dictionary? 
_ea; + internal Dictionary ExtendedAttributes => _ea ??= new Dictionary(); // GNU attributes @@ -113,6 +115,12 @@ internal TarHeader(TarEntryFormat format, TarEntryType typeFlag, TarHeader other _dataStream = other._dataStream; } + internal void InitializeExtendedAttributesWithExisting(IEnumerable> existing) + { + Debug.Assert(_ea == null); + _ea = new Dictionary(existing); + } + private static string GetMagicForFormat(TarEntryFormat format) => format switch { TarEntryFormat.Ustar or TarEntryFormat.Pax => UstarMagic, diff --git a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs index 4089cc5a13a57..7e951c5243e35 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs @@ -430,10 +430,8 @@ TarEntryType.LongLink or throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, actualHeader._typeFlag, TarEntryType.ExtendedAttributes)); } - Debug.Assert(extendedAttributesHeader._extendedAttributes != null); - // Replace all the attributes representing standard fields with the extended ones, if any - actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader._extendedAttributes); + actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); return true; } @@ -466,10 +464,8 @@ TarEntryType.LongLink or throw new FormatException(string.Format(SR.TarUnexpectedMetadataEntry, TarEntryType.ExtendedAttributes, TarEntryType.ExtendedAttributes)); } - Debug.Assert(extendedAttributesHeader._extendedAttributes != null); - // Replace all the attributes representing standard fields with the extended ones, if any - actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader._extendedAttributes); + actualHeader.ReplaceNormalAttributesWithExtended(extendedAttributesHeader.ExtendedAttributes); return actualHeader; } diff --git 
a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs index 4161e36fb8c38..9469231684023 100644 --- a/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs +++ b/src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarWriter.cs @@ -192,45 +192,7 @@ public void WriteEntry(TarEntry entry) { ObjectDisposedException.ThrowIf(_isDisposed, this); ArgumentNullException.ThrowIfNull(entry); - - byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); - Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger - buffer.Clear(); // Rented arrays aren't clean - try - { - switch (entry.Format) - { - case TarEntryFormat.V7: - entry._header.WriteAsV7(_archiveStream, buffer); - break; - case TarEntryFormat.Ustar: - entry._header.WriteAsUstar(_archiveStream, buffer); - break; - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber); - _nextGlobalExtendedAttributesEntryNumber++; - } - else - { - entry._header.WriteAsPax(_archiveStream, buffer); - } - break; - case TarEntryFormat.Gnu: - entry._header.WriteAsGnu(_archiveStream, buffer); - break; - default: - Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } - } - finally - { - ArrayPool.Shared.Return(rented); - } - - _wroteEntries = true; + WriteEntryInternal(entry); } /// @@ -331,6 +293,48 @@ private async ValueTask DisposeAsync(bool disposing) } } + // Portion of the WriteEntry(entry) method that rents a buffer and writes to the archive. 
+ private void WriteEntryInternal(TarEntry entry) + { + byte[] rented = ArrayPool.Shared.Rent(minimumLength: TarHelpers.RecordSize); + Span buffer = rented.AsSpan(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger + buffer.Clear(); // Rented arrays aren't clean + try + { + switch (entry.Format) + { + case TarEntryFormat.V7: + entry._header.WriteAsV7(_archiveStream, buffer); + break; + case TarEntryFormat.Ustar: + entry._header.WriteAsUstar(_archiveStream, buffer); + break; + case TarEntryFormat.Pax: + if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) + { + entry._header.WriteAsPaxGlobalExtendedAttributes(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++); + } + else + { + entry._header.WriteAsPax(_archiveStream, buffer); + } + break; + case TarEntryFormat.Gnu: + entry._header.WriteAsGnu(_archiveStream, buffer); + break; + default: + Debug.Assert(entry.Format == TarEntryFormat.Unknown, "Missing format handler"); + throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); + } + } + finally + { + ArrayPool.Shared.Return(rented); + } + + _wroteEntries = true; + } + // Portion of the WriteEntryAsync(TarEntry, CancellationToken) method containing awaits. 
private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken cancellationToken) { @@ -340,36 +344,17 @@ private async Task WriteEntryAsyncInternal(TarEntry entry, CancellationToken can Memory buffer = rented.AsMemory(0, TarHelpers.RecordSize); // minimumLength means the array could've been larger buffer.Span.Clear(); // Rented arrays aren't clean - switch (entry.Format) + Task task = entry.Format switch { - case TarEntryFormat.V7: - entry._header._checksum = await entry._header.WriteAsV7Async(_archiveStream, buffer, cancellationToken).ConfigureAwait(false); - break; - - case TarEntryFormat.Ustar: - entry._header._checksum = await entry._header.WriteAsUstarAsync(_archiveStream, buffer, cancellationToken).ConfigureAwait(false); - break; - - case TarEntryFormat.Pax: - if (entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes) - { - entry._header._checksum = await entry._header.WriteAsPaxGlobalExtendedAttributesAsync(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber, cancellationToken).ConfigureAwait(false); - _nextGlobalExtendedAttributesEntryNumber++; - } - else - { - entry._header._checksum = await entry._header.WriteAsPaxAsync(_archiveStream, buffer, cancellationToken).ConfigureAwait(false); - } - break; + TarEntryFormat.V7 => entry._header.WriteAsV7Async(_archiveStream, buffer, cancellationToken), + TarEntryFormat.Ustar => entry._header.WriteAsUstarAsync(_archiveStream, buffer, cancellationToken), + TarEntryFormat.Pax when entry._header._typeFlag is TarEntryType.GlobalExtendedAttributes => entry._header.WriteAsPaxGlobalExtendedAttributesAsync(_archiveStream, buffer, _nextGlobalExtendedAttributesEntryNumber++, cancellationToken), + TarEntryFormat.Pax => entry._header.WriteAsPaxAsync(_archiveStream, buffer, cancellationToken), + TarEntryFormat.Gnu => entry._header.WriteAsGnuAsync(_archiveStream, buffer, cancellationToken), + _ => throw new FormatException(string.Format(SR.TarInvalidFormat, Format)), + }; + await 
task.ConfigureAwait(false); - case TarEntryFormat.Gnu: - entry._header._checksum = await entry._header.WriteAsGnuAsync(_archiveStream, buffer, cancellationToken).ConfigureAwait(false); - break; - - case TarEntryFormat.Unknown: - default: - throw new FormatException(string.Format(SR.TarInvalidFormat, Format)); - } _wroteEntries = true; ArrayPool.Shared.Return(rented);