[swift-users] Getting at the bytes of Data

Fri Dec 9 12:00:35 CST 2016

What you have is probably ok in common usage; but there is a chance that it would not properly do a copy on write change as you expect if the Data was somehow obtained from a  multi-threaded environment. Here is a slight refactor (with a few extra fixes and comments for you that might save you some time later)

import Foundation
import Compression

public enum CompressionAlgorithm {
    case lz4   // speed is critical
    case lz4a  // space is critical
    case zlib  // reasonable speed and space
    case lzfse // better speed and space
}

private enum CompressionOperation {
    case compression
    case decompression
}

private func perform(_ operation: CompressionOperation,
                     on input: Data,
                     using algorithm: CompressionAlgorithm,
                     workingBufferSize: Int = 2000) -> Data?  {

    // set the algorithm
    let streamAlgorithm: compression_algorithm
    switch algorithm {
    case .lz4:   streamAlgorithm = COMPRESSION_LZ4
    case .lz4a:  streamAlgorithm = COMPRESSION_LZMA
    case .zlib:  streamAlgorithm = COMPRESSION_ZLIB
    case .lzfse: streamAlgorithm = COMPRESSION_LZFSE
    }

    // set the stream operation, and flags
    let streamOperation: compression_stream_operation
    let flags: Int32
    switch operation {
    case .compression:
        streamOperation = COMPRESSION_STREAM_ENCODE
        flags = Int32(COMPRESSION_STREAM_FINALIZE.rawValue)
    case .decompression:
        streamOperation = COMPRESSION_STREAM_DECODE
        flags = 0
    }

    let dstSize = workingBufferSize
    let dstPointer = UnsafeMutablePointer<UInt8>.allocate(capacity: dstSize) // you could technically use a Data for this too by accessing the mutable bytes ;)
    defer {
        dstPointer.deallocate(capacity: dstSize)
    }

    let inputCount = input.count
    return input.withUnsafeBytes { (bytes: UnsafePointer<UInt8>) in // the closure here can return a Data so the entire compression operation can be encapsulated in the lambda
        var output = Data()
        var stream = compression_stream(dst_ptr: dstPointer, dst_size: dstSize, src_ptr: bytes, src_size: inputCount, state: nil)
        var status = compression_stream_init(&stream, streamOperation, streamAlgorithm)
        guard status != COMPRESSION_STATUS_ERROR else {
            return nil // perhaps instead of making this nullable a throw?
        }
        defer {
            compression_stream_destroy(&stream)
        }

        while status == COMPRESSION_STATUS_OK {
            // process the stream
            status = compression_stream_process(&stream, flags)

            // collect bytes from the stream and reset
            switch status {
            case COMPRESSION_STATUS_OK:
                output.append(dstPointer, count: dstSize)
                stream.dst_ptr = dstPointer
                stream.dst_size = dstSize
                break // I am certain you probably meant to break here?
            case COMPRESSION_STATUS_ERROR:
                return nil // perhaps instead of making this nullable a throw?
            case COMPRESSION_STATUS_END:
                output.append(dstPointer, count: stream.dst_ptr - dstPointer)
                break // here too, this should break right?
            default:
                break // this probably should be a fatal error since it is an unexpected result.
            }
        }
        return output
    }
}

// Compressed keeps the compressed data and the algorithm
// together as one unit, so you never forget how the data was
// compressed.
public struct Compressed {
    public let data: Data // this was what was saving your state of the pointer
    public let algorithm: CompressionAlgorithm

    public init(data: Data, algorithm: CompressionAlgorithm) {
        self.data = data
        self.algorithm = algorithm
    }

    // Compress the input with the specified algorith. Returns nil if it fails.
    public static func compress(input: Data,
                                with algorithm: CompressionAlgorithm) -> Compressed? {
        guard let data = perform(.compression, on: input, using: algorithm) else {
            return nil
        }
        return Compressed(data: data, algorithm: algorithm)
    }

    // Factory method to return uncompressed data. Returns nil if it cannot be decompressed.
    public func makeDecompressed() -> Data? {
        return perform(.decompression, on: data, using: algorithm)
    }
}

// For discoverability, add a compressed method to Data
extension Data {
    // Factory method to make compressed data or nil if it fails.
    public func makeCompressed(with algorithm: CompressionAlgorithm) -> Compressed? {
        return Compressed.compress(input: self, with: algorithm)
    }
}

// Example usage:
let input = Data(bytes: Array(repeating: UInt8(123), count: 10000))

let compressed = input.makeCompressed(with: .lzfse)
compressed?.data.count // in most cases much less than orginal input count
let restoredInput = compressed?.makeDecompressed()
input == restoredInput // true

> On Dec 9, 2016, at 9:04 AM, Ray Fix <rayfix at gmail.com> wrote:
> 
> Hi Philippe,
> 
> Thank you for your response!
> 
>> On Dec 9, 2016, at 8:44 AM, Philippe Hausler <phausler at apple.com <mailto:phausler at apple.com>> wrote:
>> 
>> So letting the bytes escape from the closure could potentially be in that territory of “bad things may happen”. If you know for certain that the Data will not be mutated and it’s underlying storage will last for the duration of the usage then you should be able to get away with it.
>> 
> 
> So far it is looking like I am getting away with it. I believe the conditions that you state hold.
> 
>> The bytes pointer is an inner pointer return from the encapsulating object backing the struct Data. Since that backing object can potentially be deallocated via mutation or the buffer may be reallocated (which very often can cause the base address of the pointer to change) we decided it would be safer to use a closure to ensure the lifespan of the bytes over the access to the bytes.
>> 
>> Perhaps it might be best to understand more of what you are trying to do to offer a better solution.
> 
> I am trying to use the compression_stream C API in the Compression framework and wrap it so it is a bit more Swifty.  :]
> 
> A full gist of the playground is here: https://gist.github.com/rayfix/a286bb55accffef09249ba3535993782 <https://gist.github.com/rayfix/a286bb55accffef09249ba3535993782>
> 
> See line 91.
> 
> Appreciate the hints. 
> 
> Ray

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.swift.org/pipermail/swift-users/attachments/20161209/df02370c/attachment.html>