diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/zip.cr | 569 |
1 files changed, 569 insertions, 0 deletions
diff --git a/src/zip.cr b/src/zip.cr new file mode 100644 index 0000000..ce61e13 --- /dev/null +++ b/src/zip.cr @@ -0,0 +1,569 @@ +require "./zip/*" + +module Zip + VERSION = "0.1.0" + + LE = IO::ByteOrder::LittleEndian + + # 4.4.4 general purpose bit flag: (2 bytes) + # + # Bit 0: If set, indicates that the file is encrypted. + # + # (For Method 6 - Imploding) + # Bit 1: If the compression method used was type 6, + # Imploding, then this bit, if set, indicates + # an 8K sliding dictionary was used. If clear, + # then a 4K sliding dictionary was used. + # + # Bit 2: If the compression method used was type 6, + # Imploding, then this bit, if set, indicates + # 3 Shannon-Fano trees were used to encode the + # sliding dictionary output. If clear, then 2 + # Shannon-Fano trees were used. + # + # (For Methods 8 and 9 - Deflating) + # Bit 2 Bit 1 + # 0 0 Normal (-en) compression option was used. + # 0 1 Maximum (-exx/-ex) compression option was used. + # 1 0 Fast (-ef) compression option was used. + # 1 1 Super Fast (-es) compression option was used. + # + # (For Method 14 - LZMA) + # Bit 1: If the compression method used was type 14, + # LZMA, then this bit, if set, indicates + # an end-of-stream (EOS) marker is used to + # mark the end of the compressed data stream. + # If clear, then an EOS marker is not present + # and the compressed data size must be known + # to extract. + # + # Note: Bits 1 and 2 are undefined if the compression + # method is any other. + # + # Bit 3: If this bit is set, the fields crc-32, compressed + # size and uncompressed size are set to zero in the + # local header. The correct values are put in the + # data descriptor immediately following the compressed + # data. (Note: PKZIP version 2.04g for DOS only + # recognizes this bit for method 8 compression, newer + # versions of PKZIP recognize this bit for any + # compression method.) + # + # Bit 4: Reserved for use with method 8, for enhanced + # deflating. + # + # Bit 5: If this bit is set, this indicates that the file is + # compressed patched data. (Note: Requires PKZIP + # version 2.70 or greater) + # + # Bit 6: Strong encryption. If this bit is set, you MUST + # set the version needed to extract value to at least + # 50 and you MUST also set bit 0. If AES encryption + # is used, the version needed to extract value MUST + # be at least 51. See the section describing the Strong + # Encryption Specification for details. Refer to the + # section in this document entitled "Incorporating PKWARE + # Proprietary Technology into Your Product" for more + # information. + # + # Bit 7: Currently unused. + # + # Bit 8: Currently unused. + # + # Bit 9: Currently unused. + # + # Bit 10: Currently unused. + # + # Bit 11: Language encoding flag (EFS). If this bit is set, + # the filename and comment fields for this file + # MUST be encoded using UTF-8. (see APPENDIX D) + # + # Bit 12: Reserved by PKWARE for enhanced compression. + # + # Bit 13: Set when encrypting the Central Directory to indicate + # selected data values in the Local Header are masked to + # hide their actual values. See the section describing + # the Strong Encryption Specification for details. Refer + # to the section in this document entitled "Incorporating + # PKWARE Proprietary Technology into Your Product" for + # more information. + # + # Bit 14: Reserved by PKWARE. + # + # Bit 15: Reserved by PKWARE. + # + @[Flags] + enum GeneralFlags + WEAK_ENCRYPTION + COMPRESSION_OPTION_1 + COMPRESSION_OPTION_2 + FOOTER + RESERVED_4 + PATCH + STRONG_ENCRYPTION + RESERVED_7 + RESERVED_8 + RESERVED_9 + RESERVED_10 + EFS + RESERVED_12 + MASKED_VALUES + RESERVED_14 + RESERVED_15 + end + + enum CompressionMethod + NONE = 0 # Stored (no compression) + SHRUNK = 1 # Shrunk + REDUCED_1 = 2 # Reduced with compression factor 1 + REDUCED_2 = 3 # Reduced with compression factor 2 + REDUCED_3 = 4 # Reduced with compression factor 3 + REDUCED_4 = 5 # Reduced with compression factor 4 + IMPLODED = 6 # Imploded + # Tokenized = 7 # Reserved for Tokenizing compression algorithm + DEFLATE = 8 # Deflated + DEFLATE64 = 9 # Enhanced Deflating using Deflate64(tm) + TERSE_OLD = 10 # PKWARE Data Compression Library Imploding (old IBM TERSE) + # RESERVED_11 = 11 # Reserved by PKWARE + BZIP2 = 12 # BZIP2 + # RESERVED_13 = 13 # Reserved by PKWARE + LZMA = 14 # LZMA (EFS) + # RESERVED_15 = 15 # Reserved by PKWARE + # RESERVED_16 = 16 # Reserved by PKWARE + # RESERVED_17 = 17 # Reserved by PKWARE + TERSE = 18 # IBM TERSE (new) + LZ77 = 19 # IBM LZ77 z Architecture (PFS) + WAVPACK = 97 # WavPack compressed data + PPMD = 98 # PPMd version I, Rev 1 + end + + # FIXME: should this have a better class? + class Error < Exception + end + + # TODO + class Reader + def initialize(path : String) + end + + def initialize(io : IO) + end + end + + module NoneCompressor + def self.compress_none(src_io, dst_io) + crc = 0_u32 + + buf = Bytes.new(4096) + src_len = 0_u64 + + while ((len = src_io.read(buf)) > 0) + # TODO: crc32 + + dst_io.write((len < buf.size) ? Bytes.new(buf, len) : len) + src_len += len + end + + # return results + { crc, src_len, dst_len } + end + end + + module DeflateCompressor + def self.compress_deflate(src_io, dst_io) + crc = 0_u32 + src_len = 0_u64 + dst_len = 0_u64 + + # create buffer and intermediate memory io + buf = Bytes.new(4096) + mem_io = MemoryIO.new(4096) + + Zlib::Deflate::Deflate.new( + output: mem_io, + sync_close: false, + ) do |zlib_io| + while ((len = src_io.read(buf)) > 0) + # TODO: crc32 + + # compress bytes to memory io + zlib_io.write((len < buf.size) ? Bytes.new(buf, len) : buf) + src_len += len + + # write compressed bytes to dst_io + dst_io.write(Bytes.new(mem_io.buffer, mem_io.pos)) + dst_len += mem_io.pos + + # clear memio + mem_io.rewind + end + end + + # return results + { crc, src_len, dst_len } + end + end + + class WriterEntry + include NoneCompressor + include DeflateCompressor + + # TODO version needed to extract and header flags + # (used for header and central header) + VERSION_NEEDED = 0_u32 + GENERAL_FLAGS = GeneralFlags.flags(FOOTER, EFS) + + def initialize( + @pos : UInt64, + @path : String, + @io : IO, + @method : CompressionMethod = CompressionMethod::DEFLATE, + @time : Time = Time.now, + @comment : String? = nil, + ) + @crc = 0_u32 + @src_len = 0_u64 + @dst_len = 0_u64 + end + + def to_s(dst_io) : UInt64 + # write header + r = write_header(dst_io) + + # write body + @crc, @src_len, @dst_len = write_body(dst_io) + r += dst_len + + # write footer + r += write_footer(dst_io, crc, src_len, dst_len) + + # return number of bytes written + r + end + + # + # local file header signature 4 bytes (0x04034b50) + # version needed to extract 2 bytes + # general purpose bit flag 2 bytes + # compression method 2 bytes + # last mod file time 2 bytes + # last mod file date 2 bytes + # crc-32 4 bytes + # compressed size 4 bytes + # uncompressed size 4 bytes + # file name length 2 bytes + # extra field length 2 bytes + # file name (variable size) + # extra field (variable size) + # + + HEADER_MAGIC = 0x04034b50_u32 + + private def write_header( + io : IO, + path : String, + method : CompressionMethod, + time : Time, + ) : UInt64 + # get path length, in bytes + path_len = path.bytesize + + # check file path + raise "empty file path" if path_len == 0 + raise "file path too long" if path_len >= UInt16::MAX + raise "file path contains leading slash" if path[0] == '/' + + # write magic, version needed, flags, and compression method + HEADER_MAGIC.to_io(io, LE) + VERSION_NEEDED.to_u16.to_io(io, LE) + GENERAL_FLAGS.to_u16.to_io(io, LE) + method.to_io(io, LE) + + # TODO: write time + # encode(time, io) + + # crc, compressed size, uncompressed size + 0_u32.to_io(io, LE) + 0_u32.to_io(io, LE) + 0_u32.to_io(io, LE) + path_len.to_u16.to_io(io, LE) + + # write extras field length + extras_len = 0_u32 + extras_len.to_u16.to_io(io, LE) + + # write path field + path.to_s(io) + + # write extra fields + # TODO: implement this + + # return number of bytes written + 30_u64 + path_len + extras_len + end + + private def write_body(dst_io : IO) + case @method + when CompressionMethod::NONE + compress_none(@io, dst_io) + when CompressionMethod::DEFLATE + compress_deflate(@io, dst_io) + else + raise Error, "unsupported compression method" + end + end + + # 4.3.9 Data descriptor: + # MAGIC = 0x08074b50 4 bytes + # crc-32 4 bytes + # compressed size 4 bytes + # uncompressed size 4 bytes + # + # 4.3.9.3 Although not originally assigned a signature, the value + # 0x08074b50 has commonly been adopted as a signature value + + FOOTER_MAGIC = 0x08074b50_u32 + + private def write_footer( + io : IO, + crc : UInt32, + src_len : UInt64, + dst_len : UInt64, + ) : UInt64 + # write footer + FOOTER_MAGIC.to_io(io, LE) + crc.to_io(io, LE) + dst_len.to_io(io, LE) + src_len.to_io(io, LE) + + # return number of bytes written + 16_u64 + end + + # + # central file header signature 4 bytes (0x02014b50) + # version made by 2 bytes + # version needed to extract 2 bytes + # general purpose bit flag 2 bytes + # compression method 2 bytes + # last mod file time 2 bytes + # last mod file date 2 bytes + # crc-32 4 bytes + # compressed size 4 bytes + # uncompressed size 4 bytes + # file name length 2 bytes + # extra field length 2 bytes + # file comment length 2 bytes + # disk number start 2 bytes + # internal file attributes 2 bytes + # external file attributes 4 bytes + # relative offset of local header 4 bytes + # + # file name (variable size) + # extra field (variable size) + # file comment (variable size) + + CENTRAL_MAGIC = 0x02014b50_u32 + + # TODO: version made by, if unspecified + CENTRAL_VERSION_MADE_BY = 0_u32 + + def write_central( + io : IO, + version : UInt32 = CENTRAL_VERSION_MADE_BY + ) : UInt64 + CENTRAL_MAGIC.to_io(io, LE) + version.to_u16.to_io(io, LE) + VERSION_NEEDED.to_u16.to_io(io, LE) + GENERAL_FLAGS.to_u16.to_io(io, LE) + @method.to_io(io, LE) + + # TODO: write time + # encode(time, io) + + @crc.to_io(io, LE) + @dst_len.to_io(io, LE) + @src_len.to_io(io, LE) + + # get path length and write it + path_len = path.bytesize + path_len.to_u16.to_io(io, LE) + + # write extras field length + extras_len = 0_u32 + extras_len.to_u16.to_io(io, LE) + + comment_len = @comment.bytesize + comment_len.to_u16.to_io(io, LE) + + # write disk number + 0_u32.to_u16.to_io(io, LE) + + # write file attributes (internal, external) + # TODO + 0_u32.to_u16.to_io(io, LE) + 0_u32.to_u16.to_io(io, LE) + + # write local header offset + @pos.to_u32.to_io(io, LE) + + # write path field + path.to_s(io) + + # write extra fields + # TODO: implement this + + # write comment + comment.to_s(io) + + # return number of bytes written + 30_u64 + path_len + extras_len + + # TODO + 0_u64 + end + + end + + class Writer + def initialize( + @io : IO, + @pos : UInt64 = 0, + @comment : String = "", + @version : UInt32 = 0, + ) + @entries = [] of WriterEntry + @closed = false + @src_pos = @pos + end + + def closed? + @closed + end + + private def assert_open + raise Error, "already closed" if closed? + end + + def bytes_written : UInt64 + # return total number of bytes written + @src_pos - @pos + end + + def close + assert_open + + # cache cdr position + cdr_pos = @pos + + @entries.each do |entry| + @pos += entry.to_central(@io) + end + + # write zip footer + write_footer(cdr_pos) + + # flag as closed + @closed = true + + # return total number of bytes written + bytes_written + end + + def add( + path : String, + io : IO, + method : CompressionMethod = CompressionMethod::DEFLATE, + time : Time = Time.now, + comment : String? = nil, + ) : UInt64 + src_pos = @pos, + # make sure writer is still open + assert_open + + # create entry + entry = WriterEntry.new( + pos: @pos, + path: path, + io: io, + method: method, + time: time, + comment: comment, + ) + + # add to list of entries + @entries << entry + + # write entry, update offset + @pos += entry.to_s(@io) + + # return number of bytes written + @pos - src_pos + end + + # 4.3.16 End of central directory record: + # + # end of central dir signature 4 bytes (0x06054b50) + # number of this disk 2 bytes + # number of the disk with the + # start of the central directory 2 bytes + # total number of entries in the + # central directory on this disk 2 bytes + # total number of entries in + # the central directory 2 bytes + # size of the central directory 4 bytes + # offset of start of central + # directory with respect to + # the starting disk number 4 bytes + # .ZIP file comment length 2 bytes + # .ZIP file comment (variable size) + + FOOTER_MAGIC = 0x06054b50_u32 + + private def write_footer(cdr_pos : UInt64) + FOOTER_MAGIC.to_io(@io, LE) + 0_u32.to_u16.to_io(@io, LE) + 0_u32.to_u16.to_io(@io, LE) + + num_entries = @entries.size + num_entries.to_u16.to_io(@io, LE) + num_entries.to_u16.to_io(@io, LE) + + (@pos - cdr_pos).to_io(@io, LE) + cdr_pos.to_io(@io, LE) + + @comment.bytesize.to_u16.to_io(@io, LE) + @comment.to_s(@io) + end + end + + def self.write( + io : IO, + pos : UInt64 = 0, + comment : String = "", + version : UInt32 = 0, + &cb : Writer -> \ + ) : UInt64 + r = 0_u64 + begin + w = Writer.new(io, pos, comment, version) + cb.call(w) + ensure + w.close unless w.closed? + r = w.bytes_written + end + + # return total number of bytes written + r + end + + def self.write( + path : String, + pos : UInt64 = 0, + comment : String = "", + version : UInt32 = 0, + &cb : Writer -> \ + ) : UInt64 + write(File.open(path, "wb"), pos, comment, version, &cb) + end +end |