diff options
| -rw-r--r-- | .gitignore | 10 | ||||
| -rw-r--r-- | .travis.yml | 1 | ||||
| -rw-r--r-- | LICENSE | 21 | ||||
| -rw-r--r-- | README.md | 41 | ||||
| -rw-r--r-- | shard.yml | 7 | ||||
| -rw-r--r-- | spec/spec_helper.cr | 2 | ||||
| -rw-r--r-- | spec/zip-crystal_spec.cr | 9 | ||||
| -rw-r--r-- | src/zip.cr | 569 | 
8 files changed, 660 insertions, 0 deletions
| diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8cf75a --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/doc/ +/libs/ +/.crystal/ +/.shards/ + + +# Libraries don't need dependency lock +# Dependencies will be locked in application that uses them +/shard.lock + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..ffc7b6a --- /dev/null +++ b/.travis.yml @@ -0,0 +1 @@ +language: crystal @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Paul Duncan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..454410d --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# zip-crystal + +TODO: Write a description here + +## Installation + + +Add this to your application's `shard.yml`: + +```yaml +dependencies: +  zip-crystal: +    github: [your-github-name]/zip-crystal +``` + + +## Usage + + +```crystal +require "zip-crystal" +``` + + +TODO: Write usage instructions here + +## Development + +TODO: Write development instructions here + +## Contributing + +1. Fork it ( https://github.com/[your-github-name]/zip-crystal/fork ) +2. Create your feature branch (git checkout -b my-new-feature) +3. Commit your changes (git commit -am 'Add some feature') +4. Push to the branch (git push origin my-new-feature) +5. Create a new Pull Request + +## Contributors + +- [[your-github-name]](https://github.com/[your-github-name]) Paul Duncan - creator, maintainer diff --git a/shard.yml b/shard.yml new file mode 100644 index 0000000..3a73c95 --- /dev/null +++ b/shard.yml @@ -0,0 +1,7 @@ +name: zip-crystal +version: 0.1.0 + +authors: +  - Paul Duncan <pabs@pablotron.org> + +license: MIT diff --git a/spec/spec_helper.cr b/spec/spec_helper.cr new file mode 100644 index 0000000..78a7a10 --- /dev/null +++ b/spec/spec_helper.cr @@ -0,0 +1,2 @@ +require "spec" +require "../src/zip-crystal" diff --git a/spec/zip-crystal_spec.cr b/spec/zip-crystal_spec.cr new file mode 100644 index 0000000..910b8f4 --- /dev/null +++ b/spec/zip-crystal_spec.cr @@ -0,0 +1,9 @@ +require "./spec_helper" + +describe Zip::Crystal do +  # TODO: Write tests + +  it "works" do +    false.should eq(true) +  end +end diff --git a/src/zip.cr b/src/zip.cr new file mode 100644 index 0000000..ce61e13 --- /dev/null +++ b/src/zip.cr @@ -0,0 +1,569 @@ +require "./zip/*" + +module Zip +  VERSION = "0.1.0" + +  LE = IO::ByteOrder::LittleEndian + +  # 4.4.4 general purpose bit flag: (2 bytes) +  # +  # Bit 0: If set, indicates that the file is encrypted. +  # +  # (For Method 6 - Imploding) +  # Bit 1: If the compression method used was type 6, +  #        Imploding, then this bit, if set, indicates +  #        an 8K sliding dictionary was used.  If clear, +  #        then a 4K sliding dictionary was used. +  # +  # Bit 2: If the compression method used was type 6, +  #        Imploding, then this bit, if set, indicates +  #        3 Shannon-Fano trees were used to encode the +  #        sliding dictionary output.  If clear, then 2 +  #        Shannon-Fano trees were used. +  # +  # (For Methods 8 and 9 - Deflating) +  # Bit 2  Bit 1 +  #   0      0    Normal (-en) compression option was used. +  #   0      1    Maximum (-exx/-ex) compression option was used. +  #   1      0    Fast (-ef) compression option was used. +  #   1      1    Super Fast (-es) compression option was used. +  # +  # (For Method 14 - LZMA) +  # Bit 1: If the compression method used was type 14, +  #        LZMA, then this bit, if set, indicates +  #        an end-of-stream (EOS) marker is used to +  #        mark the end of the compressed data stream. +  #        If clear, then an EOS marker is not present +  #        and the compressed data size must be known +  #        to extract. +  # +  # Note:  Bits 1 and 2 are undefined if the compression +  #        method is any other. +  # +  # Bit 3: If this bit is set, the fields crc-32, compressed  +  #        size and uncompressed size are set to zero in the  +  #        local header.  The correct values are put in the  +  #        data descriptor immediately following the compressed +  #        data.  (Note: PKZIP version 2.04g for DOS only  +  #        recognizes this bit for method 8 compression, newer  +  #        versions of PKZIP recognize this bit for any  +  #        compression method.) +  # +  # Bit 4: Reserved for use with method 8, for enhanced +  #        deflating.  +  # +  # Bit 5: If this bit is set, this indicates that the file is  +  #        compressed patched data.  (Note: Requires PKZIP  +  #        version 2.70 or greater) +  # +  # Bit 6: Strong encryption.  If this bit is set, you MUST +  #        set the version needed to extract value to at least +  #        50 and you MUST also set bit 0.  If AES encryption +  #        is used, the version needed to extract value MUST  +  #        be at least 51. See the section describing the Strong +  #        Encryption Specification for details.  Refer to the  +  #        section in this document entitled "Incorporating PKWARE  +  #        Proprietary Technology into Your Product" for more  +  #        information. +  # +  # Bit 7: Currently unused. +  # +  # Bit 8: Currently unused. +  # +  # Bit 9: Currently unused. +  # +  # Bit 10: Currently unused. +  # +  # Bit 11: Language encoding flag (EFS).  If this bit is set, +  #         the filename and comment fields for this file +  #         MUST be encoded using UTF-8. (see APPENDIX D) +  # +  # Bit 12: Reserved by PKWARE for enhanced compression. +  # +  # Bit 13: Set when encrypting the Central Directory to indicate  +  #         selected data values in the Local Header are masked to +  #         hide their actual values.  See the section describing  +  #         the Strong Encryption Specification for details.  Refer +  #         to the section in this document entitled "Incorporating  +  #         PKWARE Proprietary Technology into Your Product" for  +  #         more information. +  # +  # Bit 14: Reserved by PKWARE. +  # +  # Bit 15: Reserved by PKWARE. +  # +  @[Flags] +  enum GeneralFlags +    WEAK_ENCRYPTION +    COMPRESSION_OPTION_1 +    COMPRESSION_OPTION_2 +    FOOTER +    RESERVED_4 +    PATCH +    STRONG_ENCRYPTION +    RESERVED_7 +    RESERVED_8 +    RESERVED_9 +    RESERVED_10 +    EFS +    RESERVED_12 +    MASKED_VALUES +    RESERVED_14 +    RESERVED_15 +  end + +  enum CompressionMethod +    NONE = 0            # Stored (no compression) +    SHRUNK = 1          # Shrunk +    REDUCED_1 = 2       # Reduced with compression factor 1 +    REDUCED_2 = 3       # Reduced with compression factor 2 +    REDUCED_3 = 4       # Reduced with compression factor 3 +    REDUCED_4 = 5       # Reduced with compression factor 4 +    IMPLODED = 6        # Imploded +    # Tokenized = 7       # Reserved for Tokenizing compression algorithm +    DEFLATE = 8         # Deflated +    DEFLATE64 = 9       # Enhanced Deflating using Deflate64(tm) +    TERSE_OLD = 10      # PKWARE Data Compression Library Imploding (old IBM TERSE) +    # RESERVED_11 = 11    # Reserved by PKWARE +    BZIP2 = 12          # BZIP2 +    # RESERVED_13 = 13  # Reserved by PKWARE +    LZMA = 14           # LZMA (EFS) +    # RESERVED_15 = 15    # Reserved by PKWARE +    # RESERVED_16 = 16    # Reserved by PKWARE +    # RESERVED_17 = 17    # Reserved by PKWARE +    TERSE = 18          # IBM TERSE (new) +    LZ77 = 19           # IBM LZ77 z Architecture (PFS) +    WAVPACK = 97        # WavPack compressed data +    PPMD = 98           # PPMd version I, Rev 1 +  end + +  # FIXME: should this have a better class? +  class Error < Exception +  end + +  # TODO +  class Reader +    def initialize(path : String) +    end + +    def initialize(io : IO) +    end +  end + +  module NoneCompressor +    def self.compress_none(src_io, dst_io) +      crc = 0_u32 + +      buf = Bytes.new(4096) +      src_len = 0_u64 + +      while ((len = src_io.read(buf)) > 0) +        # TODO: crc32 + +        dst_io.write((len < buf.size) ? Bytes.new(buf, len) : len) +        src_len += len +      end + +      # return results +      { crc, src_len, dst_len } +    end +  end + +  module DeflateCompressor +    def self.compress_deflate(src_io, dst_io) +      crc = 0_u32 +      src_len = 0_u64 +      dst_len = 0_u64 + +      # create buffer and intermediate memory io +      buf = Bytes.new(4096) +      mem_io = MemoryIO.new(4096) + +      Zlib::Deflate::Deflate.new( +        output:     mem_io, +        sync_close: false, +      ) do |zlib_io| +        while ((len = src_io.read(buf)) > 0) +          # TODO: crc32 + +          # compress bytes to memory io +          zlib_io.write((len < buf.size) ? Bytes.new(buf, len) : buf) +          src_len += len + +          # write compressed bytes to dst_io +          dst_io.write(Bytes.new(mem_io.buffer, mem_io.pos)) +          dst_len += mem_io.pos + +          # clear memio +          mem_io.rewind +        end +      end + +      # return results +      { crc, src_len, dst_len } +    end +  end + +  class WriterEntry +    include NoneCompressor +    include DeflateCompressor + +    # TODO version needed to extract and header flags +    # (used for header and central header) +    VERSION_NEEDED = 0_u32 +    GENERAL_FLAGS = GeneralFlags.flags(FOOTER, EFS) + +    def initialize( +      @pos      : UInt64, +      @path     : String,  +      @io       : IO, +      @method   : CompressionMethod = CompressionMethod::DEFLATE, +      @time     : Time = Time.now, +      @comment  : String? = nil, +    ) +      @crc = 0_u32 +      @src_len = 0_u64 +      @dst_len = 0_u64 +    end + +    def to_s(dst_io) : UInt64 +      # write header +      r = write_header(dst_io) + +      # write body +      @crc, @src_len, @dst_len = write_body(dst_io) +      r += dst_len + +      # write footer +      r += write_footer(dst_io, crc, src_len, dst_len) + +      # return number of bytes written +      r +    end + +    # +    # local file header signature     4 bytes  (0x04034b50) +    # version needed to extract       2 bytes +    # general purpose bit flag        2 bytes +    # compression method              2 bytes +    # last mod file time              2 bytes +    # last mod file date              2 bytes +    # crc-32                          4 bytes +    # compressed size                 4 bytes +    # uncompressed size               4 bytes +    # file name length                2 bytes +    # extra field length              2 bytes +    # file name (variable size) +    # extra field (variable size) +    # + +    HEADER_MAGIC = 0x04034b50_u32 + +    private def write_header( +      io      : IO, +      path    : String, +      method  : CompressionMethod, +      time    : Time, +    ) : UInt64 +      # get path length, in bytes +      path_len = path.bytesize + +      # check file path +      raise "empty file path" if path_len == 0 +      raise "file path too long" if path_len >= UInt16::MAX +      raise "file path contains leading slash" if path[0] == '/' + +      # write magic, version needed, flags, and compression method +      HEADER_MAGIC.to_io(io, LE) +      VERSION_NEEDED.to_u16.to_io(io, LE) +      GENERAL_FLAGS.to_u16.to_io(io, LE) +      method.to_io(io, LE) + +      # TODO: write time +      # encode(time, io) + +      # crc, compressed size, uncompressed size +      0_u32.to_io(io, LE) +      0_u32.to_io(io, LE) +      0_u32.to_io(io, LE) +      path_len.to_u16.to_io(io, LE) + +      # write extras field length +      extras_len = 0_u32 +      extras_len.to_u16.to_io(io, LE) + +      # write path field +      path.to_s(io) + +      # write extra fields +      # TODO: implement this + +      # return number of bytes written +      30_u64 + path_len + extras_len +    end + +    private def write_body(dst_io : IO) +      case @method +      when CompressionMethod::NONE +        compress_none(@io, dst_io) +      when CompressionMethod::DEFLATE +        compress_deflate(@io, dst_io) +      else +        raise Error, "unsupported compression method" +      end +    end + +    #  4.3.9  Data descriptor: +    #       MAGIC = 0x08074b50              4 bytes +    #       crc-32                          4 bytes +    #       compressed size                 4 bytes +    #       uncompressed size               4 bytes +    # +    # 4.3.9.3 Although not originally assigned a signature, the value  +    # 0x08074b50 has commonly been adopted as a signature value  + +    FOOTER_MAGIC = 0x08074b50_u32 + +    private def write_footer( +      io      : IO, +      crc     : UInt32, +      src_len : UInt64, +      dst_len : UInt64, +    ) : UInt64 +      # write footer +      FOOTER_MAGIC.to_io(io, LE) +      crc.to_io(io, LE) +      dst_len.to_io(io, LE) +      src_len.to_io(io, LE) + +      # return number of bytes written +      16_u64 +    end + +    # +    # central file header signature   4 bytes  (0x02014b50) +    # version made by                 2 bytes +    # version needed to extract       2 bytes +    # general purpose bit flag        2 bytes +    # compression method              2 bytes +    # last mod file time              2 bytes +    # last mod file date              2 bytes +    # crc-32                          4 bytes +    # compressed size                 4 bytes +    # uncompressed size               4 bytes +    # file name length                2 bytes +    # extra field length              2 bytes +    # file comment length             2 bytes +    # disk number start               2 bytes +    # internal file attributes        2 bytes +    # external file attributes        4 bytes +    # relative offset of local header 4 bytes +    #  +    # file name (variable size) +    # extra field (variable size) +    # file comment (variable size) + +    CENTRAL_MAGIC = 0x02014b50_u32 + +    # TODO: version made by, if unspecified +    CENTRAL_VERSION_MADE_BY = 0_u32 + +    def write_central( +      io      : IO, +      version : UInt32 = CENTRAL_VERSION_MADE_BY +    ) : UInt64 +      CENTRAL_MAGIC.to_io(io, LE) +      version.to_u16.to_io(io, LE) +      VERSION_NEEDED.to_u16.to_io(io, LE) +      GENERAL_FLAGS.to_u16.to_io(io, LE) +      @method.to_io(io, LE) + +      # TODO: write time +      # encode(time, io) + +      @crc.to_io(io, LE) +      @dst_len.to_io(io, LE) +      @src_len.to_io(io, LE) +       +      # get path length and write it +      path_len = path.bytesize +      path_len.to_u16.to_io(io, LE) + +      # write extras field length +      extras_len = 0_u32 +      extras_len.to_u16.to_io(io, LE) + +      comment_len = @comment.bytesize +      comment_len.to_u16.to_io(io, LE) + +      # write disk number +      0_u32.to_u16.to_io(io, LE) + +      # write file attributes (internal, external) +      # TODO +      0_u32.to_u16.to_io(io, LE) +      0_u32.to_u16.to_io(io, LE) + +      # write local header offset +      @pos.to_u32.to_io(io, LE) + +      # write path field +      path.to_s(io) + +      # write extra fields +      # TODO: implement this + +      # write comment +      comment.to_s(io) + +      # return number of bytes written +      30_u64 + path_len + extras_len + +      # TODO +      0_u64 +    end + +  end + +  class Writer +    def initialize( +      @io       : IO, +      @pos      : UInt64 = 0, +      @comment  : String = "", +      @version  : UInt32 = 0, +    ) +      @entries = [] of WriterEntry +      @closed = false +      @src_pos = @pos +    end + +    def closed? +      @closed +    end + +    private def assert_open +      raise Error, "already closed" if closed? +    end + +    def bytes_written : UInt64 +      # return total number of bytes written +      @src_pos - @pos +    end + +    def close +      assert_open + +      # cache cdr position +      cdr_pos = @pos + +      @entries.each do |entry| +        @pos += entry.to_central(@io) +      end + +      # write zip footer +      write_footer(cdr_pos) + +      # flag as closed +      @closed = true + +      # return total number of bytes written +      bytes_written +    end + +    def add( +      path    : String,  +      io      : IO, +      method  : CompressionMethod = CompressionMethod::DEFLATE, +      time    : Time = Time.now, +      comment : String? = nil, +    ) : UInt64 +      src_pos = @pos, +      # make sure writer is still open +      assert_open + +      # create entry +      entry = WriterEntry.new( +        pos:      @pos, +        path:     path, +        io:       io, +        method:   method, +        time:     time, +        comment:  comment, +      ) + +      # add to list of entries +      @entries << entry +       +      # write entry, update offset +      @pos += entry.to_s(@io) + +      # return number of bytes written +      @pos - src_pos +    end + +    # 4.3.16  End of central directory record: +    # +    # end of central dir signature    4 bytes  (0x06054b50) +    # number of this disk             2 bytes +    # number of the disk with the +    # start of the central directory  2 bytes +    # total number of entries in the +    # central directory on this disk  2 bytes +    # total number of entries in +    # the central directory           2 bytes +    # size of the central directory   4 bytes +    # offset of start of central +    # directory with respect to +    # the starting disk number        4 bytes +    # .ZIP file comment length        2 bytes +    # .ZIP file comment       (variable size) + +    FOOTER_MAGIC = 0x06054b50_u32 + +    private def write_footer(cdr_pos : UInt64) +      FOOTER_MAGIC.to_io(@io, LE) +      0_u32.to_u16.to_io(@io, LE) +      0_u32.to_u16.to_io(@io, LE) + +      num_entries = @entries.size +      num_entries.to_u16.to_io(@io, LE) +      num_entries.to_u16.to_io(@io, LE) + +      (@pos - cdr_pos).to_io(@io, LE) +      cdr_pos.to_io(@io, LE) + +      @comment.bytesize.to_u16.to_io(@io, LE) +      @comment.to_s(@io) +    end +  end + +  def self.write( +    io      : IO,  +    pos     : UInt64 = 0, +    comment : String = "",  +    version : UInt32 = 0, +    &cb     : Writer -> \ +  ) : UInt64 +    r = 0_u64 +    begin  +      w = Writer.new(io, pos, comment, version) +      cb.call(w) +    ensure +      w.close unless w.closed? +      r = w.bytes_written +    end + +    # return total number of bytes written +    r +  end + +  def self.write( +    path    : String, +    pos     : UInt64 = 0, +    comment : String = "",  +    version : UInt32 = 0, +    &cb     : Writer -> \ +  ) : UInt64 +    write(File.open(path, "wb"), pos, comment, version, &cb) +  end +end | 
