From cbefdf8beaf3d64a44e74428079502badef60834 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sat, 6 Aug 2016 11:11:41 -0400 Subject: initial commit --- .gitignore | 10 + .travis.yml | 1 + LICENSE | 21 ++ README.md | 41 ++++ shard.yml | 7 + spec/spec_helper.cr | 2 + spec/zip-crystal_spec.cr | 9 + src/zip.cr | 569 +++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 660 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 shard.yml create mode 100644 spec/spec_helper.cr create mode 100644 spec/zip-crystal_spec.cr create mode 100644 src/zip.cr diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8cf75a --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/doc/ +/libs/ +/.crystal/ +/.shards/ + + +# Libraries don't need dependency lock +# Dependencies will be locked in application that uses them +/shard.lock + diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..ffc7b6a --- /dev/null +++ b/.travis.yml @@ -0,0 +1 @@ +language: crystal diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..3f522c4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Paul Duncan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..454410d --- /dev/null +++ b/README.md @@ -0,0 +1,41 @@ +# zip-crystal + +TODO: Write a description here + +## Installation + + +Add this to your application's `shard.yml`: + +```yaml +dependencies: + zip-crystal: + github: [your-github-name]/zip-crystal +``` + + +## Usage + + +```crystal +require "zip-crystal" +``` + + +TODO: Write usage instructions here + +## Development + +TODO: Write development instructions here + +## Contributing + +1. Fork it ( https://github.com/[your-github-name]/zip-crystal/fork ) +2. Create your feature branch (git checkout -b my-new-feature) +3. Commit your changes (git commit -am 'Add some feature') +4. Push to the branch (git push origin my-new-feature) +5. 
Create a new Pull Request + +## Contributors + +- [[your-github-name]](https://github.com/[your-github-name]) Paul Duncan - creator, maintainer diff --git a/shard.yml b/shard.yml new file mode 100644 index 0000000..3a73c95 --- /dev/null +++ b/shard.yml @@ -0,0 +1,7 @@ +name: zip-crystal +version: 0.1.0 + +authors: + - Paul Duncan + +license: MIT diff --git a/spec/spec_helper.cr b/spec/spec_helper.cr new file mode 100644 index 0000000..78a7a10 --- /dev/null +++ b/spec/spec_helper.cr @@ -0,0 +1,2 @@ +require "spec" +require "../src/zip-crystal" diff --git a/spec/zip-crystal_spec.cr b/spec/zip-crystal_spec.cr new file mode 100644 index 0000000..910b8f4 --- /dev/null +++ b/spec/zip-crystal_spec.cr @@ -0,0 +1,9 @@ +require "./spec_helper" + +describe Zip::Crystal do + # TODO: Write tests + + it "works" do + false.should eq(true) + end +end diff --git a/src/zip.cr b/src/zip.cr new file mode 100644 index 0000000..ce61e13 --- /dev/null +++ b/src/zip.cr @@ -0,0 +1,569 @@ +require "./zip/*" + +module Zip + VERSION = "0.1.0" + + LE = IO::ByteOrder::LittleEndian + + # 4.4.4 general purpose bit flag: (2 bytes) + # + # Bit 0: If set, indicates that the file is encrypted. + # + # (For Method 6 - Imploding) + # Bit 1: If the compression method used was type 6, + # Imploding, then this bit, if set, indicates + # an 8K sliding dictionary was used. If clear, + # then a 4K sliding dictionary was used. + # + # Bit 2: If the compression method used was type 6, + # Imploding, then this bit, if set, indicates + # 3 Shannon-Fano trees were used to encode the + # sliding dictionary output. If clear, then 2 + # Shannon-Fano trees were used. + # + # (For Methods 8 and 9 - Deflating) + # Bit 2 Bit 1 + # 0 0 Normal (-en) compression option was used. + # 0 1 Maximum (-exx/-ex) compression option was used. + # 1 0 Fast (-ef) compression option was used. + # 1 1 Super Fast (-es) compression option was used. 
+ # + # (For Method 14 - LZMA) + # Bit 1: If the compression method used was type 14, + # LZMA, then this bit, if set, indicates + # an end-of-stream (EOS) marker is used to + # mark the end of the compressed data stream. + # If clear, then an EOS marker is not present + # and the compressed data size must be known + # to extract. + # + # Note: Bits 1 and 2 are undefined if the compression + # method is any other. + # + # Bit 3: If this bit is set, the fields crc-32, compressed + # size and uncompressed size are set to zero in the + # local header. The correct values are put in the + # data descriptor immediately following the compressed + # data. (Note: PKZIP version 2.04g for DOS only + # recognizes this bit for method 8 compression, newer + # versions of PKZIP recognize this bit for any + # compression method.) + # + # Bit 4: Reserved for use with method 8, for enhanced + # deflating. + # + # Bit 5: If this bit is set, this indicates that the file is + # compressed patched data. (Note: Requires PKZIP + # version 2.70 or greater) + # + # Bit 6: Strong encryption. If this bit is set, you MUST + # set the version needed to extract value to at least + # 50 and you MUST also set bit 0. If AES encryption + # is used, the version needed to extract value MUST + # be at least 51. See the section describing the Strong + # Encryption Specification for details. Refer to the + # section in this document entitled "Incorporating PKWARE + # Proprietary Technology into Your Product" for more + # information. + # + # Bit 7: Currently unused. + # + # Bit 8: Currently unused. + # + # Bit 9: Currently unused. + # + # Bit 10: Currently unused. + # + # Bit 11: Language encoding flag (EFS). If this bit is set, + # the filename and comment fields for this file + # MUST be encoded using UTF-8. (see APPENDIX D) + # + # Bit 12: Reserved by PKWARE for enhanced compression. 
+  #
+  # Bit 13: Set when encrypting the Central Directory to indicate
+  #         selected data values in the Local Header are masked to
+  #         hide their actual values.  See the section describing
+  #         the Strong Encryption Specification for details.  Refer
+  #         to the section in this document entitled "Incorporating
+  #         PKWARE Proprietary Technology into Your Product" for
+  #         more information.
+  #
+  # Bit 14: Reserved by PKWARE.
+  #
+  # Bit 15: Reserved by PKWARE.
+  #
+  # Members are declared in bit order: under @[Flags] the first member
+  # takes the value 1 (bit 0) and every following member doubles it, so
+  # the Nth member below is bit N of the field described above.
+  @[Flags]
+  enum GeneralFlags
+    WEAK_ENCRYPTION      # bit 0: file is encrypted
+    COMPRESSION_OPTION_1 # bit 1: method-specific option (methods 6, 8, 9, 14)
+    COMPRESSION_OPTION_2 # bit 2: method-specific option (methods 6, 8, 9)
+    FOOTER               # bit 3: crc-32 and sizes live in the data descriptor
+    RESERVED_4           # bit 4: reserved for enhanced deflating (method 8)
+    PATCH                # bit 5: file is compressed patched data
+    STRONG_ENCRYPTION    # bit 6: strong encryption (bit 0 must be set too)
+    RESERVED_7           # bit 7: currently unused
+    RESERVED_8           # bit 8: currently unused
+    RESERVED_9           # bit 9: currently unused
+    RESERVED_10          # bit 10: currently unused
+    EFS                  # bit 11: file name and comment are UTF-8
+    RESERVED_12          # bit 12: reserved by PKWARE for enhanced compression
+    MASKED_VALUES        # bit 13: local header values are masked
+    RESERVED_14          # bit 14: reserved by PKWARE
+    RESERVED_15          # bit 15: reserved by PKWARE
+  end
+
+  # Compression method identifiers.  Each member's value is the number
+  # stored in the 2-byte "compression method" field of the local and
+  # central file headers.  Identifiers that are reserved are left
+  # commented out so that the numbering gaps stay visible.
+  enum CompressionMethod
+    NONE      =  0 # Stored (no compression)
+    SHRUNK    =  1 # Shrunk
+    REDUCED_1 =  2 # Reduced with compression factor 1
+    REDUCED_2 =  3 # Reduced with compression factor 2
+    REDUCED_3 =  4 # Reduced with compression factor 3
+    REDUCED_4 =  5 # Reduced with compression factor 4
+    IMPLODED  =  6 # Imploded
+    # Tokenized = 7 # Reserved for Tokenizing compression algorithm
+    DEFLATE   =  8 # Deflated
+    DEFLATE64 =  9 # Enhanced Deflating using Deflate64(tm)
+    TERSE_OLD = 10 # PKWARE Data Compression Library Imploding (old IBM TERSE)
+    # RESERVED_11 = 11 # Reserved by PKWARE
+    BZIP2     = 12 # BZIP2
+    # RESERVED_13 = 13 # Reserved by PKWARE
+    LZMA      = 14 # LZMA (EFS)
+    # RESERVED_15 = 15 # Reserved by PKWARE
+    # RESERVED_16 = 16 # Reserved by PKWARE
+    # RESERVED_17 = 17 # Reserved by PKWARE
+    TERSE     = 18 # IBM TERSE (new)
+    LZ77      = 19 # IBM LZ77 z Architecture (PFS)
+    WAVPACK   = 97 # WavPack compressed data
+    PPMD      = 98 # PPMd version I, Rev 1
+  end
+
+  # FIXME: should this have a better class?
+  # Exception raised by this module for zip-specific failures (invalid
+  # entry path, unsupported compression method, writer already closed,
+  # values that do not fit the non-zip64 record fields).
+  class Error < Exception
+  end
+
+  # TODO: archive reader.  Both constructors are placeholders that do
+  # nothing yet.
+  class Reader
+    def initialize(path : String)
+    end
+
+    def initialize(io : IO)
+    end
+  end
+
+  # "Stored" entries (CompressionMethod::NONE): data is copied verbatim.
+  #
+  # NOTE: this file uses Zlib without requiring it itself; "zlib" must be
+  # required by the file's top-level requires.
+  module NoneCompressor
+    # Copies *src_io* to *dst_io* until *src_io* is exhausted.
+    #
+    # Returns a `{crc, src_len, dst_len}` tuple: the CRC-32 of the data,
+    # the number of bytes read and the number of bytes written (the two
+    # lengths are always equal for stored data).
+    def self.compress_none(src_io, dst_io)
+      crc = 0_u32
+      src_len = 0_u64
+      buf = Bytes.new(4096)
+
+      while (len = src_io.read(buf)) > 0
+        # only the first len bytes of buf hold fresh data
+        chunk = buf[0, len]
+
+        crc = Zlib.crc32(chunk, crc).to_u32
+        dst_io.write(chunk)
+        src_len += len
+      end
+
+      # return results
+      {crc, src_len, src_len}
+    end
+  end
+
+  # "Deflated" entries (CompressionMethod::DEFLATE).
+  module DeflateCompressor
+    # Compresses *src_io* into *dst_io* until *src_io* is exhausted.
+    #
+    # Returns a `{crc, src_len, dst_len}` tuple: the CRC-32 of the
+    # uncompressed data, the number of uncompressed bytes read and the
+    # number of compressed bytes written.
+    def self.compress_deflate(src_io, dst_io)
+      crc = 0_u32
+      src_len = 0_u64
+      dst_len = 0_u64
+
+      # create buffer and intermediate memory io; compressing through
+      # the memory io is what lets the compressed bytes be counted
+      buf = Bytes.new(4096)
+      mem_io = MemoryIO.new(4096)
+
+      # A zip entry holds a raw deflate stream without the zlib header
+      # and trailer; a negative window size asks zlib for exactly that
+      # (15 is zlib's maximum window size).
+      Zlib::Deflate.new(
+        output: mem_io,
+        wbits: -15,
+        sync_close: false,
+      ) do |zlib_io|
+        while (len = src_io.read(buf)) > 0
+          chunk = buf[0, len]
+
+          crc = Zlib.crc32(chunk, crc).to_u32
+
+          # compress bytes to memory io
+          zlib_io.write(chunk)
+          src_len += len
+
+          # write compressed bytes to dst_io
+          dst_len += drain(mem_io, dst_io)
+        end
+      end
+
+      # closing the deflate stream flushes whatever it was still
+      # holding back, so the memory io must be drained once more
+      dst_len += drain(mem_io, dst_io)
+
+      # return results
+      {crc, src_len, dst_len}
+    end
+
+    # Moves everything written to *mem_io* since the last drain into
+    # *dst_io*, rewinds *mem_io* and returns the number of bytes moved.
+    private def self.drain(mem_io, dst_io) : UInt64
+      len = mem_io.pos
+      dst_io.write(mem_io.to_slice[0, len]) if len > 0
+      mem_io.rewind
+      len.to_u64
+    end
+  end
+
+  # One file being added to an archive.  Writes the entry's local file
+  # header, body and data descriptor, remembers the resulting crc and
+  # sizes, and can later write the matching central directory header.
+  class WriterEntry
+    include NoneCompressor
+    include DeflateCompressor
+
+    # TODO version needed to extract and header flags
+    # (used for header and central header)
+    VERSION_NEEDED = 0_u32
+    GENERAL_FLAGS  = GeneralFlags.flags(FOOTER, EFS)
+
+    # *pos* is the offset of this entry's local header in the archive,
+    # *path* the name stored for the entry, *io* the source of the
+    # entry's data, *method* the compression method, *time* the stored
+    # modification time and *comment* the optional entry comment.
+    def initialize(
+      @pos : UInt64,
+      @path : String,
+      @io : IO,
+      @method : CompressionMethod = CompressionMethod::DEFLATE,
+      @time : Time = Time.now,
+      @comment : String? = nil,
+    )
+      @crc = 0_u32
+      @src_len = 0_u64
+      @dst_len = 0_u64
+    end
+
+    # Writes the complete entry (local header, body, data descriptor)
+    # to *dst_io* and returns the number of bytes written.
+    #
+    # NOTE(review): this shares its name and arity with the standard
+    # `to_s(io)` but writes binary data and returns a byte count.
+    def to_s(dst_io) : UInt64
+      # write header
+      r = write_header(dst_io, @path, @method, @time)
+
+      # write body
+      @crc, @src_len, @dst_len = write_body(dst_io)
+      r += @dst_len
+
+      # write footer
+      r += write_footer(dst_io, @crc, @src_len, @dst_len)
+
+      # return number of bytes written
+      r
+    end
+
+    #
+    # local file header signature     4 bytes  (0x04034b50)
+    # version needed to extract       2 bytes
+    # general purpose bit flag        2 bytes
+    # compression method              2 bytes
+    # last mod file time              2 bytes
+    # last mod file date              2 bytes
+    # crc-32                          4 bytes
+    # compressed size                 4 bytes
+    # uncompressed size               4 bytes
+    # file name length                2 bytes
+    # extra field length              2 bytes
+    # file name (variable size)
+    # extra field (variable size)
+    #
+
+    HEADER_MAGIC = 0x04034b50_u32
+
+    # Writes the local file header for *path* to *io* and returns the
+    # number of bytes written.
+    #
+    # Raises `Error` if *path* is empty, longer than 65535 bytes or
+    # starts with a slash.
+    private def write_header(
+      io : IO,
+      path : String,
+      method : CompressionMethod,
+      time : Time,
+    ) : UInt64
+      # get path length, in bytes
+      path_len = path.bytesize
+
+      # check file path (the length must fit the 2 byte length field)
+      raise Error.new("empty file path") if path_len == 0
+      raise Error.new("file path too long") if path_len > UInt16::MAX
+      raise Error.new("file path contains leading slash") if path[0] == '/'
+
+      # write magic, version needed, flags, and compression method
+      HEADER_MAGIC.to_io(io, LE)
+      VERSION_NEEDED.to_u16.to_io(io, LE)
+      GENERAL_FLAGS.value.to_u16.to_io(io, LE)
+      method.value.to_u16.to_io(io, LE)
+
+      # last mod file time and date
+      write_time(io, time)
+
+      # crc, compressed size, uncompressed size: all zero because the
+      # FOOTER flag is set and the real values follow the body
+      0_u32.to_io(io, LE)
+      0_u32.to_io(io, LE)
+      0_u32.to_io(io, LE)
+      path_len.to_u16.to_io(io, LE)
+
+      # write extras field length
+      extras_len = 0_u32
+      extras_len.to_u16.to_io(io, LE)
+
+      # write path field
+      path.to_s(io)
+
+      # write extra fields
+      # TODO: implement this
+
+      # return number of bytes written
+      30_u64 + path_len + extras_len
+    end
+
+    # Writes *time* as the 2 byte MS-DOS time followed by the 2 byte
+    # MS-DOS date (seconds are stored with 2 second resolution, years
+    # as an offset from 1980 limited to the 7 bits available).
+    private def write_time(io : IO, time : Time)
+      year = Math.min(Math.max(time.year - 1980, 0), 127)
+
+      dos_time = (time.hour << 11) | (time.minute << 5) | (time.second >> 1)
+      dos_date = (year << 9) | (time.month << 5) | time.day
+
+      dos_time.to_u16.to_io(io, LE)
+      dos_date.to_u16.to_io(io, LE)
+    end
+
+    # Returns *len* as the 4 byte value the size fields require.
+    #
+    # Raises `Error` if *len* does not fit (zip64 is not implemented).
+    private def size32(len : UInt64) : UInt32
+      raise Error.new("entry too large (zip64 is not supported)") if len > UInt32::MAX
+      len.to_u32
+    end
+
+    # Copies the entry data from the source io to *dst_io* using the
+    # entry's compression method and returns `{crc, src_len, dst_len}`.
+    #
+    # Raises `Error` for any method other than NONE and DEFLATE.
+    private def write_body(dst_io : IO)
+      case @method
+      when CompressionMethod::NONE
+        NoneCompressor.compress_none(@io, dst_io)
+      when CompressionMethod::DEFLATE
+        DeflateCompressor.compress_deflate(@io, dst_io)
+      else
+        raise Error.new("unsupported compression method")
+      end
+    end
+
+    # 4.3.9  Data descriptor:
+    #        MAGIC = 0x08074b50              4 bytes
+    #        crc-32                          4 bytes
+    #        compressed size                 4 bytes
+    #        uncompressed size               4 bytes
+    #
+    # 4.3.9.3 Although not originally assigned a signature, the value
+    # 0x08074b50 has commonly been adopted as a signature value
+
+    FOOTER_MAGIC = 0x08074b50_u32
+
+    # Writes the data descriptor to *io* and returns the number of
+    # bytes written.
+    #
+    # Raises `Error` if either length does not fit in 4 bytes.
+    private def write_footer(
+      io : IO,
+      crc : UInt32,
+      src_len : UInt64,
+      dst_len : UInt64,
+    ) : UInt64
+      # write footer
+      FOOTER_MAGIC.to_io(io, LE)
+      crc.to_io(io, LE)
+      size32(dst_len).to_io(io, LE)
+      size32(src_len).to_io(io, LE)
+
+      # return number of bytes written
+      16_u64
+    end
+
+    #
+    # central file header signature   4 bytes  (0x02014b50)
+    # version made by                 2 bytes
+    # version needed to extract       2 bytes
+    # general purpose bit flag        2 bytes
+    # compression method              2 bytes
+    # last mod file time              2 bytes
+    # last mod file date              2 bytes
+    # crc-32                          4 bytes
+    # compressed size                 4 bytes
+    # uncompressed size               4 bytes
+    # file name length                2 bytes
+    # extra field length              2 bytes
+    # file comment length             2 bytes
+    # disk number start               2 bytes
+    # internal file attributes        2 bytes
+    # external file attributes        4 bytes
+    # relative offset of local header 4 bytes
+    #
+    # file name (variable size)
+    # extra field (variable size)
+    # file comment (variable size)
+
+    CENTRAL_MAGIC = 0x02014b50_u32
+
+    # TODO: version made by, if unspecified
+    CENTRAL_VERSION_MADE_BY = 0_u32
+
+    # Writes this entry's central directory header to *io* and returns
+    # the number of bytes written.  Must be called after `to_s`, which
+    # is what fills in the crc and sizes written here.
+    #
+    # Raises `Error` if the comment is longer than 65535 bytes or a
+    # size does not fit in 4 bytes.
+    def write_central(
+      io : IO,
+      version : UInt32 = CENTRAL_VERSION_MADE_BY
+    ) : UInt64
+      # a missing comment is written as an empty one
+      comment = @comment || ""
+      comment_len = comment.bytesize
+      raise Error.new("file comment too long") if comment_len > UInt16::MAX
+
+      CENTRAL_MAGIC.to_io(io, LE)
+      version.to_u16.to_io(io, LE)
+      VERSION_NEEDED.to_u16.to_io(io, LE)
+      GENERAL_FLAGS.value.to_u16.to_io(io, LE)
+      @method.value.to_u16.to_io(io, LE)
+
+      # last mod file time and date
+      write_time(io, @time)
+
+      @crc.to_io(io, LE)
+      size32(@dst_len).to_io(io, LE)
+      size32(@src_len).to_io(io, LE)
+
+      # get path length and write it
+      path_len = @path.bytesize
+      path_len.to_u16.to_io(io, LE)
+
+      # write extras field length
+      extras_len = 0_u32
+      extras_len.to_u16.to_io(io, LE)
+
+      comment_len.to_u16.to_io(io, LE)
+
+      # write disk number
+      0_u16.to_io(io, LE)
+
+      # write file attributes (internal: 2 bytes, external: 4 bytes)
+      # TODO
+      0_u16.to_io(io, LE)
+      0_u32.to_io(io, LE)
+
+      # write local header offset
+      @pos.to_u32.to_io(io, LE)
+
+      # write path field
+      @path.to_s(io)
+
+      # write extra fields
+      # TODO: implement this
+
+      # write comment
+      comment.to_s(io)
+
+      # return number of bytes written
+      46_u64 + path_len + extras_len + comment_len
+    end
+
+  end
+
+  # Writes a zip archive to an IO: entries are appended with `add`, and
+  # `close` finishes the archive by writing the central directory and
+  # the end of central directory record.
+  class Writer
+    # offset at which this writer started writing
+    @src_pos : UInt64
+
+    # *io* receives the archive, *pos* is the offset of *io* at which
+    # the archive starts, *comment* is the archive comment and *version*
+    # the "version made by" value stored for every entry.
+    def initialize(
+      @io : IO,
+      @pos : UInt64 = 0_u64,
+      @comment : String = "",
+      @version : UInt32 = 0_u32,
+    )
+      @entries = [] of WriterEntry
+      @closed = false
+      @src_pos = @pos
+    end
+
+    # Returns true once `close` has completed.
+    def closed?
+      @closed
+    end
+
+    # Raises `Error` if the writer has already been closed.
+    private def assert_open
+      raise Error.new("already closed") if closed?
+    end
+
+    # Returns the total number of bytes written so far.
+    def bytes_written : UInt64
+      # current offset minus the offset the writer started at
+      @pos - @src_pos
+    end
+
+    # Writes the central directory and the end of central directory
+    # record, marks the writer as closed and returns the total number
+    # of bytes written.  The underlying io is left open.
+    #
+    # Raises `Error` if the writer has already been closed.
+    def close
+      assert_open
+
+      # cache cdr position
+      cdr_pos = @pos
+
+      @entries.each do |entry|
+        @pos += entry.write_central(@io, @version)
+      end
+
+      # write zip footer
+      @pos += write_footer(cdr_pos)
+
+      # flag as closed
+      @closed = true
+
+      # return total number of bytes written
+      bytes_written
+    end
+
+    # Reads *io* to its end and appends it to the archive as an entry
+    # named *path*.  Returns the number of bytes written for the entry.
+    #
+    # Raises `Error` if the writer has already been closed, if *path* is
+    # not acceptable or if *method* is not supported.
+    def add(
+      path : String,
+      io : IO,
+      method : CompressionMethod = CompressionMethod::DEFLATE,
+      time : Time = Time.now,
+      comment : String? = nil,
+    ) : UInt64
+      # make sure writer is still open
+      assert_open
+
+      # remember where the entry starts
+      src_pos = @pos
+
+      # create entry
+      entry = WriterEntry.new(
+        pos: @pos,
+        path: path,
+        io: io,
+        method: method,
+        time: time,
+        comment: comment,
+      )
+
+      # add to list of entries
+      @entries << entry
+
+      # write entry, update offset
+      @pos += entry.to_s(@io)
+
+      # return number of bytes written
+      @pos - src_pos
+    end
+
+    # 4.3.16  End of central directory record:
+    #
+    # end of central dir signature    4 bytes  (0x06054b50)
+    # number of this disk             2 bytes
+    # number of the disk with the
+    # start of the central directory  2 bytes
+    # total number of entries in the
+    # central directory on this disk  2 bytes
+    # total number of entries in
+    # the central directory           2 bytes
+    # size of the central directory   4 bytes
+    # offset of start of central
+    # directory with respect to
+    # the starting disk number        4 bytes
+    # .ZIP file comment length        2 bytes
+    # .ZIP file comment       (variable size)
+
+    FOOTER_MAGIC = 0x06054b50_u32
+
+    # Writes the end of central directory record for a central directory
+    # that starts at *cdr_pos* and ends at the current offset.  Returns
+    # the number of bytes written.
+    #
+    # TODO: entry counts, sizes and offsets that need zip64 are not
+    # detected here.
+    private def write_footer(cdr_pos : UInt64) : UInt64
+      FOOTER_MAGIC.to_io(@io, LE)
+
+      # disk numbers (single disk archive)
+      0_u16.to_io(@io, LE)
+      0_u16.to_io(@io, LE)
+
+      num_entries = @entries.size
+      num_entries.to_u16.to_io(@io, LE)
+      num_entries.to_u16.to_io(@io, LE)
+
+      # size and offset of the central directory, 4 bytes each
+      (@pos - cdr_pos).to_u32.to_io(@io, LE)
+      cdr_pos.to_u32.to_io(@io, LE)
+
+      @comment.bytesize.to_u16.to_io(@io, LE)
+      @comment.to_s(@io)
+
+      # return number of bytes written
+      22_u64 + @comment.bytesize
+    end
+  end
+
+  # Creates a `Writer` on *io*, passes it to the block and closes the
+  # writer afterwards (also when the block raises), unless the block
+  # already closed it.  *io* itself is left open.
+  #
+  # Returns the total number of bytes written.
+  def self.write(
+    io : IO,
+    pos : UInt64 = 0_u64,
+    comment : String = "",
+    version : UInt32 = 0_u32,
+    &cb : Writer -> \
+  ) : UInt64
+    # created outside of begin so that ensure never sees a nil writer
+    w = Writer.new(io, pos, comment, version)
+
+    begin
+      cb.call(w)
+    ensure
+      w.close unless w.closed?
+    end
+
+    # return total number of bytes written
+    w.bytes_written
+  end
+
+  # Same as `write(io, ...)`, but writes to the file at *path*, which
+  # is opened in "wb" mode and closed again before returning.
+  #
+  # Returns the total number of bytes written.
+  def self.write(
+    path : String,
+    pos : UInt64 = 0_u64,
+    comment : String = "",
+    version : UInt32 = 0_u32,
+    &cb : Writer -> \
+  ) : UInt64
+    File.open(path, "wb") do |io|
+      write(io, pos, comment, version, &cb)
+    end
+  end
+end
-- cgit v1.2.3