From 24f3afd615e629f823c8b1c55edb873784e80401 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Mon, 8 Aug 2016 17:23:34 -0400 Subject: working reader, add Source --- spec/zip_spec.cr | 5 +- src/zip.cr | 353 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 329 insertions(+), 29 deletions(-) diff --git a/spec/zip_spec.cr b/spec/zip_spec.cr index cae6de9..a7e306c 100644 --- a/spec/zip_spec.cr +++ b/spec/zip_spec.cr @@ -56,6 +56,9 @@ describe Zip::Writer do end describe Zip::Reader do - Zip.read(File.join(TEST_DIR, "test-empty.zip")) do |zip| + Zip.read(File.join(TEST_DIR, "test-string.zip")) do |zip| + zip.entries.each do |e| + pp e + end end end diff --git a/src/zip.cr b/src/zip.cr index 4761695..0dcc7ec 100644 --- a/src/zip.cr +++ b/src/zip.cr @@ -180,30 +180,6 @@ module Zip end end - # TODO - class Reader - getter? :closed, :sync_close - - def initialize( - @io : IO, - @pos : UInt32 = 0, - @sync_close : Bool = false, - ) - @closed = false - end - - private def assert_open - raise Error.new("already closed") if closed? - end - - def close - assert_open - - @io.close if @sync_close - @closed = true - end - end - module NoneCompressionHelper def compress_none(src_io, dst_io) crc = 0_u32 @@ -739,15 +715,328 @@ module Zip end end + # alias Source = IO::FileDescriptor | MemoryIO + + class Source + include IO + + def initialize(@io : IO::FileDescriptor | MemoryIO) + end + + delegate read, to: @io + delegate write, to: @io + forward_missing_to @io + end + + # central file header signature 4 bytes (0x02014b50) + # version made by 2 bytes + # version needed to extract 2 bytes + # general purpose bit flag 2 bytes + # compression method 2 bytes + # last mod file time 2 bytes + # last mod file date 2 bytes + # crc-32 4 bytes + # compressed size 4 bytes + # uncompressed size 4 bytes + # file name length 2 bytes + # extra field length 2 bytes + # file comment length 2 bytes + # disk number start 2 bytes + # internal file attributes 2 bytes + # external file attributes 4 bytes + # relative offset of local header 4 bytes + # + # file name (variable size) + # extra field (variable size) + # file comment (variable size) + + class Entry + getter :version, :version_needed, :flags, :method, :datetime, :crc, + :compressed_size, :uncompressed_size, :path, :extras, + :comment, :internal_attr, :external_attr, :pos + + def initialize(io) + # allocate slice for data + mem = Bytes.new(46) + + # read entry + if ((len = io.read(mem)) != 46) + raise Error.new("couldn't read full CDR entry (#{len} != 46)") + end + + # create memory io for slice + mem_io = MemoryIO.new(mem, false) + + magic = UInt32.from_io(mem_io, LE) + if magic != MAGIC[:cdr_header] + raise Error.new("invalid CDR header magic") + end + + # read versions + @version = UInt16.from_io(mem_io, LE).as(UInt16) + @version_needed = UInt16.from_io(mem_io, LE).as(UInt16) + + # TODO: check versions + + # read flags, method, and date + @flags = UInt16.from_io(mem_io, LE).as(UInt16) + @method = UInt16.from_io(mem_io, LE).as(UInt16) + @datetime = UInt32.from_io(mem_io, LE).as(UInt32) + + @crc = UInt32.from_io(mem_io, LE).as(UInt32) + @compressed_size = UInt32.from_io(mem_io, LE).as(UInt32) + @uncompressed_size = UInt32.from_io(mem_io, LE).as(UInt32) + + # read lengths + @path_len = UInt16.from_io(mem_io, LE).not_nil!.as(UInt16) + @extras_len = UInt16.from_io(mem_io, LE).as(UInt16) + @comment_len = UInt16.from_io(mem_io, LE).as(UInt16) + + @internal_attr = UInt16.from_io(mem_io, LE).as(UInt16) + @external_attr = UInt32.from_io(mem_io, LE).as(UInt32) + @pos = UInt32.from_io(mem_io, LE).as(UInt32) + + # close memory io + mem_io.close + + # read path + @path = if @path_len > 0 + buf = Bytes.new(@path_len) + + if io.read(buf) != @path_len + raise Error.new("couldn't read CDR entry name") + end + + # TODO: handle encoding + String.new(buf) + else + "" + end + + # read extras + @extras = if @extras_len > 0 + buf = Bytes.new(@extras_len) + + if io.read(buf) != @extras_len + raise Error.new("couldn't read CDR entry extras") + end + + # TODO: decode extras? + buf + else + # TODO + Bytes.new(0) + end + + # read comment + @comment = if @comment_len > 0 + buf = Bytes.new(@comment_len) + if io.read(buf) != @comment_len + raise Error.new("couldn't read CDR entry comment") + end + + # TODO: handle encoding + String.new(buf) + else + "" + end + end + end + + # 4.3.16 End of central directory record: + # + # * end of central dir signature 4 bytes (0x06054b50) + # * number of this disk 2 bytes + # * number of the disk with the + # start of the central directory 2 bytes + # * total number of entries in the + # central directory on this disk 2 bytes + # * total number of entries in + # the central directory 2 bytes + # * size of the central directory 4 bytes + # * offset of start of central + # directory with respect to + # the starting disk number 4 bytes + # * .ZIP file comment length 2 bytes + # * .ZIP file comment (variable size) + + class Archive + getter :entries + + def initialize(@io : Source) + # initialize entries + # find footer and end of io + footer_pos, end_pos = find_footer_and_eof(@io) + + # skip magic + @io.pos = footer_pos + 4 + + # create slice and memory io + mem = Bytes.new(18) + + # read footer into memory io + @io.pos = footer_pos + 4 + if ((len = @io.read(mem)) < mem.size) + raise Error.new("couldn't read zip footer") + end + + # create memory io for slice + mem_io = MemoryIO.new(mem, false) + + # read disk numbers + @disk_num = mem_io.read_bytes(UInt16, LE).as(UInt16) + @cdr_disk = mem_io.read_bytes(UInt16, LE).as(UInt16) + + # check disk numbers + if @disk_num != @cdr_disk + raise Error.new("multi-disk archives not supported") + end + + # read entry counts + @num_disk_entries = mem_io.read_bytes(UInt16, LE).as(UInt16) + @num_entries = mem_io.read_bytes(UInt16, LE).not_nil!.as(UInt16) + + # check entry counts + if @num_disk_entries != @num_entries + raise Error.new("multi-disk archives not supported") + end + + # read cdr position and length + @cdr_len = mem_io.read_bytes(UInt32, LE).not_nil!.as(UInt32) + @cdr_pos = mem_io.read_bytes(UInt32, LE).not_nil!.as(UInt32) + + # check cdr position + if @cdr_pos.not_nil! + @cdr_len.not_nil! >= end_pos + raise Error.new("invalid CDR offset: #{@cdr_pos}") + end + + # read comment length and comment body + @comment_len = mem_io.read_bytes(UInt16, LE).not_nil!.as(UInt16) + @comment = if @comment_len.not_nil! > 0 + # allocate space for comment + slice = Bytes.new(@comment_len.not_nil!) + + # seek to comment position + @io.pos = footer_pos + 22 + + # read comment data + if ((len = @io.read(slice)) != @comment_len) + raise Error.new("archive comment read truncated") + end + + # FIXME: shouldn't assume UTF-8 here + String.new(slice, "UTF-8") + else + "" + end + + # close memory io + mem_io.close + + # read entries + @entries = [] of Entry + read_entries(@entries, @io, @cdr_pos, @cdr_len, @num_entries) + end + + private def read_entries( + entries : Array(Entry), + io : Source, + cdr_pos : UInt32, + cdr_len : UInt32, + num_entries : UInt16, + ) + # get end position + end_cdr_pos = cdr_pos + cdr_len + + # seek to start of entries + io.pos = cdr_pos + + # read entries + num_entries.times do + # create new entry + entries << Entry.new(io) + + # check position + if io.pos > end_cdr_pos + raise Error.new("read past CDR") + end + end + end + + private def find_footer_and_eof(io : Source) + # seek to end of file + io.seek(0, IO::Seek::End) + end_pos = io.pos + + if end_pos < 22 + raise Error.new("too small for end of central directory") + end + + curr_pos = end_pos - 22 + while curr_pos >= 0 + # seek to current position + io.pos = curr_pos + + # read what might be the end_cdr magic + maybe_end_magic = UInt32.from_io(io, LE) + + if maybe_end_magic == MAGIC[:cdr_footer] + # jump to archive commment len (maybe) + maybe_comment_len_pos = curr_pos + 20 + io.pos = maybe_comment_len_pos + + # get archive commment len (maybe) + maybe_comment_len = UInt16.from_io(io, LE) + + if curr_pos + 22 + maybe_comment_len == end_pos + # magic and comment line up: probably found end_cdr + return { curr_pos, end_pos } + end + end + + # step back one byte + curr_pos -= 1 + end + + # throw error + raise Error.new("couldn't find end of central directory") + end + end + + class Reader + getter? :closed, :sync_close + getter :zip + + def initialize( + @io : Source, + @pos : UInt32 = 0_u32, + @sync_close : Bool = false, + ) + @closed = false + @zip = Archive.new(@io) + end + + private def assert_open + raise Error.new("already closed") if closed? + end + + def close + assert_open + + @io.close if @sync_close + @closed = true + end + end + def self.read( io : IO, pos : UInt32 = 0_u32, sync_close : Bool = false, - &cb : Reader -> \ + &cb : Archive -> \ ) begin - r = Reader.new(io, pos, sync_close) - cb.call(r) + r = Reader.new(Source.new(io), pos, sync_close) + cb.call(r.zip) ensure if r r.close unless r.closed? @@ -757,9 +1046,17 @@ module Zip nil end + def self.read( + slice : Bytes, + &cb : Archive -> \ + ) + src = Source.new(MemoryIO.new(slice, false)) + read(src, 0_u32, false, &cb) + end + def self.read( path : String, - &cb : Reader -> \ + &cb : Archive -> \ ) File.open(path, "rb") do |io| read(io, 0_u32, true, &cb) -- cgit v1.2.3