aboutsummaryrefslogtreecommitdiff
path: root/src/zip.cr
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2016-08-10 18:17:50 -0400
committerPaul Duncan <pabs@pablotron.org>2016-08-10 18:17:50 -0400
commit9787bfe98af7ec407dbcaa1f00c0aef322255ec2 (patch)
treeabedf7981e95208d0fde9994bec516d27d595856 /src/zip.cr
parent3e4edf96c04356cfc3f6132eda7227a00dcab48e (diff)
downloadzip-crystal-9787bfe98af7ec407dbcaa1f00c0aef322255ec2.tar.bz2
zip-crystal-9787bfe98af7ec407dbcaa1f00c0aef322255ec2.zip
add documentation
Diffstat (limited to 'src/zip.cr')
-rw-r--r--src/zip.cr502
1 files changed, 462 insertions, 40 deletions
diff --git a/src/zip.cr b/src/zip.cr
index 74f5d0f..aa9006d 100644
--- a/src/zip.cr
+++ b/src/zip.cr
@@ -1,16 +1,16 @@
require "./zip/*"
require "zlib"
-# https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
-# http://www.onicos.com/staff/iz/formats/zip.html
-#
+# :nodoc:
#
# TODO:
# [x] date/time
# [x] reader (store and deflate only)
-# [ ] extras (at least infozip)
+# [x] documentation
+# [-] extras (at least infozip)
+# [ ] convert datetime to Time
+# [ ] add size to Entry
# [ ] directories
-# [ ] documentation
# [ ] full tests
# [ ] zip64
# [ ] legacy unicode (e.g., non-bit 11) path/comment support
@@ -18,10 +18,45 @@ require "zlib"
# [ ] unix uids
# [ ] bzip2/lzma support
#
+# References:
+# https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
+# http://www.onicos.com/staff/iz/formats/zip.html
+#
+# :nodoc:
+#
+# Library for reading and writing zip files.
+#
+# Examples:
+#
+# Reading from a zip file:
+#
+# # create output MemoryIO
+# mem_io = MemoryIO.new
+#
+# # read from "foo.zip"
+# Zip.read("foo.zip") do |zip|
+# # read contents of "bar.txt" in "foo.zip" into mem_io
+# zip["bar.txt"].read(mem_io)
+# end
+#
+# Writing to a zip file:
+#
+# # write to "foo.zip"
+# Zip.write("foo.zip") do |zip|
+# # create "bar.txt" with contents "hello!"
+# zip.add("bar.txt", "hello!")
+# end
+#
module Zip
+ #
+ # Version of zip-crystal library.
+ #
VERSION = "0.1.0"
+ #
+ # Magic numbers for various data in Zip stream.
+ #
MAGIC = {
cdr_header: 0x02014b50_u32,
cdr_footer: 0x06054b50_u32,
@@ -31,9 +66,12 @@ module Zip
LE = IO::ByteFormat::LittleEndian
- # size of buffers, in bytes
+ #
+ # Size of internal buffers, in bytes.
+ #
BUFFER_SIZE = 8192
+ # :nodoc:
# 4.4.4 general purpose bit flag: (2 bytes)
#
# Bit 0: If set, indicates that the file is encrypted.
@@ -120,6 +158,12 @@ module Zip
# Bit 14: Reserved by PKWARE.
#
# Bit 15: Reserved by PKWARE.
+ # :nodoc:
+
+ #
+ # General flags.
+ #
+ # Used by local header and central directory header.
#
@[Flags]
enum GeneralFlags
@@ -141,6 +185,9 @@ module Zip
RESERVED_15
end
+ #
+ # Compression methods.
+ #
enum CompressionMethod
NONE = 0 # Stored (no compression)
SHRUNK = 1 # Shrunk
@@ -166,11 +213,21 @@ module Zip
PPMD = 98 # PPMd version I, Rev 1
end
+ #
+ # Wrapper class for exceptions.
+ #
class Error < Exception
end
+ #
+ # Helper methods for converting to and from `Time` objects.
+ #
module TimeHelper
- def write_time(io : IO, time : Time) : UInt32
+ #
+ # Convert given `Time` to a DOS-style datetime, write the result to
+ # the given IO, and return the number of bytes written.
+ #
+ private def write_time(io : IO, time : Time) : UInt32
year = Math.max(1980, time.year) - 1980
# convert to dos timestamp
@@ -184,8 +241,11 @@ module Zip
end
end
+ #
+ # Helper methods for reading and writing uncompressed data.
+ #
module NoneCompressionHelper
- def compress_none(src_io, dst_io)
+ private def compress_none(src_io, dst_io)
crc = 0_u32
buf = Bytes.new(BUFFER_SIZE)
@@ -213,7 +273,7 @@ module Zip
{ crc.to_u32, src_len, src_len }
end
- def decompress_none(src_io, dst_io, src_len, dst_len)
+ private def decompress_none(src_io, dst_io, src_len, dst_len)
# TODO: verify CRC
IO.copy(src_io, dst_io, src_len)
@@ -222,6 +282,9 @@ module Zip
end
end
+ #
+ # Helper methods for compressing and decompressing deflated data.
+ #
module DeflateCompressionHelper
ZALLOC_PROC = LibZ::AllocFunc.new do |data, num_items, size|
GC.malloc(num_items * size)
@@ -233,7 +296,10 @@ module Zip
ZLIB_VERSION = LibZ.zlibVersion
- def compress_deflate(src_io, dst_io)
+ #
+ # Read data from src_io, and write the compressed result to dst_io.
+ #
+ private def compress_deflate(src_io, dst_io)
crc = 0_u32
# create read and compress buffers
@@ -299,6 +365,9 @@ module Zip
{ crc.to_u32, z.total_in.to_u32, z.total_out.to_u32 }
end
+ #
+ # Deflate data in ZStream and write it to given IO.
+ #
private def write_compressed(
io : IO,
buf : Bytes,
@@ -325,7 +394,11 @@ module Zip
end
end
- def decompress_deflate(src_io, dst_io, src_len, dst_len)
+ #
+ # Decompress src_len bytes of DEFLATEd data from src_io and write it
+ # to dst_io.
+ #
+ private def decompress_deflate(src_io, dst_io, src_len, dst_len)
crc = 0_u32
# create read and compress buffers
@@ -415,6 +488,9 @@ module Zip
dst_len
end
+ #
+ # Inflate compressed data from ZStream and write it to given IO.
+ #
private def read_compressed(
io : IO,
buf : Bytes,
@@ -453,16 +529,33 @@ module Zip
end
end
+ #
+ # Internal class used to store files for `Writer` instance.
+ #
+ # You should not need to instantiate this class directly; it is called
+ # automatically by `Writer#add` and `Writer#add_file`.
+ #
class WriterEntry
include TimeHelper
include NoneCompressionHelper
include DeflateCompressionHelper
- # version needed to extract and header flags (4.4.3.2)
- # (used for local header and central header)
+ #
+ # Version needed to extract this entry (4.4.3.2).
+ #
VERSION_NEEDED = 45_u32
+
+ #
+ # Default flags for local and central header.
+ #
GENERAL_FLAGS = GeneralFlags.flags(FOOTER, EFS)
+ #
+ # Create a new WriterEntry instance.
+ #
+ # You should not need to call this method directly; it is called
+ # automatically by `Writer#add` and `Writer#add_file`.
+ #
def initialize(
@pos : UInt32,
@path : String,
@@ -476,6 +569,13 @@ module Zip
@dst_len = 0_u32
end
+ #
+ # Write local file entry to IO and return the number of bytes
+ # written.
+ #
+ # You should not need to call this method directly; it is called
+ # automatically by `Writer#add` and `Writer#add_file`.
+ #
def to_s(dst_io) : UInt32
# write header
r = write_header(dst_io, @path, @method, @time)
@@ -491,7 +591,7 @@ module Zip
r
end
- #
+ # :nodoc:
# local file header signature 4 bytes (0x04034b50)
# version needed to extract 2 bytes
# general purpose bit flag 2 bytes
@@ -505,8 +605,11 @@ module Zip
# extra field length 2 bytes
# file name (variable size)
# extra field (variable size)
- #
+ # :nodoc:
+ #
+ # Write file header and return the number of bytes written.
+ #
private def write_header(
io : IO,
path : String,
@@ -554,6 +657,9 @@ module Zip
30_u32 + path_len + extras_len
end
+ #
+ # Write file contents and return the number of bytes written.
+ #
private def write_body(dst_io : IO)
case @method
when CompressionMethod::NONE
@@ -565,6 +671,7 @@ module Zip
end
end
+ # :nodoc:
# 4.3.9 Data descriptor:
# MAGIC = 0x08074b50 4 bytes
# crc-32 4 bytes
@@ -573,7 +680,12 @@ module Zip
#
# 4.3.9.3 Although not originally assigned a signature, the value
# 0x08074b50 has commonly been adopted as a signature value
+ # :nodoc:
+ #
+ # Write file footer (data descriptor) and return the number of bytes
+ # written.
+ #
private def write_footer(
io : IO,
crc : UInt32,
@@ -592,7 +704,7 @@ module Zip
16_u32
end
- #
+ # :nodoc:
# central file header signature 4 bytes (0x02014b50)
# version made by 2 bytes
# version needed to extract 2 bytes
@@ -614,10 +726,20 @@ module Zip
# file name (variable size)
# extra field (variable size)
# file comment (variable size)
+ # :nodoc:
- # TODO: version made by, if unspecified
+ #
+ # Version entry was made by, if unspecified.
+ #
CENTRAL_VERSION_MADE_BY = 0_u32
+ #
+ # Write central directory data for this `WriterEntry` and return the
+ # number of bytes written.
+ #
+ # You should not need to call this method directly; it is called
+ # automatically by `Writer#close`.
+ #
def write_central(
io : IO,
version : UInt32 = CENTRAL_VERSION_MADE_BY
@@ -673,8 +795,17 @@ module Zip
end
class Writer
+ #
+ # Is this `Writer` closed?
+ #
getter? :closed
+ #
+ # Create a new `Writer` object.
+ #
+ # You shouldn't need to instantiate this class directly; use
+ # `Zip.write()` instead.
+ #
def initialize(
@io : IO,
@pos : UInt32 = 0,
@@ -690,11 +821,27 @@ module Zip
raise Error.new("already closed") if closed?
end
+ #
+ # Return the total number of bytes written so far.
+ #
+ # Example:
+ #
+ # Zip.write("foo.zip") do |zip|
+ # # add "bar.txt"
+ # zip.add_file("bar.txt", "/path/to/bar.txt")
+ #
+ # # print number of bytes written so far
+ # puts "bytes written so far: #{zip.bytes_written}"
+ # end
+ #
def bytes_written : UInt32
# return total number of bytes written
@src_pos - @pos
end
+ #
+ # Close this writer and return the total number of bytes written.
+ #
def close
assert_open
@@ -715,6 +862,21 @@ module Zip
bytes_written
end
+ #
+ # Read data from `IO` *io*, write it to *path* in archive, then
+ # return the number of bytes written.
+ #
+ # Example:
+ #
+ # # create IO from "/path/to/bar.txt"
+ # File.open("/path/to/bar.txt, "rb") do |io|
+ # # write to "foo.zip"
+ # Zip.write("foo.zip") do |zip|
+ # # add "bar.txt" with contents of given IO
+ # zip.add("bar.txt", io)
+ # end
+ # end
+ #
def add(
path : String,
io : IO,
@@ -748,6 +910,18 @@ module Zip
@pos - src_pos
end
+ #
+ # Write *data* to *path* in archive and return number of bytes
+ # written.
+ #
+ # Example:
+ #
+ # # write to "foo.zip"
+ # Zip.write("foo.zip") do |zip|
+ # # add "bar.txt" with contents "hello!"
+ # zip.add("bar.txt", "hello!")
+ # end
+ #
def add(
path : String,
data : String | Bytes,
@@ -758,6 +932,18 @@ module Zip
add(path, MemoryIO.new(data), method, time, comment)
end
+ #
+ # Add local file *file_path* to archive as *path* and return number
+ # of bytes written.
+ #
+ # Example:
+ #
+ # # write to "foo.zip"
+ # Zip.write("foo.zip") do |zip|
+ # # add local file "/path/to/bar.txt" as "bar.txt"
+ # zip.add_file("bar.txt", "/path/to/bar.txt")
+ # end
+ #
def add_file(
path : String,
file_path : String,
@@ -770,6 +956,7 @@ module Zip
end
end
+ # :nodoc:
# 4.3.16 End of central directory record:
#
# * end of central dir signature 4 bytes (0x06054b50)
@@ -786,6 +973,7 @@ module Zip
# the starting disk number 4 bytes
# * .ZIP file comment length 2 bytes
# * .ZIP file comment (variable size)
+ # :nodoc:
private def write_footer(
cdr_pos : UInt32,
@@ -819,6 +1007,20 @@ module Zip
end
end
+ #
+ # Create a `Zip::Writer` for the output IO *io* and yield it to
+ # the given block. Returns number of bytes written.
+ #
+ # Example:
+ #
+ # # create output IO
+ # File.open("foo.zip", "wb") do |io|
+ # Zip.write(io) do |zip|
+ # # add "bar.txt" with contents "hello!"
+ # zip.add("bar.txt", "hello!")
+ # end
+ # end
+ #
def self.write(
io : IO,
pos : UInt32 = 0_u32,
@@ -842,6 +1044,18 @@ module Zip
r
end
+ #
+ # Create a `Zip::Writer` for the output file *path* and yield it to
+ # the given block. Returns number of bytes written.
+ #
+ # Example:
+ #
+ # # create "foo.zip"
+ # Zip.write("foo.zip") do |zip|
+ # # add "bar.txt" with contents "hello!"
+ # zip.add("bar.txt", "hello!")
+ # end
+ #
def self.write(
path : String,
pos : UInt32 = 0_u32,
@@ -854,9 +1068,22 @@ module Zip
end
end
+ #
+ # Base class for input source for `Archive` object.
+ #
+ # You should not need to instantiate this class directly; use
+ # `Zip.read()` instead.
+ #
class Source
include IO
+ #
+ # Instantiate a new `Source` from the given `IO::FileDescriptor` or
+ # `MemoryIO` object.
+ #
+ # You should not need to instantiate this class directly; use
+ # `Zip.read()` instead.
+ #
def initialize(@io : IO::FileDescriptor | MemoryIO)
end
@@ -865,28 +1092,20 @@ module Zip
forward_missing_to @io
end
- # central file header signature 4 bytes (0x02014b50)
- # version made by 2 bytes
- # version needed to extract 2 bytes
- # general purpose bit flag 2 bytes
- # compression method 2 bytes
- # last mod file time 2 bytes
- # last mod file date 2 bytes
- # crc-32 4 bytes
- # compressed size 4 bytes
- # uncompressed size 4 bytes
- # file name length 2 bytes
- # extra field length 2 bytes
- # file comment length 2 bytes
- # disk number start 2 bytes
- # internal file attributes 2 bytes
- # external file attributes 4 bytes
- # relative offset of local header 4 bytes
- #
- # file name (variable size)
- # extra field (variable size)
- # file comment (variable size)
-
+ #
+ # Extra data associated with `Entry`.
+ #
+ # You should not need to instantiate this class directly; use
+ # `Zip::Entry#extras` or `Zip::Entry#local_extras` instead.
+ #
+ # Example:
+ #
+ # # open "foo.zip"
+ # Zip.read("foo.zip") do |zip|
+ # # get extra data associated with "bar.txt"
+ # extras = zip["bar.txt"].extras
+ # end
+ #
class Extra
property :code, :data
@@ -909,6 +1128,26 @@ module Zip
end
end
+ #
+ # File entry in `Archive`.
+ #
+ # Use `Zip.read()` to read a Zip archive, then `#[]` to fetch a
+ # specific archive entry.
+ #
+ # Example:
+ #
+ # # create MemoryIO
+ # io = MemoryIO.new
+ #
+ # # open "foo.zip"
+ # Zip.read("foo.zip") do |zip|
+ # # get "bar.txt" entry from "foo.zip"
+ # e = zip["bar.txt"]
+ #
+ # # read contents of "bar.txt" into io
+ # e.read(io)
+ # end
+ #
class Entry
include NoneCompressionHelper
include DeflateCompressionHelper
@@ -917,6 +1156,36 @@ module Zip
:compressed_size, :uncompressed_size, :path, :extras,
:comment, :internal_attr, :external_attr, :pos
+ # :nodoc:
+ # central file header signature 4 bytes (0x02014b50)
+ # version made by 2 bytes
+ # version needed to extract 2 bytes
+ # general purpose bit flag 2 bytes
+ # compression method 2 bytes
+ # last mod file time 2 bytes
+ # last mod file date 2 bytes
+ # crc-32 4 bytes
+ # compressed size 4 bytes
+ # uncompressed size 4 bytes
+ # file name length 2 bytes
+ # extra field length 2 bytes
+ # file comment length 2 bytes
+ # disk number start 2 bytes
+ # internal file attributes 2 bytes
+ # external file attributes 4 bytes
+ # relative offset of local header 4 bytes
+ #
+ # file name (variable size)
+ # extra field (variable size)
+ # file comment (variable size)
+ # :nodoc:
+
+ #
+ # Instantiate a new `Entry` object from the given IO.
+ #
+ # You should not need to call this method directly (use
+ # `Zip::Archive#[]` instead).
+ #
def initialize(@io : Source)
# allocate slice for header data
head_buf = Bytes.new(46)
@@ -992,6 +1261,7 @@ module Zip
nil
end
+ # :nodoc:
# local file header signature 4 bytes (0x04034b50)
# version needed to extract 2 bytes
# general purpose bit flag 2 bytes
@@ -1005,8 +1275,26 @@ module Zip
# extra field length 2 bytes
# file name (variable size)
# extra field (variable size)
+ # :nodoc:
- def read(dst_io : IO)
+ #
+ # Write contents of `Entry` into given `IO`.
+ #
+ # Raises an `Error` if the file contents could not be read or if the
+ # compression method is unsupported.
+ #
+ # Example:
+ #
+ # # open "output-bar.txt" for writing
+ # File.open("output-bar.txt", "wb") do |io|
+ # # open archive "./foo.zip"
+ # Zip.read("foo.zip") do |zip|
+ # # write contents of "bar.txt" to "output-bar.txt"
+ # zip["foo.txt"].read(io)
+ # end
+ # end
+ #
+ def read(dst_io : IO) : UInt32
# create buffer for local header
buf = Bytes.new(30)
@@ -1046,8 +1334,30 @@ module Zip
else
raise Error.new("unsupported method: #{@method}")
end
+
+ # return number of bytes written
+ @uncompressed_size
end
+ #
+ # Returns an array of `Extra` attributes for this `Entry`.
+ #
+ # Zip archives can (and do) have separate `Extra` attributes
+ # associated with the file entry itself, and the file's entry in the
+ # Central Directory.
+ #
+ # The `#extras` method returns the `Extra` attributes from the
+ # file's entry in the Central Directory, and this method returns the
+ # `Extra` data from the file entry itself.
+ #
+ # Example:
+ #
+ # # open "./foo.zip"
+ # Zip.read("./foo.zip") do |zip|
+ # # get array of local extra attributes from "bar.txt"
+ # extras = zip["bar.txt"].local_extras
+ # end
+ #
def local_extras : Array(Extra)
unless @local_extras
# move to extras_len in local header
@@ -1068,6 +1378,9 @@ module Zip
@local_extras.not_nil!
end
+ #
+ # Returns an array of `Extra` attributes of length `len` from IO `io`.
+ #
private def read_extras(io, len : UInt16) : Array(Extra)
# read extras
r = [] of Extra
@@ -1095,6 +1408,12 @@ module Zip
r
end
+ #
+ # Read String of length bytes from IO.
+ #
+ # Note: At the moment this assumes UTF-8 encoding, but we should
+ # make this configurable via a parameter to `#read()`.
+ #
private def read_string(io, len : UInt16, name : String) : String
if len > 0
buf = Bytes.new(len)
@@ -1111,6 +1430,7 @@ module Zip
end
end
+ # :nodoc:
# 4.3.16 End of central directory record:
#
# * end of central dir signature 4 bytes (0x06054b50)
@@ -1127,13 +1447,24 @@ module Zip
# the starting disk number 4 bytes
# * .ZIP file comment length 2 bytes
# * .ZIP file comment (variable size)
+ # :nodoc:
+ #
+ # Input archive.
+ #
+ # Use `Zip.read()` instead of instantiating this class directly.
+ #
class Archive
include Enumerable(Entry)
include Iterable
getter :entries, :comment
+ #
+ # Create new Zip::Archive from input Zip::Source.
+ #
+ # Use `Zip.read()` instead of calling this method directly.
+ #
def initialize(@io : Source)
# initialize entries
# find footer and end of io
@@ -1213,6 +1544,9 @@ module Zip
# enumeration/iteration methods #
#################################
+ #
+ # Get hash of path -> Zip::Entries
+ #
private def paths
@paths ||= @entries.reduce({} of String => Entry) do |r, e|
r[e.path] = e
@@ -1220,18 +1554,58 @@ module Zip
end.as(Hash(String, Entry))
end
+ #
+ # Get Zip::Entry by path.
+ #
+ # Example:
+ #
+ # # get bar.txt and read it into memory io
+ # io = MemoryIO.new
+ # zip["bar.txt"].read(io)
+ #
def [](path : String) : Entry
paths[path]
end
+ #
+ # Return Zip::Entry from path, or nil if it doesn't exist.
+ #
+ # Example:
+ #
+ # # read bar.txt into memory io if it exists
+ # if e = zip["bar.txt"]?
+ # io = MemoryIO.new
+ # e.read(io)
+ # end
+ #
def []?(path : String) : Entry?
paths[path]?
end
+ #
+ # Get Zip::Entry by number.
+ #
+ # Example:
+ #
+ # # read third entry from archive into memory io
+ # io = MemoryIO.new
+ # zip[2].read(io)
+ #
def [](id : Int) : Entry
@entries[id]
end
+ #
+ # Get Zip::Entry by number, or nil if it doesn't exist
+ #
+ # Example:
+ #
+ # # read third entry from archive into memory io
+ # if e = zip[2]
+ # io = MemoryIO.new
+ # e.read(io)
+ # end
+ #
def []?(id : Int) : Entry?
@entries[id]?
end
@@ -1243,6 +1617,9 @@ module Zip
# loading methods #
###################
+ #
+ # Read CDR entries from given `Zip::Source`.
+ #
private def read_entries(
entries : Array(Entry),
io : Source,
@@ -1271,6 +1648,9 @@ module Zip
end
end
+ #
+ # Find EOF and end of CDR for archive.
+ #
private def find_footer_and_eof(io : Source)
# seek to end of file
io.seek(0, IO::Seek::End)
@@ -1321,6 +1701,21 @@ module Zip
end
end
+
+ #
+ # Read Zip::Archive from seekable IO instance and pass it to the given
+ # block.
+ #
+ # Example:
+ #
+ # # create memory io for contents of "bar.txt"
+ # io = MemoryIO.new
+ #
+ # # read "bar.txt" from "./foo.zip"
+ # Zip.read(File.open("./foo.zip", "rb")) do |zip|
+ # zip["bar.txt"].read(io)
+ # end
+ #
def self.read(
io : IO,
&cb : Archive -> \
@@ -1329,6 +1724,20 @@ module Zip
cb.call(r)
end
+ #
+ # Read Zip::Archive from Slice and pass it to the given block.
+ #
+ # Example:
+ #
+ # # create memory io for contents of "bar.txt"
+ # io = MemoryIO.new
+ #
+ # # extract "bar.txt" from zip archive in Slice some_slice and
+ # # save it to MemoryIO
+ # Zip.read(some_slice) do |zip|
+ # zip["bar.txt"].read(io)
+ # end
+ #
def self.read(
slice : Bytes,
&cb : Archive -> \
@@ -1337,6 +1746,19 @@ module Zip
read(src, &cb)
end
+ #
+ # Read Zip::Archive from File and pass it to the given block.
+ #
+ # Example:
+ #
+ # # create memory io for contents of "bar.txt"
+ # io = MemoryIO.new
+ #
+ # # extract "bar.txt" from "./foo.zip" and save it to MemoryIO
+ # Zip.read("./foo.zip") do |zip|
+ # zip["bar.txt"].read(io)
+ # end
+ #
def self.read(
path : String,
&cb : Archive -> \