File Coverage

lib/Archive/Tar.pm
Criterion Covered Total %
statement 362 462 78.4
branch 153 286 53.5
condition 56 92 60.9
subroutine 41 47 87.2
pod 23 24 95.8
total 635 911 69.7


line stmt bran cond sub pod time code
1             ### the gnu tar specification:
2             ### http://www.gnu.org/software/tar/manual/tar.html
3             ###
4             ### and the pax format spec, which tar derives from:
5             ### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6              
7             package Archive::Tar;
8             require 5.005_03;
9              
10 3     3   41 use strict;
  3         29  
  3         96  
11 3         49 use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
12 3     3   43 $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING];
  3         28  
13              
14             $DEBUG              = 0;
15             $WARN               = 1;
16             $FOLLOW_SYMLINK     = 0;
17             $VERSION            = "1.30";
18             $CHOWN              = 1;
19             $CHMOD              = 1;
20             $DO_NOT_USE_PREFIX  = 0;
21              
22             BEGIN {
23 3     3   46     use Config;
  3         27  
  3         49  
24 3     3   39     $HAS_PERLIO = $Config::Config{useperlio};
25              
26             ### try and load IO::String anyway, so you can dynamically
27             ### switch between perlio and IO::String
28 3         50     eval {
29 3         129         require IO::String;
30 3         74         import IO::String;
31                 };
32 3 50       41     $HAS_IO_STRING = $@ ? 0 : 1;
33              
34             }
35              
36 3     3   93 use Cwd;
  3         25  
  3         64  
37 3     3   122 use IO::File;
  3         31  
  3         65  
38 3     3   51 use Carp qw(carp croak);
  3         27  
  3         49  
39 3     3   46 use File::Spec ();
  3         26  
  3         28  
40 3     3   75 use File::Spec::Unix ();
  3         26  
  3         27  
41 3     3   43 use File::Path ();
  3         26  
  3         27  
42              
43 3     3   90 use Archive::Tar::File;
  3         30  
  3         80  
44 3     3   59 use Archive::Tar::Constant;
  3         28  
  3         61  
45              
46             =head1 NAME
47            
48             Archive::Tar - module for manipulations of tar archives
49            
50             =head1 SYNOPSIS
51            
52             use Archive::Tar;
53             my $tar = Archive::Tar->new;
54            
55             $tar->read('origin.tgz',1);
56             $tar->extract();
57            
58             $tar->add_files('file/foo.pl', 'docs/README');
59             $tar->add_data('file/baz.txt', 'This is the contents now');
60            
61             $tar->rename('oldname', 'new/file/name');
62            
63             $tar->write('files.tar');
64            
65             =head1 DESCRIPTION
66            
67             Archive::Tar provides an object oriented mechanism for handling tar
68             files. It provides class methods for quick and easy files handling
69             while also allowing for the creation of tar file objects for custom
70             manipulation. If you have the IO::Zlib module installed,
71             Archive::Tar will also support compressed or gzipped tar files.
72            
73             An object of class Archive::Tar represents a .tar(.gz) archive full
74             of files and things.
75            
76             =head1 Object Methods
77            
78             =head2 Archive::Tar->new( [$file, $compressed] )
79            
80             Returns a new Tar object. If given any arguments, C<new()> calls the
81             C<read()> method automatically, passing on the arguments provided to
82             the C<read()> method.
83            
84             If C<new()> is invoked with arguments and the C<read()> method fails
85             for any reason, C<new()> returns undef.
86            
87             =cut
88              
89             my $tmpl = {
90                 _data => [ ],
91                 _file => 'Unknown',
92             };
93              
94             ### install get/set accessors for this object.
95             for my $key ( keys %$tmpl ) {
96 3     3   833     no strict 'refs';
  3         44  
  3         53  
97                 *{__PACKAGE__."::$key"} = sub {
98 192     192   3366         my $self = shift;
99 192 100       2435         $self->{$key} = $_[0] if @_;
100 192         6465         return $self->{$key};
101                 }
102             }
103              
104             sub new {
105 29     28 1 8222     my $class = shift;
106 28 50       698     $class = ref $class if ref $class;
107              
108             ### copying $tmpl here since a shallow copy makes it use the
109             ### same aref, causing for files to remain in memory always.
110 28         866     my $obj = bless { _data => [ ], _file => 'Unknown' }, $class;
111              
112 28 100       1349     if (@_) {
113 3 50       35         unless ( $obj->read( @_ ) ) {
114 0         0             $obj->_error(qq[No data could be read from file]);
115 0         0             return;
116                     }
117                 }
118              
119 28         484     return $obj;
120             }
121              
122             =head2 $tar->read ( $filename|$handle, $compressed, {opt => 'val'} )
123            
124             Read the given tar file into memory.
125             The first argument can either be the name of a file or a reference to
126             an already open filehandle (or an IO::Zlib object if it's compressed)
127             The second argument indicates whether the file referenced by the first
128             argument is compressed.
129            
130             The C<read> will I<replace> any previous content in C<$tar>!
131            
132             The second argument may be considered optional if IO::Zlib is
133             installed, since it will transparently Do The Right Thing.
134             Archive::Tar will warn if you try to pass a compressed file if
135             IO::Zlib is not available and simply return.
136            
137             Note that you can currently B<not> pass a C<gzip> compressed
138             filehandle, which is not opened with C<IO::Zlib>, nor a string
139             containing the full archive information (either compressed or
140             uncompressed). These are worth while features, but not currently
141             implemented. See the C<TODO> section.
142            
143             The third argument can be a hash reference with options. Note that
144             all options are case-sensitive.
145            
146             =over 4
147            
148             =item limit
149            
150             Do not read more than C<limit> files. This is useful if you have
151             very big archives, and are only interested in the first few files.
152            
153             =item extract
154            
155             If set to true, immediately extract entries when reading them. This
156             gives you the same memory break as the C<extract_archive> function.
157             Note however that entries will not be read into memory, but written
158             straight to disk.
159            
160             =back
161            
162             All files are stored internally as C<Archive::Tar::File> objects.
163             Please consult the L<Archive::Tar::File> documentation for details.
164            
165             Returns the number of files read in scalar context, and a list of
166             C<Archive::Tar::File> objects in list context.
167            
168             =cut
169              
170             sub read {
171 23     23 1 5484     my $self = shift;
172 23         551     my $file = shift;
173 23   100     1132     my $gzip = shift || 0;
174 23   100     882     my $opts = shift || {};
175              
176 23 100       293     unless( defined $file ) {
177 1         14         $self->_error( qq[No file to read from!] );
178 1         12         return;
179                 } else {
180 22         508         $self->_file( $file );
181                 }
182              
183 22 50       700     my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
184                                 or return;
185              
186 22 50       804     my $data = $self->_read_tar( $handle, $opts ) or return;
187              
188 22         674     $self->_data( $data );
189              
190 22 100       259     return wantarray ? @$data : scalar @$data;
191             }
192              
193             sub _get_handle {
194 35     35   317     my $self = shift;
195 35 50       343     my $file = shift; return unless defined $file;
  35         340  
196 35 50       338                         return $file if ref $file;
197              
198 35   100     808     my $gzip = shift || 0;
199 35   33     341     my $mode = shift || READ_ONLY->( ZLIB ); # default to read only
200              
201 35         273     my $fh; my $bin;
  35         311  
202              
203             ### only default to ZLIB if we're not trying to /write/ to a handle ###
204 35 100 100     2422     if( ZLIB and $gzip || MODE_READ->( $mode ) ) {
205              
206             ### IO::Zlib will Do The Right Thing, even when passed
207             ### a plain file ###
208 28         1366         $fh = new IO::Zlib;
209              
210                 } else {
211 7 50       85         if( $gzip ) {
212 0         0             $self->_error(qq[Compression not available - Install IO::Zlib!]);
213 0         0             return;
214              
215                     } else {
216 7         131             $fh = new IO::File;
217 7         704             $bin++;
218                     }
219                 }
220              
221 35 50       2688     unless( $fh->open( $file, $mode ) ) {
222 0         0         $self->_error( qq[Could not create filehandle for '$file': $!!] );
223 0         0         return;
224                 }
225              
226 35 100       190651     binmode $fh if $bin;
227              
228 35         631     return $fh;
229             }
230              
231             sub _read_tar {
232 22     22   261     my $self = shift;
233 22 50       345     my $handle = shift or return;
234 22   50     224     my $opts = shift || {};
235              
236 22   100     329     my $count = $opts->{limit} || 0;
237 22   100     291     my $extract = $opts->{extract} || 0;
238              
239             ### set a cap on the amount of files to extract ###
240 22         612     my $limit = 0;
241 22 100       275     $limit = 1 if $count > 0;
242              
243 22         680     my $tarfile = [ ];
244 22         283     my $chunk;
245 22         174     my $read = 0;
246 22         765     my $real_name; # to set the name of a file when
247             # we're encountering @longlink
248 22         178     my $data;
249              
250                 LOOP:
251 22         673     while( $handle->read( $chunk, HEAD ) ) {
252             ### IO::Zlib doesn't support this yet
253 182   50     87382         my $offset = eval { tell $handle } || 'unknown';
  182         2898  
254              
255 182 100       4405         unless( $read++ ) {
256 22         192             my $gzip = GZIP_MAGIC_NUM;
257 22 50       675             if( $chunk =~ /$gzip/ ) {
258 0         0                 $self->_error( qq[Cannot read compressed format in tar-mode] );
259 0         0                 return;
260                         }
261                     }
262              
263             ### if we can't read in all bytes... ###
264 182 50       2732         last if length $chunk != HEAD;
265              
266             ### Apparently this should really be two blocks of 512 zeroes,
267             ### but GNU tar sometimes gets it wrong. See comment in the
268             ### source code (tar.c) to GNU cpio.
269 182 100       1986         next if $chunk eq TAR_END;
270              
271             ### according to the posix spec, the last 12 bytes of the header are
272             ### null bytes, to pad it to a 512 byte block. That means if these
273             ### bytes are NOT null bytes, it's a corrrupt header. See:
274             ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
275             ### line 111
276 92         732         { my $nulls = join '', "\0" x 12;
  92         1004  
277 92 50       1020             unless( $nulls eq substr( $chunk, 500, 12 ) ) {
278 0         0                 $self->_error( qq[Invalid header block at offset $offset] );
279 0         0                 next LOOP;
280                         }
281                     }
282              
283             ### pass the realname, so we can set it 'proper' right away
284             ### some of the heuristics are done on the name, so important
285             ### to set it ASAP
286 92         866         my $entry;
287 92         723         { my %extra_args = ();
  92         868  
288 92 100       904             $extra_args{'name'} = $$real_name if defined $real_name;
289                         
290 92 50       1700             unless( $entry = Archive::Tar::File->new( chunk => $chunk,
291                                                                     %extra_args )
292                         ) {
293 0         0                 $self->_error( qq[Couldn't read chunk at offset $offset] );
294 0         0                 next LOOP;
295                         }
296                     }
297              
298             ### ignore labels:
299             ### http://www.gnu.org/manual/tar/html_node/tar_139.html
300 92 50       1118         next if $entry->is_label;
301              
302 92 100 66     1060         if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
      66        
303              
304 72 50 33     794             if ( $entry->is_file && !$entry->validate ) {
305             ### sometimes the chunk is rather fux0r3d and a whole 512
306             ### bytes ends p in the ->name area.
307             ### clean it up, if need be
308 0         0                 my $name = $entry->name;
309 0 0       0                 $name = substr($name, 0, 100) if length $name > 100;
310 0         0                 $name =~ s/\n/ /g;
311              
312 0         0                 $self->_error( $name . qq[: checksum error] );
313 0         0                 next LOOP;
314                         }
315              
316 72         1112             my $block = BLOCK_SIZE->( $entry->size );
317              
318 72         1129             $data = $entry->get_content_by_ref;
319              
320             ### just read everything into memory
321             ### can't do lazy loading since IO::Zlib doesn't support 'seek'
322             ### this is because Compress::Zlib doesn't support it =/
323             ### this reads in the whole data in one read() call.
324 72 50       1300             if( $handle->read( $$data, $block ) < $block ) {
325 0         0                 $self->_error( qq[Read error on tarfile (missing data) '].
326                                                 $entry->full_path ."' at offset $offset" );
327 0         0                 next LOOP;
328                         }
329              
330             ### throw away trailing garbage ###
331 72         32652             substr ($$data, $entry->size) = "";
332              
333             ### part II of the @LongLink munging -- need to do /after/
334             ### the checksum check.
335 72 100       924             if( $entry->is_longlink ) {
336             ### weird thing in tarfiles -- if the file is actually a
337             ### @LongLink, the data part seems to have a trailing ^@
338             ### (unprintable) char. to display, pipe output through less.
339             ### but that doesn't *always* happen.. so check if the last
340             ### character is a control character, and if so remove it
341             ### at any rate, we better remove that character here, or tests
342             ### like 'eq' and hashlook ups based on names will SO not work
343             ### remove it by calculating the proper size, and then
344             ### tossing out everything that's longer than that size.
345              
346             ### count number of nulls
347 20         222                 my $nulls = $$data =~ tr/\0/\0/;
348       </