File Coverage

lib/Archive/Extract.pm
Criterion Covered Total %
statement 67 67 100.0
branch 2 4 50.0
condition n/a
subroutine 22 22 100.0
pod n/a
total 91 93 97.8


line stmt bran cond sub pod time code
1             package Archive::Extract;
2              
3 1     1   48 use strict;
  1         9  
  1         67  
4              
5 1     1   27 use Cwd qw[cwd];
  1         11  
  1         284  
6 1     1   17 use Carp qw[carp];
  1         29  
  1         39  
7 1     1   25 use IPC::Cmd qw[run can_run];
  1         9  
  1         22  
8 1     1   14 use FileHandle;
  1         11  
  1         85  
9 1     1   20 use File::Path qw[mkpath];
  1         9  
  1         67  
10 1     1   15 use File::Spec;
  1         9  
  1         19  
11 1     1   15 use File::Basename qw[dirname basename];
  1         9  
  1         201  
12 1     1   21 use Params::Check qw[check];
  1         9  
  1         18  
13 1     1   16 use Module::Load::Conditional qw[can_load check_install];
  1         9  
  1         16  
14 1     1   16 use Locale::Maketext::Simple Style => 'gettext';
  1         9  
  1         15  
15              
16             ### solaris has silly /bin/tar output ###
17 1 50   1   18 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
  1         9  
  1         24  
18 1 50   1   16 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
  1         10  
  1         13  
  40         32042  
19              
20             ### If these are changed, update @TYPES and the new() POD
21 1     1   42 use constant TGZ => 'tgz';
  1         10  
  1         13  
22 1     1   15 use constant TAR => 'tar';
  1         9  
  1         13  
23 1     1   20 use constant GZ => 'gz';
  1         9  
  1         14  
24 1     1   15 use constant ZIP => 'zip';
  1         9  
  1         13  
25 1     1   15 use constant BZ2 => 'bz2';
  1         9  
  1         13  
26 1     1   99 use constant TBZ => 'tbz';
  1         10  
  1         13  
27              
28 1     1   15 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
  1         9  
  1         15  
29              
30             $VERSION        = '0.16';
31             $PREFER_BIN     = 0;
32             $WARN           = 1;
33             $DEBUG          = 0;
34             my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ ); # same as all constants
35              
36             local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
37              
38             =pod
39            
40             =head1 NAME
41            
42             Archive::Extract - A generic archive extracting mechanism
43            
44             =head1 SYNOPSIS
45            
46             use Archive::Extract;
47            
48             ### build an Archive::Extract object ###
49             my $ae = Archive::Extract->new( archive => 'foo.tgz' );
50            
51             ### extract to cwd() ###
52             my $ok = $ae->extract;
53            
54             ### extract to /tmp ###
55             my $ok = $ae->extract( to => '/tmp' );
56            
57             ### what if something went wrong?
58             my $ok = $ae->extract or die $ae->error;
59            
60             ### files from the archive ###
61             my $files = $ae->files;
62            
63             ### dir that was extracted to ###
64             my $outdir = $ae->extract_path;
65            
66            
67             ### quick check methods ###
68             $ae->is_tar # is it a .tar file?
69             $ae->is_tgz # is it a .tar.gz or .tgz file?
70             $ae->is_gz; # is it a .gz file?
71             $ae->is_zip; # is it a .zip file?
72             $ae->is_bz2; # is it a .bz2 file?
73             $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
74            
75             ### absolute path to the archive you provided ###
76             $ae->archive;
77            
78             ### commandline tools, if found ###
79             $ae->bin_tar # path to /bin/tar, if found
80             $ae->bin_gzip # path to /bin/gzip, if found
81             $ae->bin_unzip # path to /bin/unzip, if found
82             $ae->bin_bunzip2 # path to /bin/bunzip2 if found
83            
84             =head1 DESCRIPTION
85            
86             Archive::Extract is a generic archive extraction mechanism.
87            
88             It allows you to extract any archive file of the type .tar, .tar.gz,
89             .gz, tar.bz2, .tbz, .bz2 or .zip without having to worry how it does
90             so, or use different interfaces for each type by using either perl
91             modules, or commandline tools on your system.
92            
93             See the C<HOW IT WORKS> section further down for details.
94            
95             =cut
96              
97              
98             ### see what /bin/programs are available ###
99             $PROGRAMS = {};
100             for my $pgm (qw[tar unzip gzip bunzip2]) {
101                 $PROGRAMS->{$pgm} = can_run($pgm);
102             }
103              
104             ### mapping from types to extractor methods ###
105             my $Mapping = {
106                 is_tgz => '_untar',
107                 is_tar => '_untar',
108                 is_gz => '_gunzip',
109                 is_zip => '_unzip',
110                 is_tbz => '_untar',
111                 is_bz2 => '_bunzip2',
112             };
113              
114             {
115                 my $tmpl = {
116                     archive => { required => 1, allow => FILE_EXISTS },
117                     type => { default => '', allow => [ @Types ] },
118                 };
119              
120             ### build accesssors ###
121                 for my $method( keys %$tmpl,
122                                 qw[_extractor _gunzip_to files extract_path],
123                                 qw[_error_msg _error_msg_long]
124                 ) {
125 1     1   19         no strict 'refs';
  1         10  
  1         16  
126                     *$method = sub {
127                                     my $self = shift;
128                                     $self->{$method} = $_[0] if @_;
129                                     return $self->{$method};
130                                 }
131                 }
132              
133             =head1 METHODS
134            
135             =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
136            
137             Creates a new C<Archive::Extract> object based on the archive file you
138             passed it. Automatically determines the type of archive based on the
139             extension, but you can override that by explicitly providing the
140             C<type> argument.
141            
142             Valid values for C<type> are:
143            
144             =over 4
145            
146             =item tar
147            
148             Standard tar files, as produced by, for example, C</bin/tar>.
149             Corresponds to a C<.tar> suffix.
150            
151             =item tgz
152            
153             Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
154             Corresponds to a C<.tgz> or C<.tar.gz> suffix.
155            
156             =item gz
157            
158             Gzip compressed file, as produced by, for example C</bin/gzip>.
159             Corresponds to a C<.gz> suffix.
160            
161             =item zip
162            
163             Zip compressed file, as produced by, for example C</bin/zip>.
164             Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
165            
166             =item bz2
167            
168             Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
169             Corresponds to a C<.bz2> suffix.
170            
171             =item tbz
172            
173             Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
174             Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
175            
176             =back
177            
178             Returns a C<Archive::Extract> object on success, or false on failure.
179            
180             =cut
181              
182             ### constructor ###
183                 sub new {
184                     my $class = shift;
185                     my %hash = @_;
186              
187                     my $parsed = check( $tmpl, \%hash ) or return;
188              
189             ### make sure we have an absolute path ###
190                     my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
191              
192             ### figure out the type, if it wasn't already specified ###
193                     unless ( $parsed->{type} ) {
194                         $parsed->{type} =
195                             $ar =~ /.+?\.(?:tar\.gz)|tgz$/i ? TGZ :
196                             $ar =~ /.+?\.gz$/i ? GZ :
197                             $ar =~ /.+?\.tar$/i ? TAR :
198                             $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
199                             $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i ? TBZ :
200                             $ar =~ /.+?\.bz2$/i ? BZ2 :
201                             '';
202              
203                     }
204              
205             ### don't know what type of file it is ###
206                     return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
207                                             $parsed->{archive} )) unless $parsed->{type};
208              
209                     return bless $parsed, $class;
210                 }
211             }
212              
213             =head2 $ae->extract( [to => '/output/path'] )
214            
215             Extracts the archive represented by the C<Archive::Extract> object to
216             the path of your choice as specified by the C<to> argument. Defaults to
217             C<cwd()>.
218            
219             Since C<.gz> files never hold a directory, but only a single file; if
220             the C<to> argument is an existing directory, the file is extracted
221             there, with it's C<.gz> suffix stripped.
222             If the C<to> argument is not an existing directory, the C<to> argument
223             is understood to be a filename, if the archive type is C<gz>.
224             In the case that you did not specify a C<to> argument, the output
225             file will be the name of the archive file, stripped from it's C<.gz>
226             suffix, in the current working directory.
227            
228             C<extract> will try a pure perl solution first, and then fall back to
229             commandline tools if they are available. See the C<GLOBAL VARIABLES>
230             section below on how to alter this behaviour.
231            
232             It will return true on success, and false on failure.
233            
234             On success, it will also set the follow attributes in the object:
235            
236             =over 4
237            
238             =item $ae->extract_path
239            
240             This is the directory that the files where extracted to.
241            
242             =item $ae->files
243            
244             This is an array ref with the paths of all the files in the archive,
245             relative to the C<to> argument you specified.
246             To get the full path to an extracted file, you would use:
247            
248             File::Spec->catfile( $to, $ae->files->[0] );
249            
250             Note that all files from a tar archive will be in unix format, as per
251             the tar specification.
252            
253             =back
254            
255             =cut
256              
257             sub extract {
258                 my $self = shift;
259                 my %hash = @_;
260              
261                 my $to;
262                 my $tmpl = {
263                     to => { default => '.', store => \$to }
264                 };
265              
266                 check( $tmpl, \%hash ) or return;
267              
268             ### so 'to' could be a file or a dir, depending on whether it's a .gz
269             ### file, or basically anything else.
270             ### so, check that, then act accordingly.
271             ### set an accessor specifically so _gunzip can know what file to extract
272             ### to.
273                 my $dir;
274                 { ### a foo.gz file
275                     if( $self->is_gz or $self->is_bz2 ) {
276                 
277                         my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2)$//i;
278                     
279             ### to is a dir?
280                         if ( -d $to ) {
281                             $dir = $to;
282                             $self->_gunzip_to( basename($cp) );
283              
284             ### then it's a filename
285                         } else {
286                             $dir = dirname($to);
287                             $self->_gunzip_to( basename($to) );
288                         }
289              
290             ### not a foo.gz file
291                     } else {
292                         $dir = $to;
293                     }
294                 }
295              
296             ### make the dir if it doesn't exist ###
297                 unless( -d $dir ) {
298                     eval { mkpath( $dir ) };
299              
300                     return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
301                         if $@;
302                 }
303              
304             ### get the current dir, to restore later ###
305                 my $cwd = cwd();
306              
307                 my $ok = 1;
308                 EXTRACT: {
309              
310             ### chdir to the target dir ###
311                     unless( chdir $dir ) {
312                         $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
313                         $ok = 0; last EXTRACT;
314                     }
315              
316             ### set files to an empty array ref, so there's always an array
317             ### ref IN the accessor, to avoid errors like:
318             ### Can't use an undefined value as an ARRAY reference at
319             ### ../lib/Archive/Extract.pm line 742. (rt #19815)
320                     $self->files( [] );
321              
322             ### find what extractor method to use ###
323                     while( my($type,$method) = each %$Mapping ) {
324              
325             ### call the corresponding method if the type is OK ###
326                         if( $self->$type) {
327                             $ok = $self->$method();
328                         }
329                     }
330              
331             ### warn something went wrong if we didn't get an OK ###
332                     $self->_error(loc("Extract failed, no extractor found"))
333                         unless $ok;
334              
335                 }
336              
337             ### and chdir back ###
338                 unless( chdir $cwd ) {
339                     $self->_error(loc("Could not chdir back to start dir '%1': %2'",
340                                         $cwd, $!));
341                 }
342              
343                 return $ok;
344             }
345              
346             =pod
347            
348             =head1 ACCESSORS
349            
350             =head2 $ae->error([BOOL])
351            
352             Returns the last encountered error as string.
353             Pass it a true value to get the C<Carp::longmess()> output instead.
354            
355             =head2 $ae->extract_path
356            
357             This is the directory the archive got extracted to.
358             See C<extract()> for details.
359            
360             =head2 $ae->files
361            
362             This is an array ref holding all the paths from the archive.
363             See C<extract()> for details.
364            
365             =head2 $ae->archive
366            
367             This is the full path to the archive file represented by this
368             C<Archive::Extract> object.
369            
370