File Coverage

lib/Archive/Extract.pm
Criterion Covered Total %
statement 67 67 100.0
branch 2 4 50.0
condition n/a
subroutine 22 22 100.0
pod n/a
total 91 93 97.8


line stmt bran cond sub pod time code
1             package Archive::Extract;
2              
3 1     1   48 use strict;
  1         9  
  1         67  
4              
5 1     1   27 use Cwd qw[cwd];
  1         11  
  1         284  
6 1     1   17 use Carp qw[carp];
  1         29  
  1         39  
7 1     1   25 use IPC::Cmd qw[run can_run];
  1         9  
  1         22  
8 1     1   14 use FileHandle;
  1         11  
  1         85  
9 1     1   20 use File::Path qw[mkpath];
  1         9  
  1         67  
10 1     1   15 use File::Spec;
  1         9  
  1         19  
11 1     1   15 use File::Basename qw[dirname basename];
  1         9  
  1         201  
12 1     1   21 use Params::Check qw[check];
  1         9  
  1         18  
13 1     1   16 use Module::Load::Conditional qw[can_load check_install];
  1         9  
  1         16  
14 1     1   16 use Locale::Maketext::Simple Style => 'gettext';
  1         9  
  1         15  
15              
16             ### solaris has silly /bin/tar output ###
17 1 50   1   18 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
  1         9  
  1         24  
18 1 50   1   16 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
  1         10  
  1         13  
  40         32042  
19              
20             ### If these are changed, update @TYPES and the new() POD
21 1     1   42 use constant TGZ => 'tgz';
  1         10  
  1         13  
22 1     1   15 use constant TAR => 'tar';
  1         9  
  1         13  
23 1     1   20 use constant GZ => 'gz';
  1         9  
  1         14  
24 1     1   15 use constant ZIP => 'zip';
  1         9  
  1         13  
25 1     1   15 use constant BZ2 => 'bz2';
  1         9  
  1         13  
26 1     1   99 use constant TBZ => 'tbz';
  1         10  
  1         13  
27              
28 1     1   15 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG];
  1         9  
  1         15  
29              
30             $VERSION        = '0.16';
31             $PREFER_BIN     = 0;
32             $WARN           = 1;
33             $DEBUG          = 0;
34             my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ ); # same as all constants
35              
36             local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
37              
38             =pod
39            
40             =head1 NAME
41            
42             Archive::Extract - A generic archive extracting mechanism
43            
44             =head1 SYNOPSIS
45            
46             use Archive::Extract;
47            
48             ### build an Archive::Extract object ###
49             my $ae = Archive::Extract->new( archive => 'foo.tgz' );
50            
51             ### extract to cwd() ###
52             my $ok = $ae->extract;
53            
54             ### extract to /tmp ###
55             my $ok = $ae->extract( to => '/tmp' );
56            
57             ### what if something went wrong?
58             my $ok = $ae->extract or die $ae->error;
59            
60             ### files from the archive ###
61             my $files = $ae->files;
62            
63             ### dir that was extracted to ###
64             my $outdir = $ae->extract_path;
65            
66            
67             ### quick check methods ###
68             $ae->is_tar # is it a .tar file?
69             $ae->is_tgz # is it a .tar.gz or .tgz file?
70             $ae->is_gz; # is it a .gz file?
71             $ae->is_zip; # is it a .zip file?
72             $ae->is_bz2; # is it a .bz2 file?
73             $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
74            
75             ### absolute path to the archive you provided ###
76             $ae->archive;
77            
78             ### commandline tools, if found ###
79             $ae->bin_tar # path to /bin/tar, if found
80             $ae->bin_gzip # path to /bin/gzip, if found
81             $ae->bin_unzip # path to /bin/unzip, if found
82             $ae->bin_bunzip2 # path to /bin/bunzip2 if found
83            
84             =head1 DESCRIPTION
85            
86             Archive::Extract is a generic archive extraction mechanism.
87            
88             It allows you to extract any archive file of the type .tar, .tar.gz,
89             .gz, tar.bz2, .tbz, .bz2 or .zip without having to worry how it does
90             so, or use different interfaces for each type by using either perl
91             modules, or commandline tools on your system.
92            
93             See the C<HOW IT WORKS> section further down for details.
94            
95             =cut
96              
97              
98             ### see what /bin/programs are available ###
99             $PROGRAMS = {};
100             for my $pgm (qw[tar unzip gzip bunzip2]) {
101                 $PROGRAMS->{$pgm} = can_run($pgm);
102             }
103              
104             ### mapping from types to extractor methods ###
105             my $Mapping = {
106                 is_tgz => '_untar',
107                 is_tar => '_untar',
108                 is_gz => '_gunzip',
109                 is_zip => '_unzip',
110                 is_tbz => '_untar',
111                 is_bz2 => '_bunzip2',
112             };
113              
114             {
115                 my $tmpl = {
116                     archive => { required => 1, allow => FILE_EXISTS },
117                     type => { default => '', allow => [ @Types ] },
118                 };
119              
120             ### build accesssors ###
121                 for my $method( keys %$tmpl,
122                                 qw[_extractor _gunzip_to files extract_path],
123                                 qw[_error_msg _error_msg_long]
124                 ) {
125 1     1   19         no strict 'refs';
  1         10  
  1         16  
126                     *$method = sub {
127                                     my $self = shift;
128                                     $self->{$method} = $_[0] if @_;
129                                     return $self->{$method};
130                                 }
131                 }
132              
133             =head1 METHODS
134            
135             =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
136            
137             Creates a new C<Archive::Extract> object based on the archive file you
138             passed it. Automatically determines the type of archive based on the
139             extension, but you can override that by explicitly providing the
140             C<type> argument.
141            
142             Valid values for C<type> are:
143            
144             =over 4
145            
146             =item tar
147            
148             Standard tar files, as produced by, for example, C</bin/tar>.
149             Corresponds to a C<.tar> suffix.
150            
151             =item tgz
152            
153             Gzip compressed tar files, as produced by, for example C</bin/tar -z>.
154             Corresponds to a C<.tgz> or C<.tar.gz> suffix.
155            
156             =item gz
157            
158             Gzip compressed file, as produced by, for example C</bin/gzip>.
159             Corresponds to a C<.gz> suffix.
160            
161             =item zip
162            
163             Zip compressed file, as produced by, for example C</bin/zip>.
164             Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
165            
166             =item bz2
167            
168             Bzip2 compressed file, as produced by, for example, C</bin/bzip2>.
169             Corresponds to a C<.bz2> suffix.
170            
171             =item tbz
172            
173             Bzip2 compressed tar file, as produced by, for exmample C</bin/tar -j>.
174             Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
175            
176             =back
177            
178             Returns a C<Archive::Extract> object on success, or false on failure.
179            
180             =cut
181              
182             ### constructor ###
183                 sub new {
184                     my $class = shift;
185                     my %hash = @_;
186              
187                     my $parsed = check( $tmpl, \%hash ) or return;
188              
189             ### make sure we have an absolute path ###
190                     my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
191              
192             ### figure out the type, if it wasn't already specified ###
193                     unless ( $parsed->{type} ) {
194                         $parsed->{type} =
195                             $ar =~ /.+?\.(?:tar\.gz)|tgz$/i ? TGZ :
196                             $ar =~ /.+?\.gz$/i ? GZ :
197                             $ar =~ /.+?\.tar$/i ? TAR :
198                             $ar =~ /.+?\.(zip|jar|par)$/i ? ZIP :
199                             $ar =~ /.+?\.(?:tbz|tar\.bz2?)$/i ? TBZ :
200                             $ar =~ /.+?\.bz2$/i ? BZ2 :
201                             '';
202              
203                     }
204              
205             ### don't know what type of file it is ###
206                     return __PACKAGE__->_error(loc("Cannot determine file type for '%1'",
207                                             $parsed->{archive} )) unless $parsed->{type};
208              
209                     return bless $parsed, $class;
210                 }
211             }
212              
213             =head2 $ae->extract( [to => '/output/path'] )
214            
215             Extracts the archive represented by the C<Archive::Extract> object to
216             the path of your choice as specified by the C<to> argument. Defaults to
217             C<cwd()>.
218            
219             Since C<.gz> files never hold a directory, but only a single file; if
220             the C<to> argument is an existing directory, the file is extracted
221             there, with it's C<.gz> suffix stripped.
222             If the C<to> argument is not an existing directory, the C<to> argument
223             is understood to be a filename, if the archive type is C<gz>.
224             In the case that you did not specify a C<to> argument, the output
225             file will be the name of the archive file, stripped from it's C<.gz>
226             suffix, in the current working directory.
227            
228             C<extract> will try a pure perl solution first, and then fall back to
229             commandline tools if they are available. See the C<GLOBAL VARIABLES>
230             section below on how to alter this behaviour.
231            
232             It will return true on success, and false on failure.
233            
234             On success, it will also set the follow attributes in the object:
235            
236             =over 4
237            
238             =item $ae->extract_path
239            
240             This is the directory that the files where extracted to.
241            
242             =item $ae->files
243            
244             This is an array ref with the paths of all the files in the archive,
245             relative to the C<to> argument you specified.
246             To get the full path to an extracted file, you would use:
247            
248             File::Spec->catfile( $to, $ae->files->[0] );
249            
250             Note that all files from a tar archive will be in unix format, as per
251             the tar specification.
252            
253             =back
254            
255             =cut
256              
257             sub extract {
258                 my $self = shift;
259                 my %hash = @_;
260              
261                 my $to;
262                 my $tmpl = {
263                     to => { default => '.', store => \$to }
264                 };
265              
266                 check( $tmpl, \%hash ) or return;
267              
268             ### so 'to' could be a file or a dir, depending on whether it's a .gz
269             ### file, or basically anything else.
270             ### so, check that, then act accordingly.
271             ### set an accessor specifically so _gunzip can know what file to extract
272             ### to.
273                 my $dir;
274                 { ### a foo.gz file
275                     if( $self->is_gz or $self->is_bz2 ) {
276                 
277                         my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2)$//i;
278                     
279             ### to is a dir?
280                         if ( -d $to ) {
281                             $dir = $to;
282                             $self->_gunzip_to( basename($cp) );
283              
284             ### then it's a filename
285                         } else {
286                             $dir = dirname($to);
287                             $self->_gunzip_to( basename($to) );
288                         }
289              
290             ### not a foo.gz file
291                     } else {
292                         $dir = $to;
293                     }
294                 }
295              
296             ### make the dir if it doesn't exist ###
297                 unless( -d $dir ) {
298                     eval { mkpath( $dir ) };
299              
300                     return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
301                         if $@;
302                 }
303              
304             ### get the current dir, to restore later ###
305                 my $cwd = cwd();
306              
307                 my $ok = 1;
308                 EXTRACT: {
309              
310             ### chdir to the target dir ###
311                     unless( chdir $dir ) {
312                         $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
313                         $ok = 0; last EXTRACT;
314                     }
315              
316             ### set files to an empty array ref, so there's always an array
317             ### ref IN the accessor, to avoid errors like:
318             ### Can't use an undefined value as an ARRAY reference at
319             ### ../lib/Archive/Extract.pm line 742. (rt #19815)
320                     $self->files( [] );
321              
322             ### find what extractor method to use ###
323                     while( my($type,$method) = each %$Mapping ) {
324              
325             ### call the corresponding method if the type is OK ###
326                         if( $self->$type) {
327                             $ok = $self->$method();
328                         }
329                     }
330              
331             ### warn something went wrong if we didn't get an OK ###
332                     $self->_error(loc("Extract failed, no extractor found"))
333                         unless $ok;
334              
335                 }
336              
337             ### and chdir back ###
338                 unless( chdir $cwd ) {
339                     $self->_error(loc("Could not chdir back to start dir '%1': %2'",
340                                         $cwd, $!));
341                 }
342              
343                 return $ok;
344             }
345              
346             =pod
347            
348             =head1 ACCESSORS
349            
350             =head2 $ae->error([BOOL])
351            
352             Returns the last encountered error as string.
353             Pass it a true value to get the C<Carp::longmess()> output instead.
354            
355             =head2 $ae->extract_path
356            
357             This is the directory the archive got extracted to.
358             See C<extract()> for details.
359            
360             =head2 $ae->files
361            
362             This is an array ref holding all the paths from the archive.
363             See C<extract()> for details.
364            
365             =head2 $ae->archive
366            
367             This is the full path to the archive file represented by this
368             C<Archive::Extract> object.
369            
370             =head2 $ae->type
371            
372             This is the type of archive represented by this C<Archive::Extract>
373             object. See accessors below for an easier way to use this.
374             See the C<new()> method for details.
375            
376             =head2 $ae->types
377            
378             Returns a list of all known C<types> for C<Archive::Extract>'s
379             C<new> method.
380            
381             =cut
382              
383             sub types { return @Types }
384              
385             =head2 $ae->is_tgz
386            
387             Returns true if the file is of type C<.tar.gz>.
388             See the C<new()> method for details.
389            
390             =head2 $ae->is_tar
391            
392             Returns true if the file is of type C<.tar>.
393             See the C<new()> method for details.
394            
395             =head2 $ae->is_gz
396            
397             Returns true if the file is of type C<.gz>.
398             See the C<new()> method for details.
399            
400             =head2 $ae->is_zip
401            
402             Returns true if the file is of type C<.zip>.
403             See the C<new()> method for details.
404            
405             =cut
406              
407             ### quick check methods ###
408             sub is_tgz { return $_[0]->type eq TGZ }
409             sub is_tar { return $_[0]->type eq TAR }
410             sub is_gz { return $_[0]->type eq GZ }
411             sub is_zip { return $_[0]->type eq ZIP }
412             sub is_tbz { return $_[0]->type eq TBZ }
413             sub is_bz2 { return $_[0]->type eq BZ2 }
414              
415             =pod
416            
417             =head2 $ae->bin_tar
418            
419             Returns the full path to your tar binary, if found.
420            
421             =head2 $ae->bin_gzip
422            
423             Returns the full path to your gzip binary, if found
424            
425             =head2 $ae->bin_unzip
426            
427             Returns the full path to your unzip binary, if found
428            
429             =cut
430              
431             ### paths to commandline tools ###
432             sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
433             sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
434             sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
435             sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
436              
437             #################################
438             #
439             # Untar code
440             #
441             #################################
442              
443              
444             ### untar wrapper... goes to either Archive::Tar or /bin/tar
445             ### depending on $PREFER_BIN
446             sub _untar {
447                 my $self = shift;
448              
449             ### bzip2 support in A::T via IO::Uncompress::Bzip2
450                 my @methods = qw[_untar_at _untar_bin];
451                      @methods = reverse @methods unless $PREFER_BIN;
452              
453                 for my $method (@methods) {
454                     $self->_extractor($method) && return 1 if $self->$method();
455                 }
456              
457                 return $self->_error(loc("Unable to untar file '%1'", $self->archive));
458             }
459              
460             ### use /bin/tar to extract ###
461             sub _untar_bin {
462                 my $self = shift;
463              
464             ### check for /bin/tar ###
465                 return $self->_error(loc("No '%1' program found", '/bin/tar'))
466                     unless $self->bin_tar;
467              
468             ### check for /bin/gzip if we need it ###
469                 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
470                     if $self->is_tgz && !$self->bin_gzip;
471              
472                 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
473                     if $self->is_tbz && !$self->bin_bunzip2;
474              
475             ### XXX figure out how to make IPC::Run do this in one call --
476             ### currently i don't know how to get output of a command after a pipe
477             ### trapped in a scalar. Mailed barries about this 5th of june 2004.
478              
479              
480              
481             ### see what command we should run, based on whether
482             ### it's a .tgz or .tar
483              
484             ### XXX solaris tar and bsdtar are having different outputs
485             ### depending whether you run with -x or -t
486             ### compensate for this insanity by running -t first, then -x
487                 { my $cmd =
488                         $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
489                                          $self->bin_tar, '-tf', '-'] :
490                         $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
491                                          $self->bin_tar, '-tf', '-'] :
492                         [$self->bin_tar, '-tf', $self->archive];
493              
494             ### run the command ###
495                     my $buffer = '';
496                     unless( scalar run( command => $cmd,
497                                         buffer => \$buffer,
498                                         verbose => $DEBUG )
499                     ) {
500                         return $self->_error(loc(
501                                         "Error listing contents of archive '%1': %2",
502                                         $self->archive, $buffer ));
503                     }
504              
505             ### no buffers available?
506                     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
507                         $self->_error( $self->_no_buffer_files( $self->archive ) );
508                     
509                     } else {
510             ### if we're on solaris we /might/ be using /bin/tar, which has
511             ### a weird output format... we might also be using
512             ### /usr/local/bin/tar, which is gnu tar, which is perfectly
513             ### fine... so we have to do some guessing here =/
514                         my @files = map { chomp;
515                                       !ON_SOLARIS ? $_
516                                                   : (m|^ x \s+ # 'xtract' -- sigh
517             (.+?), # the actual file name
518             \s+ [\d,.]+ \s bytes,
519             \s+ [\d,.]+ \s tape \s blocks
520             |x ? $1 : $_);
521              
522                                 } split $/, $buffer;
523              
524             ### store the files that are in the archive ###
525                         $self->files(\@files);
526                     }
527                 }
528              
529             ### now actually extract it ###
530                 { my $cmd =
531                         $self->is_tgz ? [$self->bin_gzip, '-cdf', $self->archive, '|',
532                                          $self->bin_tar, '-xf', '-'] :
533                         $self->is_tbz ? [$self->bin_bunzip2, '-c', $self->archive, '|',
534                                          $self->bin_tar, '-xf', '-'] :
535                         [$self->bin_tar, '-xf', $self->archive];
536              
537                     my $buffer = '';
538                     unless( scalar run( command => $cmd,
539                                         buffer => \$buffer,
540                                         verbose => $DEBUG )
541                     ) {
542                         return $self->_error(loc("Error extracting archive '%1': %2",
543                                         $self->archive, $buffer ));
544                     }
545              
546             ### we might not have them, due to lack of buffers
547                     if( $self->files ) {
548             ### now that we've extracted, figure out where we extracted to
549                         my $dir = $self->__get_extract_dir( $self->files );
550                 
551             ### store the extraction dir ###
552                         $self->extract_path( $dir );
553                     }
554                 }
555              
556             ### we got here, no error happened
557                 return 1;
558             }
559              
560             ### use archive::tar to extract ###
561             sub _untar_at {
562                 my $self = shift;
563              
564             ### we definitely need A::T, so load that first
565                 { my $use_list = { 'Archive::Tar' => '0.0' };
566              
567                     unless( can_load( modules => $use_list ) ) {
568              
569                         return $self->_error(loc("You do not have '%1' installed - " .
570                                              "Please install it as soon as possible.",
571                                              'Archive::Tar'));
572                     }
573                 }
574              
575             ### we might pass it a filehandle if it's a .tbz file..
576                 my $fh_to_read = $self->archive;
577              
578             ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
579             ### if A::T's version is 0.99 or higher
580                 if( $self->is_tgz ) {
581                     my $use_list = { 'Compress::Zlib' => '0.0' };
582                        $use_list->{ 'IO::Zlib' } = '0.0'
583                             if $Archive::Tar::VERSION >= '0.99';
584              
585                     unless( can_load( modules => $use_list ) ) {
586                         my $which = join '/', sort keys %$use_list;
587              
588                         return $self->_error(loc(
589                                             "You do not have '%1' installed - Please ".
590                                             "install it as soon as possible.", $which));
591              
592                     }
593                 } elsif ( $self->is_tbz ) {
594                     my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
595                     unless( can_load( modules => $use_list ) ) {
596                         return $self->_error(loc(
597                                 "You do not have '%1' installed - Please " .
598                                 "install it as soon as possible.",
599                                  'IO::Uncompress::Bunzip2'));
600                     }
601              
602                     my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
603                         return $self->_error(loc("Unable to open '%1': %2",
604                                         $self->archive,
605                                         $IO::Uncompress::Bunzip2::Bunzip2Error));
606              
607                     $fh_to_read = $bz;
608                 }
609              
610                 my $tar = Archive::Tar->new();
611              
612             ### only tell it it's compressed if it's a .tgz, as we give it a file
613             ### handle if it's a .tbz
614                 unless( $tar->read( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) ) ) {
615                     return $self->_error(loc("Unable to read '%1': %2", $self->archive,
616                                                 $Archive::Tar::error));
617                 }
618              
619             ### workaround to prevent Archive::Tar from setting uid, which
620             ### is a potential security hole. -autrijus
621             ### have to do it here, since A::T needs to be /loaded/ first ###
622 1     1   18     { no strict 'refs'; local $^W;
  1         10  
  1         15  
623              
624             ### older versions of archive::tar <= 0.23
625                     *Archive::Tar::chown = sub {};
626                 }
627              
628             ### for version of archive::tar > 1.04
629                 local $Archive::Tar::Constant::CHOWN = 0;
630              
631                 { local $^W; # quell 'splice() offset past end of array' warnings
632             # on older versions of A::T
633              
634             ### older archive::tar always returns $self, return value slightly
635             ### fux0r3d because of it.
636                     $tar->extract()
637                         or return $self->_error(loc("Unable to extract '%1': %2",
638                                                 $self->archive, $Archive::Tar::error ));
639                 }
640              
641                 my @files = $tar->list_files;
642                 my $dir = $self->__get_extract_dir( \@files );
643              
644             ### store the files that are in the archive ###
645                 $self->files(\@files);
646              
647             ### store the extraction dir ###
648                 $self->extract_path( $dir );
649              
650             ### check if the dir actually appeared ###
651                 return 1 if -d $self->extract_path;
652              
653             ### no dir, we failed ###
654                 return $self->_error(loc("Unable to extract '%1': %2",
655                                             $self->archive, $Archive::Tar::error ));
656             }
657              
658             #################################
659             #
660             # Gunzip code
661             #
662             #################################
663              
664             ### gunzip wrapper... goes to either Compress::Zlib or /bin/gzip
665             ### depending on $PREFER_BIN
666             sub _gunzip {
667                 my $self = shift;
668              
669                 my @methods = qw[_gunzip_cz _gunzip_bin];
670                    @methods = reverse @methods if $PREFER_BIN;
671              
672                 for my $method (@methods) {
673                     $self->_extractor($method) && return 1 if $self->$method();
674                 }
675              
676                 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
677             }
678              
679             sub _gunzip_bin {
680                 my $self = shift;
681              
682             ### check for /bin/gzip -- we need it ###
683                 return $self->_error(loc("No '%1' program found", '/bin/gzip'))
684                     unless $self->bin_gzip;
685              
686              
687                 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
688                     return $self->_error(loc("Could not open '%1' for writing: %2",
689                                         $self->_gunzip_to, $! ));
690              
691                 my $cmd = [ $self->bin_gzip, '-cdf', $self->archive ];
692              
693                 my $buffer;
694                 unless( scalar run( command => $cmd,
695                                     verbose => $DEBUG,
696                                     buffer => \$buffer )
697                 ) {
698                     return $self->_error(loc("Unable to gunzip '%1': %2",
699                                                 $self->archive, $buffer));
700                 }
701              
702             ### no buffers available?
703                 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
704                     $self->_error( $self->_no_buffer_content( $self->archive ) );
705                 }
706              
707                 print $fh $buffer if defined $buffer;
708              
709                 close $fh;
710              
711             ### set what files where extract, and where they went ###
712                 $self->files( [$self->_gunzip_to] );
713                 $self->extract_path( File::Spec->rel2abs(cwd()) );
714              
715                 return 1;
716             }
717              
718             sub _gunzip_cz {
719                 my $self = shift;
720              
721                 my $use_list = { 'Compress::Zlib' => '0.0' };
722                 unless( can_load( modules => $use_list ) ) {
723                     return $self->_error(loc("You do not have '%1' installed - Please " .
724                                     "install it as soon as possible.", 'Compress::Zlib'));
725                 }
726              
727                 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
728                             return $self->_error(loc("Unable to open '%1': %2",
729                                         $self->archive, $Compress::Zlib::gzerrno));
730              
731                 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
732                     return $self->_error(loc("Could not open '%1' for writing: %2",
733                                         $self->_gunzip_to, $! ));
734              
735                 my $buffer;
736                 $fh->print($buffer) while $gz->gzread($buffer) > 0;
737                 $fh->close;
738              
739             ### set what files where extract, and where they went ###
740                 $self->files( [$self->_gunzip_to] );
741                 $self->extract_path( File::Spec->rel2abs(cwd()) );
742              
743                 return 1;
744             }
745              
746             #################################
747             #
748             # Unzip code
749             #
750             #################################
751              
752             ### unzip wrapper... goes to either Archive::Zip or /bin/unzip
753             ### depending on $PREFER_BIN
754             sub _unzip {
755                 my $self = shift;
756              
757                 my @methods = qw[_unzip_az _unzip_bin];
758                    @methods = reverse @methods if $PREFER_BIN;
759              
760                 for my $method (@methods) {
761                     $self->_extractor($method) && return 1 if $self->$method();
762                 }
763              
764                 return $self->_error(loc("Unable to gunzip file '%1'", $self->archive));
765             }
766              
767             sub _unzip_bin {
768                 my $self = shift;
769              
770             ### check for /bin/gzip if we need it ###
771                 return $self->_error(loc("No '%1' program found", '/bin/unzip'))
772                     unless $self->bin_unzip;
773              
774              
775             ### first, get the files.. it must be 2 different commands with 'unzip' :(
776                 { my $cmd = [ $self->bin_unzip, '-Z', '-1', $self->archive ];
777              
778                     my $buffer = '';
779                     unless( scalar run( command => $cmd,
780                                         verbose => $DEBUG,
781                                         buffer => \$buffer )
782                     ) {
783                         return $self->_error(loc("Unable to unzip '%1': %2",
784                                                     $self->archive, $buffer));
785                     }
786              
787             ### no buffers available?
788                     if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
789                         $self->_error( $self->_no_buffer_files( $self->archive ) );
790              
791                     } else {
792                         $self->files( [split $/, $buffer] );
793                     }
794                 }
795              
796             ### now, extract the archive ###
797                 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
798              
799                     my $buffer;
800                     unless( scalar run( command => $cmd,
801                                         verbose => $DEBUG,
802                                         buffer => \$buffer )
803                     ) {
804                         return $self->_error(loc("Unable to unzip '%1': %2",
805                                                     $self->archive, $buffer));
806                     }
807              
808                     if( scalar @{$self->files} ) {
809                         my $files = $self->files;
810                         my $dir = $self->__get_extract_dir( $files );
811              
812                         $self->extract_path( $dir );
813                     }
814                 }
815              
816                 return 1;
817             }
818              
819             sub _unzip_az {
820                 my $self = shift;
821              
822                 my $use_list = { 'Archive::Zip' => '0.0' };
823                 unless( can_load( modules => $use_list ) ) {
824                     return $self->_error(loc("You do not have '%1' installed - Please " .
825                                     "install it as soon as possible.", 'Archive::Zip'));
826                 }
827              
828                 my $zip = Archive::Zip->new();
829              
830                 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
831                     return $self->_error(loc("Unable to read '%1'", $self->archive));
832                 }
833              
834                 my @files;
835             ### have to extract every memeber individually ###
836                 for my $member ($zip->members) {
837                     push @files, $member->{fileName};
838              
839                     unless( $zip->extractMember($member) == &Archive::Zip::AZ_OK ) {
840                         return $self->_error(loc("Extraction of '%1' from '%2' failed",
841                                     $member->{fileName}, $self->archive ));
842                     }
843                 }
844              
845                 my $dir = $self->__get_extract_dir( \@files );
846              
847             ### set what files where extract, and where they went ###
848                 $self->files( \@files );
849                 $self->extract_path( File::Spec->rel2abs($dir) );
850              
851                 return 1;
852             }
853              
854             sub __get_extract_dir {
855                 my $self = shift;
856                 my $files = shift || [];
857              
858                 return unless scalar @$files;
859              
860                 my($dir1, $dir2);
861                 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
862                     my($dir,$pos) = @$aref;
863              
864             ### add a catdir(), so that any trailing slashes get
865             ### take care of (removed)
866             ### also, a catdir() normalises './dir/foo' to 'dir/foo';
867             ### which was the problem in bug #23999
868                     my $res = -d $files->[$pos]
869                                 ? File::Spec->catdir( $files->[$pos], '' )
870                                 : File::Spec->catdir( dirname( $files->[$pos] ) );
871              
872                     $$dir = $res;
873                 }
874              
875             ### if the first and last dir don't match, make sure the
876             ### dirname is not set wrongly
877                 my $dir;
878              
879             ### dirs are the same, so we know for sure what the extract dir is
880                 if( $dir1 eq $dir2 ) {
881                     $dir = $dir1;
882                 
883             ### dirs are different.. do they share the base dir?
884             ### if so, use that, if not, fall back to '.'
885                 } else {
886                     my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
887                     my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
888                     
889                     $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
890                 }
891              
892                 return File::Spec->rel2abs( $dir );
893             }
894              
895             #################################
896             #
897             # Bunzip2 code
898             #
899             #################################
900              
901             ### bunzip2 wrapper...
902             sub _bunzip2 {
903                 my $self = shift;
904              
905                 my @methods = qw[_bunzip2_cz2 _bunzip2_bin];
906                    @methods = reverse @methods if $PREFER_BIN;
907              
908                 for my $method (@methods) {
909                     $self->_extractor($method) && return 1 if $self->$method();
910                 }
911              
912                 return $self->_error(loc("Unable to bunzip2 file '%1'", $self->archive));
913             }
914              
915             sub _bunzip2_bin {
916                 my $self = shift;
917              
918             ### check for /bin/gzip -- we need it ###
919                 return $self->_error(loc("No '%1' program found", '/bin/bunzip2'))
920                     unless $self->bin_bunzip2;
921              
922              
923                 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
924                     return $self->_error(loc("Could not open '%1' for writing: %2",
925                                         $self->_gunzip_to, $! ));
926              
927                 my $cmd = [ $self->bin_bunzip2, '-c', $self->archive ];
928              
929                 my $buffer;
930                 unless( scalar run( command => $cmd,
931                                     verbose => $DEBUG,
932                                     buffer => \$buffer )
933                 ) {
934                     return $self->_error(loc("Unable to bunzip2 '%1': %2",
935                                                 $self->archive, $buffer));
936                 }
937              
938             ### no buffers available?
939                 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
940                     $self->_error( $self->_no_buffer_content( $self->archive ) );
941                 }
942                 
943                 print $fh $buffer if defined $buffer;
944              
945                 close $fh;
946              
947             ### set what files where extract, and where they went ###
948                 $self->files( [$self->_gunzip_to] );
949                 $self->extract_path( File::Spec->rel2abs(cwd()) );
950              
951                 return 1;
952             }
953              
954             ### using cz2, the compact versions... this we use mainly in archive::tar
955             ### extractor..
956             # sub _bunzip2_cz1 {
957             # my $self = shift;
958             #
959             # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
960             # unless( can_load( modules => $use_list ) ) {
961             # return $self->_error(loc("You do not have '%1' installed - Please " .
962             # "install it as soon as possible.",
963             # 'IO::Uncompress::Bunzip2'));
964             # }
965             #
966             # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
967             # return $self->_error(loc("Unable to open '%1': %2",
968             # $self->archive,
969             # $IO::Uncompress::Bunzip2::Bunzip2Error));
970             #
971             # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
972             # return $self->_error(loc("Could not open '%1' for writing: %2",
973             # $self->_gunzip_to, $! ));
974             #
975             # my $buffer;
976             # $fh->print($buffer) while $bz->read($buffer) > 0;
977             # $fh->close;
978             #
979             # ### set what files where extract, and where they went ###
980             # $self->files( [$self->_gunzip_to] );
981             # $self->extract_path( File::Spec->rel2abs(cwd()) );
982             #
983             # return 1;
984             # }
985              
986             sub _bunzip2_cz2 {
987                 my $self = shift;
988              
989                 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
990                 unless( can_load( modules => $use_list ) ) {
991                     return $self->_error(loc("You do not have '%1' installed - Please " .
992                                     "install it as soon as possible.",
993                                     'IO::Uncompress::Bunzip2'));
994                 }
995              
996                 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
997                     or return $self->_error(loc("Unable to uncompress '%1': %2",
998                                         $self->archive,
999                                         $IO::Uncompress::Bunzip2::Bunzip2Error));
1000              
1001             ### set what files where extract, and where they went ###
1002                 $self->files( [$self->_gunzip_to] );
1003                 $self->extract_path( File::Spec->rel2abs(cwd()) );
1004              
1005                 return 1;
1006             }
1007              
1008              
1009             #################################
1010             #
1011             # Error code
1012             #
1013             #################################
1014              
1015             sub _error {
1016                 my $self = shift;
1017                 my $error = shift;
1018                 
1019                 $self->_error_msg( $error );
1020                 $self->_error_msg_long( Carp::longmess($error) );
1021                 
1022             ### set $Archive::Extract::WARN to 0 to disable printing
1023             ### of errors
1024                 if( $WARN ) {
1025                     carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1026                 }
1027              
1028                 return;
1029             }
1030              
1031             sub error {
1032                 my $self = shift;
1033                 return shift() ? $self->_error_msg_long : $self->_error_msg;
1034             }
1035              
1036             sub _no_buffer_files {
1037                 my $self = shift;
1038                 my $file = shift or return;
1039                 return loc("No buffer captured, unable to tell ".
1040                            "extracted files or extraction dir for '%1'", $file);
1041             }
1042              
1043             sub _no_buffer_content {
1044                 my $self = shift;
1045                 my $file = shift or return;
1046                 return loc("No buffer captured, unable to get content for '%1'", $file);
1047             }
1048             1;
1049              
1050             =pod
1051            
1052             =head1 HOW IT WORKS
1053            
1054             C<Archive::Extract> tries first to determine what type of archive you
1055             are passing it, by inspecting its suffix. It does not do this by using
1056             Mime magic, or something related. See C<CAVEATS> below.
1057            
1058             Once it has determined the file type, it knows which extraction methods
1059             it can use on the archive. It will try a perl solution first, then fall
1060             back to a commandline tool if that fails. If that also fails, it will
1061             return false, indicating it was unable to extract the archive.
1062             See the section on C<GLOBAL VARIABLES> to see how to alter this order.
1063            
1064             =head1 CAVEATS
1065            
1066             =head2 File Extensions
1067            
1068             C<Archive::Extract> trusts on the extension of the archive to determine
1069             what type it is, and what extractor methods therefore can be used. If
1070             your archives do not have any of the extensions as described in the
1071             C<new()> method, you will have to specify the type explicitly, or
1072             C<Archive::Extract> will not be able to extract the archive for you.
1073            
1074             =head2 Bzip2 Support
1075            
1076             There's currently no very reliable pure perl Bzip2 implementation
1077             available, so C<Archive::Extract> can only extract C<bzip2>
1078             compressed archives if you have a C</bin/bunzip2> program.
1079            
1080             =head1 GLOBAL VARIABLES
1081            
1082             =head2 $Archive::Extract::DEBUG
1083            
1084             Set this variable to C<true> to have all calls to command line tools
1085             be printed out, including all their output.
1086             This also enables C<Carp::longmess> errors, instead of the regular
1087             C<carp> errors.
1088            
1089             Good for tracking down why things don't work with your particular
1090             setup.
1091            
1092             Defaults to C<false>.
1093            
1094             =head2 $Archive::Extract::WARN
1095            
1096             This variable controls whether errors encountered internally by
1097             C<Archive::Extract> should be C<carp>'d or not.
1098            
1099             Set to false to silence warnings. Inspect the output of the C<error()>
1100             method manually to see what went wrong.
1101            
1102             Defaults to C<true>.
1103            
1104             =head2 $Archive::Extract::PREFER_BIN
1105            
1106             This variables controls whether C<Archive::Extract> should prefer the
1107             use of perl modules, or commandline tools to extract archives.
1108            
1109             Set to C<true> to have C<Archive::Extract> prefer commandline tools.
1110            
1111             Defaults to C<false>.
1112            
1113             =head1 TODO
1114            
1115             =over 4
1116            
1117             =item Mime magic support
1118            
1119             Maybe this module should use something like C<File::Type> to determine
1120             the type, rather than blindly trust the suffix.
1121            
1122             =head1 AUTHORS
1123            
1124             This module by
1125             Jos Boumans E<lt>kane@cpan.orgE<gt>.
1126            
1127             =head1 COPYRIGHT
1128            
1129             This module is
1130             copyright (c) 2004-2007 Jos Boumans E<lt>kane@cpan.orgE<gt>.
1131             All rights reserved.
1132            
1133             This library is free software;
1134             you may redistribute and/or modify it under the same
1135             terms as Perl itself.
1136            
1137             =cut
1138              
1139             # Local variables:
1140             # c-indentation-style: bsd
1141             # c-basic-offset: 4
1142             # indent-tabs-mode: nil
1143             # End:
1144             # vim: expandtab shiftwidth=4:
1145              
1146