File Coverage

blib/lib/Ace/Sequence/Feature.pm
Criterion Covered Total %
statement 76 100 76.0
branch 18 28 64.3
condition 10 17 58.8
subroutine 17 29 58.6
pod 12 20 60.0
total 133 194 68.6


line stmt bran cond sub pod time code
1             package Ace::Sequence::Feature;
2 1     1   15 use strict;
  1         15  
  1         15  
3              
4 1     1   15 use Ace qw(:DEFAULT rearrange);
  1         10  
  1         18  
5 1     1   16 use Ace::Object;
  1         10  
  1         16  
6 1     1   34 use Ace::Sequence::Homol;
  1         10  
  1         23  
7 1     1   18 use Carp;
  1         9  
  1         17  
8 1     1   16 use AutoLoader 'AUTOLOAD';
  1         9  
  1         14  
9 1     1   15 use vars '@ISA','%REV';
  1         9  
  1         56  
10             @ISA = 'Ace::Sequence'; # for convenience sake only
11             %REV = ('+1' => '-1',
12             '-1' => '+1'); # war is peace, &c.
13              
14             use overload
15 1         15   '""' => 'asString',
16 1     1   17   ;
  1         9  
17              
18             # parse a line from a sequence list
19             sub new {
20 166     166 0 1456   my $pack = shift;
21 166         1842   my ($parent,$ref,$r_offset,$r_strand,$abs,$gff_line,$db) = @_;
22 166         3440   my ($sourceseq,$method,$type,$start,$end,$score,$strand,$frame,$group) = split "\t",$gff_line;
23 166 50       1639   if (defined($strand)) {
24 166 100       1568     $strand = $strand eq '-' ? '-1' : '+1';
25               } else {
26 0         0     $strand = 0;
27               }
28              
29             # for efficiency/performance, we don't use superclass new() method, but modify directly
30             # handling coordinates. See SCRAPS below for what should be in here
31 166 50 66     1950   $strand = '+1' if $strand < 0 && $r_strand < 0; # two wrongs do make a right
32 166 100       1988   ($start,$end) = ($end,$start) if $strand < 0;
33 166         1529   my $offset = $start - 1;
34 166 100       1526   my $length = ($end > $start) ? $end - $offset : $end - $offset - 2;
35              
36             # handle negative strands
37 166   100     1528   $offset ||= 0;
38 166 100 66     1612   $offset *= -1 if $r_strand < 0 && $strand != $r_strand;
39              
40 166         5654   my $self= bless {
41             obj      => $ref,
42             offset   => $offset,
43             length   => $length,
44             parent   => $parent,
45             p_offset => $r_offset,
46             refseq   => [$ref,$r_offset,$r_strand],
47             strand   => $r_strand,
48             fstrand  => $strand,
49             absolute => $abs,
50             info     => {
51             seqname=> $sourceseq,
52             method => $method,
53             type   => $type,
54             score  => $score,
55             frame  => $frame,
56             group  => $group,
57             db     => $db,
58             }
59             },$pack;
60 166         2572   return $self;
61             }
62              
63 0     0 0 0 sub smapped { 1; }
64              
65             # $_[0] is field name, $_[1] is self, $_[2] is optional replacement value
66             sub _field {
67 444     444   4147   my $self = shift;
68 444         4226   my $field = shift;
69 444         4477   my $v = $self->{info}{$field};
70 444 50       5059   $self->{info}{$field} = shift if @_;
71 444 50 66     5973   return if defined $v && $v eq '.';
72 444         5244   return $v;
73             }
74              
75 40     40 1 537 sub strand { return $_[0]->{fstrand} }
76              
77             sub seqname {
78 0     0 1 0   my $self = shift;
79 0         0   my $seq = $self->_field('seqname');
80 0         0   $self->db->fetch(Sequence=>$seq);
81             }
82              
83 0     0 1 0 sub method { shift->_field('method',@_) } # ... I prefer "method"
84 0     0 1 0 sub subtype { shift->_field('method',@_) } # ... or even "subtype"
85 174     174 1 1984 sub type { shift->_field('type',@_) } # ... I prefer "type"
86 0     0 1 0 sub score { shift->_field('score',@_) } # float indicating some sort of score
87 0     0 1 0 sub frame { shift->_field('frame',@_) } # one of 1, 2, 3 or undef
88             sub info { # returns Ace::Object(s) with info about the feature
89 106     106 1 1210   my $self = shift;
90 106 100       1248   unless ($self->{group}) {
91 104   33     1504     my $info = $self->{info}{group} || 'Method "'.$self->method.'"';
92 104         1110     $info =~ s/(\"[^\"]*);([^\"]*\")/$1$;$2/g;
93 104         1447     my @data = split(/\s*;\s*/,$info);
94 104         1196     foreach (@data) { s/$;/;/g }
  132         3745  
95 104         988     $self->{group} = [map {$self->toAce($_)} @data];
  132         2034  
96               }
97 106 50       1534   return wantarray ? @{$self->{group}} : $self->{group}->[0];
  0         0  
98             }
99              
100             # bioperl compatibility
101 0     0 0 0 sub primary_tag { shift->type(@_) }
102 0     0 0 0 sub source_tag { shift->subtype(@_) }
103              
104             sub db { # database identifier (from Ace::Sequence::Multi)
105 270     270 1 6252   my $self = shift;
106 270         3677   my $db = $self->_field('db',@_);
107 270   33     6572   return $db || $self->SUPER::db;
108             }
109              
110 0     0 1 0 sub group { $_[0]->info; }
111 0     0 1 0 sub target { $_[0]->info; }
112              
113             sub asString {
114 53     53 1 566   my $self = shift;
115 53         1167   my $name = $self->SUPER::asString;
116 53         2973   my $type = $self->type;
117 53         619   return "$type:$name";
118             }
119              
120             # unique ID
121             sub id {
122 0     0 0 0   my $self = shift;
123 0         0   my $source = $self->source->name;
124 0         0   my $start = $self->start;
125 0         0   my $end = $self->end;
126 0         0   return "$source/$start,$end";
127             }
128              
129             # map info into a reasonable set of ace objects
130             sub toAce {
131 132     132 0 1180     my $self = shift;
132 132         1136     my $thing = shift;
133 132         2143     my ($tag,@values) = $thing=~/(\"[^\"]+?\"|\S+)/g;
134 132         1370     foreach (@values) { # strip the damn quotes
135 102         2568       s/^\"(.*)\"$/$1/; # get rid of leading and trailing quotes
136                 }
137 132         1748     return $self->tag2ace($tag,@values);
138             }
139              
140             # synthesize an artificial Ace object based on the tag
141             sub tag2ace {
142 132     132 0 1193     my $self = shift;
143 132         4066     my ($tag,@data) = @_;
144              
145             # Special cases, hardcoded in Ace GFF code...
146 132         1396     my $db = $self->db;;
147 132         2500     my $class = $db->class;
148              
149             # for Notes we just return a text, no database associated
150 132 50       1418     return $class->new(Text=>$data[0]) if $tag eq 'Note';
151              
152             # for homols, we create the indicated Protein or Sequence object
153             # then generate a bogus Homology object (for future compatability??)
154 132 50       1403     if ($tag eq 'Target') {
155 0         0 my ($objname,$start,$end) = @data;
156 0         0 my ($classe,$name) = $objname =~ /^(\w+):(.+)/;
157 0         0 return Ace::Sequence::Homol->new_homol($classe,$name,$db,$start,$end);
158                 }
159              
160             # General case:
161 132         3958     my $obj = $class->new($tag=>$data[0],$self->db);
162              
163 132 50       2677     return $obj if defined $obj;
164              
165             # Last resort, return a Text
166 0               return $class->new(Text=>$data[0]);
167             }
168              
169             sub sub_SeqFeature {
170 0 0   0 0     return wantarray ? () : 0;
171             }
172              
173             1;
174              
175             =head1 NAME
176            
177             Ace::Sequence::Feature - Examine Sequence Feature Tables
178            
179             =head1 SYNOPSIS
180            
181             # open database connection and get an Ace::Object sequence
182             use Ace::Sequence;
183            
184             # get a megabase from the middle of chromosome I
185             $seq = Ace::Sequence->new(-name => 'CHROMOSOME_I,
186             -db => $db,
187             -offset => 3_000_000,
188             -length => 1_000_000);
189            
190             # get all the homologies (a list of Ace::Sequence::Feature objs)
191             @homol = $seq->features('Similarity');
192            
193             # Get information about the first one
194             $feature = $homol[0];
195             $type = $feature->type;
196             $subtype = $feature->subtype;
197             $start = $feature->start;
198             $end = $feature->end;
199             $score = $feature->score;
200            
201             # Follow the target
202             $target = $feature->info;
203            
204             # print the target's start and end positions
205             print $target->start,'-',$target->end, "\n";
206            
207             =head1 DESCRIPTION
208            
209             I<Ace::Sequence::Feature> is a subclass of L<Ace::Sequence::Feature>
210             specialized for returning information about particular features in a
211             GFF format feature table.
212            
213             =head1 OBJECT CREATION
214            
215             You will not ordinarily create an I<Ace::Sequence::Feature> object
216             directly. Instead, objects will be created in response to a feature()
217             call to an I<Ace::Sequence> object. If you wish to create an
218             I<Ace::Sequence::Feature> object directly, please consult the source
219             code for the I<new()> method.
220            
221             =head1 OBJECT METHODS
222            
223             Most methods are inherited from I<Ace::Sequence>. The following
224             methods are also supported:
225            
226             =over 4
227            
228             =item seqname()
229            
230             $object = $feature->seqname;
231            
232             Return the ACeDB Sequence object that this feature is attached to.
233             The return value is an I<Ace::Object> of the Sequence class. This
234             corresponds to the first field of the GFF format and does not
235             necessarily correspond to the I<Ace::Sequence> object from which the
236             feature was obtained (use source_seq() for that).
237            
238             =item source()
239            
240             =item method()
241            
242             =item subtype()
243            
244             $source = $feature->source;
245            
246             These three methods are all synonyms for the same thing. They return
247             the second field of the GFF format, called "source" in the
248             documentation. This is usually the method or algorithm used to
249             predict the feature, such as "GeneFinder" or "tRNA" scan. To avoid
250             ambiguity and enhance readability, the method() and subtype() synonyms
251             are also recognized.
252            
253             =item feature()
254            
255             =item type()
256            
257             $type = $feature->type;
258            
259             These two methods are also synonyms. They return the type of the
260             feature, such as "exon", "similarity" or "Predicted_gene". In the GFF
261             documentation this is called the "feature" field. For readability,
262             you can also use type() to fetch the field.
263            
264             =item abs_start()
265            
266             $start = $feature->abs_start;
267            
268             This method returns the absolute start of the feature within the
269             sequence segment indicated by seqname(). As in the I<Ace::Sequence>
270             method, use start() to obtain the start of the feature relative to its
271             source.
272            
273             =item abs_start()
274            
275             $start = $feature->abs_start;
276            
277             This method returns the start of the feature relative to the sequence
278             segment indicated by seqname(). As in the I<Ace::Sequence> method,
279             you will more usually use the inherited start() method to obtain the
280             start of the feature relative to its source sequence (the
281             I<Ace::Sequence> from which it was originally derived).
282            
283             =item abs_end()
284            
285             $start = $feature->abs_end;
286            
287             This method returns the end of the feature relative to the sequence
288             segment indicated by seqname(). As in the I<Ace::Sequence> method,
289             you will more usually use the inherited end() method to obtain the end
290             of the feature relative to the I<Ace::Sequence> from which it was
291             derived.
292            
293             =item score()
294            
295             $score = $feature->score;
296            
297             For features that are associated with a numeric score, such as
298             similarities, this returns that value. For other features, this
299             method returns undef.
300            
301             =item strand()
302            
303             $strand = $feature->strand;
304            
305             Returns the strandedness of this feature, either "+1" or "-1". For
306             features that are not stranded, returns 0.
307            
308             =item reversed()
309            
310             $reversed = $feature->reversed;
311            
312             Returns true if the feature is reversed relative to its source
313             sequence.
314            
315             =item frame()
316            
317             $frame = $feature->frame;
318            
319             For features that have a frame, such as a predicted coding sequence,
320             returns the frame, either 0, 1 or 2. For other features, returns undef.
321            
322             =item group()
323            
324             =item info()
325            
326             =item target()
327            
328             $info = $feature->info;
329            
330             These methods (synonyms for one another) return an Ace::Object
331             containing other information about the feature derived from the 8th
332             field of the GFF format, the so-called "group" field. The type of the
333             Ace::Object is dependent on the nature of the feature. The
334             possibilities are shown in the table below:
335            
336             Feature Type Value of Group Field
337             ------------ --------------------
338            
339             note A Text object containing the note.
340            
341             similarity An Ace::Sequence::Homology object containing
342             the target and its start/stop positions.
343            
344             intron An Ace::Object containing the gene from
345             exon which the feature is derived.
346             misc_feature
347            
348             other A Text object containing the group data.
349            
350             =item asString()
351            
352             $label = $feature->asString;
353            
354             Returns a human-readable identifier describing the nature of the
355             feature. The format is:
356            
357             $type:$name/$start-$end
358            
359             for example:
360            
361             exon:ZK154.3/1-67
362            
363             This method is also called automatically