File Coverage

blib/lib/Ace/Sequence.pm
Criterion Covered Total %
statement 278 393 70.7
branch 107 204 52.5
condition 38 81 46.9
subroutine 40 58 69.0
pod 19 34 55.9
total 482 770 62.6


line stmt bran cond sub pod time code
1             package Ace::Sequence;
2 1     1   14 use strict;
  1         14  
  1         13  
3              
4 1     1   14 use Carp;
  1         8  
  1         22  
5 1     1   15 use strict;
  1         9  
  1         11  
6 1     1   30 use Ace 1.50 qw(:DEFAULT rearrange);
  1         24  
  1         34  
7 1     1   41 use Ace::Sequence::FeatureList;
  1         12  
  1         22  
8 1     1   37 use Ace::Sequence::Feature;
  1         10  
  1         27  
9 1     1   172 use AutoLoader 'AUTOLOAD';
  1         22  
  1         17  
10 1     1   18 use vars '$VERSION';
  1         10  
  1         14  
11             my %CACHE;
12              
13             $VERSION = '1.51';
14              
15 1     1   16 use constant CACHE => 1;
  1         10  
  1         15  
16              
17             use overload
18 1         16   '""' => 'asString',
19               cmp => 'cmp',
20 1     1   16 ;
  1         9  
21              
22             # synonym: stop = end
23             *stop = \&end;
24             *abs = \&absolute;
25             *source_seq = \&source;
26             *source_tag = \&subtype;
27             *primary_tag = \&type;
28              
29             my %plusminus = ( '+' => '-',
30             '-' => '+',
31             '.' => '.');
32              
33             # internal keys
34             # parent => reference Sequence in "+" strand
35             # p_offset => our start in the parent
36             # length => our length
37             # strand => our strand (+ or -)
38             # refseq => reference Sequence for coordinate system
39              
40             # object constructor
41             # usually called like this:
42             # $seq = Ace::Sequence->new($object);
43             # but can be called like this:
44             # $seq = Ace::Sequence->new(-db=>$db,-name=>$name);
45             # or
46             # $seq = Ace::Sequence->new(-seq => $object,
47             # -offset => $offset,
48             # -length => $length,
49             # -ref => $refseq
50             # );
51             # $refseq, if provided, will be used to establish the coordinate
52             # system. Otherwise the first base pair will be set to 1.
53             sub new {
54 25     25 0 245   my $pack = shift;
55 25         889   my ($seq,$start,$end,$offset,$length,$refseq,$db) =
56                 rearrange([
57             ['SEQ','SEQUENCE','SOURCE'],
58             'START',
59             ['END','STOP'],
60             ['OFFSET','OFF'],
61             ['LENGTH','LEN'],
62             'REFSEQ',
63             ['DATABASE','DB'],
64             ],@_);
65              
66             # Object must have a parent sequence and/or a reference
67             # sequence. In some cases, the parent sequence will be the
68             # object itself. The reference sequence is used to set up
69             # the frame of reference for the coordinate system.
70              
71             # fetch the sequence object if we don't have it already
72 25 50 33     470   croak "Please provide either a Sequence object or a database and name"
      66        
73                 unless ref($seq) || ($seq && $db);
74              
75             # convert start into offset
76 25 50 33     253   $offset = $start - 1 if defined($start) and !defined($offset);
77              
78             # convert stop/end into length
79 25 0 33     244   $length = ($end > $start) ? $end - $offset : $end - $offset - 2
    50          
80                 if defined($end) && !defined($length);
81              
82             # if just a string is passed, try to fetch a Sequence object
83 25 100       281   my $obj = ref($seq) ? $seq : $db->fetch('Sequence'=>$seq);
84 25 50       291   unless ($obj) {
85 0         0     Ace->error("No Sequence named $obj found in database");
86 0         0     return;
87               }
88              
89             # get parent coordinates and length of this sequence
90             # the parent is an Ace Sequence object in the "+" strand
91 25         433   my ($parent,$p_offset,$p_length,$strand) = find_parent($obj);
92 25 50       1625   return unless $parent;
93              
94             # handle negative strands
95 25         271   my $r_strand = $strand;
96 25         234   my $r_offset = $p_offset;
97 25   100     286   $offset ||= 0;
98 25 100       241   $offset *= -1 if $strand < 0;
99              
100             # handle feature objects
101 25 100       632   $offset += $obj->offset if $obj->can('smapped');
102              
103             # get source
104 25 100       340   my $source = $obj->can('smapped') ? $obj->source : $obj;
105              
106             # store the object into our instance variables
107 25   66     1025   my $self = bless {
108             obj        => $source,
109             offset     => $offset,
110             length     => $length || $p_length,
111             parent     => $parent,
112             p_offset   => $p_offset,
113             refseq     => [$source,$r_offset,$r_strand],
114             strand     => $strand,
115             absolute   => 0,
116             automerge  => 1,
117             },$pack;
118              
119             # set the reference sequence
120 25 100 50     269   eval { $self->refseq($refseq) } or return if defined $refseq;
  17         175  
121              
122             # wheww!
123 25         391   return $self;
124             }
125              
126             # return the "source" object that the user offset from
127             sub source {
128 141     141 0 1920   $_[0]->{obj};
129             }
130              
131             # return the parent object
132 79     79 0 890 sub parent { $_[0]->{parent} }
133              
134             # return the length
135             #sub length { $_[0]->{length} }
136             sub length {
137 6     6 1 56   my $self = shift;
138 6         57   my ($start,$end) = ($self->start,$self->end);
139 6 100       132   return $end - $start + ($end > $start ? 1 : -1); # for stupid 1-based adjustments
140             }
141              
142 0     0 1 0 sub reversed { return shift->strand < 0; }
143              
144             sub automerge {
145 10     10 1 88   my $self = shift;
146 10         196   my $d = $self->{automerge};
147 10 50       97   $self->{automerge} = shift if @_;
148 10         106   $d;
149             }
150              
151             # return reference sequence
152             sub refseq {
153 1018     1018 1 8496   my $self = shift;
154 1018         10917   my $prev = $self->{refseq};
155 1018 100       14215   if (@_) {
156 18         155     my $refseq = shift;
157 18         148     my $arrayref;
158              
159 18 50       172   BLOCK: {
160 18         148       last BLOCK unless defined ($refseq);
161              
162 18 50 66     291       if (ref($refseq) && ref($refseq) eq 'ARRAY') {
163 0         0 $arrayref = $refseq;
164 0         0 last BLOCK;
165                   }
166              
167 18 100 66     314       if (ref($refseq) && ($refseq->can('smapped'))) {
168 16 50       160 croak "Reference sequence has no common ancestor with sequence"
169             unless $self->parent eq $refseq->parent;
170 16         203 my ($a,$b,$c) = @{$refseq->{refseq}};
  16         175  
171             # $b += $refseq->offset;
172 16         160 $b += $refseq->offset;
173 16         202 $arrayref = [$refseq,$b,$refseq->strand];
174 16         178 last BLOCK;
175                   }
176              
177              
178             # look up reference sequence in database if we aren't given
179             # database object already
180 2 50       51       $refseq = $self->db->fetch('Sequence' => $refseq)
181             unless $refseq->isa('Ace::Object');
182 2 50       21       croak "Invalid reference sequence" unless $refseq;
183              
184             # find position of ref sequence in parent strand
185 2         25       my ($r_parent,$r_offset,$r_length,$r_strand) = find_parent($refseq);
186 2 50       22       croak "Reference sequence has no common ancestor with sequence"
187             unless $r_parent eq $self->{parent};
188              
189             # set to array reference containing this information
190 2         22       $arrayref = [$refseq,$r_offset,$r_strand];
191                 }
192 18         202     $self->{refseq} = $arrayref;
193               }
194 1018 50       11667   return unless $prev;
195 1018 50       9912   return $self->parent if $self->absolute;
196 1018 100       12546   return wantarray ? @{$prev} : $prev->[0];
  534         8019  
197             }
198              
199             # return strand
200 0     0 1 0 sub strand { return $_[0]->{strand} }
201              
202             # return reference strand
203             sub r_strand {
204 134     134 0 1125   my $self = shift;
205 134 50       1417   return "+1" if $self->absolute;
206 134 50       1332   if (my ($ref,$r_offset,$r_strand) = $self->refseq) {
207 134         2565     return $r_strand;
208               } else {
209 0         0     return $self->{strand}
210               }
211             }
212              
213 32     32 1 365 sub offset { $_[0]->{offset} }
214 0     0 0 0 sub p_offset { $_[0]->{p_offset} }
215              
216 0     0 0 0 sub smapped { 1; }
217 0     0 0 0 sub type { 'Sequence' }
218 0     0 0 0 sub subtype { }
219              
220             sub debug {
221 24     24 0 228   my $self = shift;
222 24         221   my $d = $self->{_debug};
223 24 50       218   $self->{_debug} = shift if @_;
224 24         292   $d;
225             }
226              
227             # return the database this sequence is associated with
228             sub db {
229 284   66 284 1 3818   return Ace->name2db($_[0]->{db} ||= $_[0]->source->db);
230             }
231              
232             sub start {
233 425     425 1 5482   my ($self,$abs) = @_;
234 425 100       4894   $abs = $self->absolute unless defined $abs;
235 425 100       4219   return $self->{p_offset} + $self->{offset} + 1 if $abs;
236              
237 389 50       3812   if ($self->refseq) {
238 389         4468     my ($ref,$r_offset,$r_strand) = $self->refseq;
239 389 100       5833     return $r_strand < 0 ? 1 + $r_offset - ($self->{p_offset} + $self->{offset})
240                                      : 1 + $self->{p_offset} + $self->{offset} - $r_offset;
241               }
242              
243               else {
244 0         0     return $self->{offset} +1;
245               }
246              
247             }
248              
249             sub end {
250 106     106 1 1196   my ($self,$abs) = @_;
251 106         1027   my $start = $self->start($abs);
252 106 100       1448   my $f = $self->{length} > 0 ? 1 : -1; # for stupid 1-based adjustments
253 106 100 100     1140   if ($abs && $self->refseq ne $self->parent) {
254 17         249     my $r_strand = $self->r_strand;
255 17 100 66     328     return $start - $self->{length} + $f
      100        
256                   if $r_strand < 0 or $self->{strand} < 0 or $self->{length} < 0;
257 8         97     return $start + $self->{length} - $f
258               }
259 89 100       1177   return $start + $self->{length} - $f if $self->r_strand eq $self->{strand};
260 7         112   return $start - $self->{length} + $f;
261             }
262              
263             # turn on absolute coordinates (relative to reference sequence)
264             sub absolute {
265 1611     1611 1 15964   my $self = shift;
266 1611         17364   my $prev = $self->{absolute};
267 1611 50       18331   $self->{absolute} = $_[0] if defined $_[0];
268 1611         17478   return $prev;
269             }
270              
271             # human readable string (for debugging)
272             sub asString {
273 59     59 1 598   my $self = shift;
274 59 50       1540   if ($self->absolute) {
    50          
275 0         0     return join '',$self->parent,'/',$self->start,',',$self->end;
276               } elsif (my $ref = $self->refseq){
277 59 50       944     my $label = $ref->isa('Ace::Sequence::Feature') ? $ref->info : "$ref";
278 59         789     return join '',$label,'/',$self->start,',',$self->end;
279              
280               } else {
281 0         0     join '',$self->source,'/',$self->start,',',$self->end;
282               }
283             }
284              
285             sub cmp {
286 3     3 0 33   my ($self,$arg,$reversed) = @_;
287 3 50 33     67   if (ref($arg) and $arg->isa('Ace::Sequence')) {
288 0   0     0     my $cmp = $self->parent cmp $arg->parent
289                   || $self->start <=> $arg->start;
290 0 0       0     return $reversed ? -$cmp : $cmp;
291               }
292 3         46   my $name = $self->asString;
293 3 50       34   return $reversed ? $arg cmp $name : $name cmp $arg;
294             }
295              
296             # Return the DNA
297             sub dna {
298 12     12 1 173   my $self = shift;
299 12 50       126   return $self->{dna} if $self->{dna};
300 12         144   my $raw = $self->_query('seqdna');
301 12         276   $raw=~s/^>.*\n//m;
302 12         150   $raw=~s/^\/\/.*//mg;
303 12         208   $raw=~s/\n//g;
304 12         126   $raw =~ s/\0+\Z//; # blasted nulls!
305 12 100       805   my $effective_strand = $self->end >= $self->start ? '+1' : '-1';
306 12 100       144   _complement(\$raw) if $self->r_strand ne $effective_strand;
307 12         225   return $self->{dna} = $raw;
308             }
309              
310             # return a gff file
311             sub gff {
312 6     6 1 56   my $self = shift;
313 6         169   my ($abs,$features) = rearrange([['ABS','ABSOLUTE'],'FEATURES'],@_);
314 6 50       91   $abs = $self->absolute unless defined $abs;
315              
316             # can provide list of feature names, such as 'similarity', or 'all' to get 'em all
317             # !THIS IS BROKEN; IT SHOULD LOOK LIKE FEATURE()!
318 6         68   my $opt = $self->_feature_filter($features);
319              
320 6         65   my $gff = $self->_gff($opt);
321 6 50       133   warn $gff if $self->debug;
322              
323 6 50       83   $self->transformGFF(\$gff) unless $abs;
324 6         130   return $gff;
325             }
326              
327             # return a GFF object using the optional GFF.pm module
328             sub GFF {
329 0     0 1 0   my $self = shift;
330 0         0   my ($filter,$converter) = @_; # anonymous subs
331 0 0       0   croak "GFF module not installed" unless require GFF;
332 0         0   require GFF::Filehandle;
333              
334 0         0   my @lines = grep !/^\/\//,split "\n",$self->gff(@_);
335 0         0   local *IN;
336 0         0   local ($^W) = 0; # prevent complaint by GFF module
337 0         0   tie *IN,'GFF::Filehandle',\@lines;
338 0         0   my $gff = GFF::GeneFeatureSet->new;
339 0 0       0   $gff->read(\*IN,$filter,$converter) if $gff;
340 0         0   return $gff;
341             }
342              
343             # Get the features table. Can filter by type/subtype this way:
344             # features('similarity:EST','annotation:assembly_tag')
345             sub features {
346 5     5 1 111   my $self = shift;
347 5         57   my ($filter,$opt) = $self->_make_filter(@_);
348              
349             # get raw gff file
350 5         185   my $gff = $self->gff(-features=>$opt);
351              
352             # turn it into a list of features
353 5         62   my @features = $self->_make_features($gff,$filter);
354              
355 5 50       123   if ($self->automerge) { # automatic merging
356             # fetch out constructed transcripts and clones
357 5         51     my %types = map {lc($_)=>1} (@$opt,@_);
  14         168  
358 5 50       57     if ($types{'transcript'}) {
359 0         0       push @features,$self->_make_transcripts(\@features);
360 0         0       @features = grep {$_->type !~ /^(intron|exon)$/ } @features;
  0         0  
361                 }
362 5 50       50     push @features,$self->_make_clones(\@features) if $types{'clone'};
363 5 50       57     if ($types{'similarity'}) {
364 0         0       my @f = $self->_make_alignments(\@features);
365 0         0       @features = grep {$_->type ne 'similarity'} @features;
  0         0  
366 0         0       push @features,@f;
367                 }
368               }
369              
370 5 50       110   return wantarray ? @features : \@features;
371             }
372              
373             # A little bit more complex - assemble a list of "transcripts"
374             # consisting of Ace::Sequence::Transcript objects. These objects
375             # contain a list of exons and introns.
376             sub transcripts {
377 1     1 1 102   my $self = shift;
378 1         72   my $curated = shift;
379 1 50       14   my $ef = $curated ? "exon:curated" : "exon";
380 1 50       163   my $if = $curated ? "intron:curated" : "intron";
381 1 50       13   my $sf = $curated ? "Sequence:curated" : "Sequence";
382 1         13   my @features = $self->features($ef,$if,$sf);
383 1 50       21   return unless @features;
384 1         14   return $self->_make_transcripts(\@features);
385             }
386              
387             sub _make_transcripts {
388 1     1   11   my $self = shift;
389 1         9   my $features = shift;
390              
391 1         274   require Ace::Sequence::Transcript;
392 1         11   my %transcripts;
393              
394 1         12   for my $feature (@$features) {
395 104         2329     my $transcript = $feature->info;
396 104 100       1206     next unless $transcript;
397 102 100       1360     if ($feature->type =~ /^(exon|intron|cds)$/) {
    50          
398 83         911       my $type = $1;
399 83         680       push @{$transcripts{$transcript}{$type}},$feature;
  83         1140  
400                 } elsif ($feature->type eq 'Sequence') {
401 19   50     203       $transcripts{$transcript}{base} ||= $feature;
402                 }
403               }
404              
405             # get rid of transcripts without exons
406 1         28   foreach (keys %transcripts) {
407 19 100       207     delete $transcripts{$_} unless exists $transcripts{$_}{exon}
408               }
409              
410             # map the rest onto Ace::Sequence::Transcript objects
411 1         17   return map {Ace::Sequence::Transcript->new($transcripts{$_})} keys %transcripts;
  11         120  
412             }
413              
414             # Reassemble clones from clone left and right ends
415             sub clones {
416 0     0 1 0   my $self = shift;
417 0         0   my @clones = $self->features('Clone_left_end','Clone_right_end','Sequence');
418 0         0   my %clones;
419 0 0       0   return unless @clones;
420 0         0   return $self->_make_clones(\@clones);
421             }
422              
423             sub _make_clones {
424 0     0   0   my $self = shift;
425 0         0   my $features = shift;
426              
427 0         0   my (%clones,@canonical_clones);
428 0 0       0   my $start_label = $self->strand < 0 ? 'end' : 'start';
429 0 0       0   my $end_label = $self->strand < 0 ? 'start' : 'end';
430 0         0   for my $feature (@$features) {
431 0 0       0     $clones{$feature->info}{$start_label} = $feature->start if $feature->type eq 'Clone_left_end';
432 0 0       0     $clones{$feature->info}{$end_label} = $feature->start if $feature->type eq 'Clone_right_end';
433              
434 0 0       0     if ($feature->type eq 'Sequence') {
435 0         0       my $info = $feature->info;
436 0 0       0       next if $info =~ /LINK|CHROMOSOME|\.\w+$/;
437 0 0       0       if ($info->Genomic_canonical(0)) {
438 0 0       0 push (@canonical_clones,$info->Clone) if $info->Clone;
439                   }
440                 }
441               }
442              
443 0         0   foreach (@canonical_clones) {
444 0   0     0     $clones{$_} ||= {};
445               }
446              
447 0         0   my @features;
448 0         0   my ($r,$r_offset,$r_strand) = $self->refseq;
449 0         0   my $parent = $self->parent;
450 0         0   my $abs = $self->absolute;
451 0 0       0   if ($abs) {
452 0         0     $r_offset = 0;
453 0         0     $r = $parent;
454 0         0     $r_strand = '+1';
455               }
456              
457             # BAD HACK ALERT. WE DON'T KNOW WHERE THE LEFT END OF THE CLONE IS SO WE USE
458             # THE MAGIC NUMBER -99_999_999 to mean "off left end" and
459             # +99_999_999 to mean "off right end"
460 0         0   for my $clone (keys %clones) {
461 0   0     0     my $start = $clones{$clone}{start} || -99_999_999;
462 0   0     0     my $end = $clones{$clone}{end} || +99_999_999;
463 0         0     my $phony_gff = join "\t",($parent,'Clone','structural',$start,$end,'.','.','.',qq(Clone "$clone"));
464 0         0     push @features,Ace::Sequence::Feature->new($parent,$r,$r_offset,$r_strand,$abs,$phony_gff);
465               }
466 0         0   return @features;
467             }
468              
469             # Assemble a list of "GappedAlignment" objects. These objects
470             # contain a list of aligned segments.
471             sub alignments {
472 0     0 0 0   my $self = shift;
473 0         0   my @subtypes = @_;
474 0         0   my @types = map { "similarity:\^$_\$" } @subtypes;
  0         0  
475 0 0       0   push @types,'similarity' unless @types;
476 0         0   return $self->features(@types);
477             }
478              
479             sub segments {
480 0     0 0 0   my $self = shift;
481 0         0   return;
482             }
483              
484             sub _make_alignments {
485 0     0   0   my $self = shift;
486 0         0   my $features = shift;
487 0         0   require Ace::Sequence::GappedAlignment;
488              
489 0         0   my %homol;
490              
491 0         0   for my $feature (@$features) {
492 0 0       0     next unless $feature->type eq 'similarity';
493 0         0     my $target = $feature->info;
494 0         0     my $subtype = $feature->subtype;
495 0         0     push @{$homol{$target,$subtype}},$feature;
  0         0  
496               }
497              
498             # map onto Ace::Sequence::GappedAlignment objects
499 0         0   return map {Ace::Sequence::GappedAlignment->new($homol{$_})} keys %homol;
  0         0  
500             }
501              
502             # return list of features quickly
503             sub feature_list {
504 0     0 1 0   my $self = shift;
505 0 0       0   return $self->{'feature_list'} if $self->{'feature_list'};
506 0 0       0   return unless my $raw = $self->_query('seqfeatures -version 2 -list');
507 0         0   return $self->{'feature_list'} = Ace::Sequence::FeatureList->new($raw);
508             }
509              
510             # transform a GFF file into the coordinate system of the sequence
511             sub transformGFF {
512 6     6 0 55   my $self = shift;
513 6         84   my $gff = shift;
514 6         65   my $parent = $self->parent;
515 6         60   my $strand = $self->{strand};
516 6         66   my $source = $self->source;
517 6         83   my ($ref_source,$ref_offset,$ref_strand) = $self->refseq;
518 6   50     94   $ref_source ||= $source;
519 6   50     66   $ref_strand ||= $strand;
520              
521 6 100       72   if ($ref_strand > 0) {
522 5 50       50     my $o = defined($ref_offset) ? $ref_offset : ($self->p_offset + $self->offset);
523             # find anything that looks like a numeric field and subtract offset from it
524 5         372     $$gff =~ s/(?<!\")\s+(-?\d+)\s+(-?\d+)/"\t" . ($1 - $o) . "\t" . ($2 - $o)/eg;
  333         8320  
525 5         57     $$gff =~ s/^$parent/$source/mg;
526 5         50     $$gff =~ s/\#\#sequence-region\s+\S+/##sequence-region $ref_source/m;
527 5         53     $$gff =~ s/FMAP_FEATURES\s+"\S+"/FMAP_FEATURES "$ref_source"/m;
528 5         113     return;
529               } else { # strand eq '-'
530 1 50       14     my $o = defined($ref_offset) ? (2 + $ref_offset) : (2 + $self->p_offset - $self->offset);
531 1         38     $$gff =~ s/(?<!\")\s+(-?\d+)\s+(-?\d+)\s+([.\d]+)\s+(\S)/join "\t",'',$o-$2,$o-$1,$3,$plusminus{$4}/eg;
  5         149  
532 1         13     $$gff =~ s/(Target \"[^\"]+\" )(-?\d+) (-?\d+)/$1 $3 $2/g;
533 1         11     $$gff =~ s/^$parent/$source/mg;
534 1         22     $$gff =~ s/\#\#sequence-region\s+\S+\s+(-?\d+)\s+(-?\d+)/"##sequence-region $ref_source " . ($o - $2) . ' ' . ($o - $1) . ' (reversed)'/em;
  1         10  
535 1         151     $$gff =~ s/FMAP_FEATURES\s+"\S+"\s+(-?\d+)\s+(-?\d+)/"FMAP_FEATURES \"$ref_source\" " . ($o - $2) . ' ' . ($o - $1) . ' (reversed)'/em;
  1         18  
536               }
537              
538             }
539              
540             # return a name for the object
541             sub name {
542 0     0 1 0   return shift->source_seq->name;
543             }
544              
545             # for compatibility with Ace::Sequence::Feature
546             sub info {
547 0     0 0 0   return shift->source_seq;
548             }
549              
550             ###################### internal functions #################
551             # not necessarily object-oriented!!
552              
553             # return parent, parent offset and strand
554             sub find_parent {
555 27     27 0 271   my $obj = shift;
556              
557             # first, if we are passed an Ace::Sequence, then we can inherit
558             # these settings directly
559 27 100       500   return (@{$obj}{qw(parent p_offset length)},$obj->r_strand)
  16         234  
560                 if $obj->isa('Ace::Sequence');
561              
562             # otherwise, if we are passed an Ace::Object, then we must
563             # traverse upwards until we find a suitable parent
564 11 50       171   return _traverse($obj) if $obj->isa('Ace::Object');
565              
566             # otherwise, we don't know what to do...
567 0         0   croak "Source sequence not an Ace::Object or an Ace::Sequence";
568             }
569              
570             sub _get_parent {
571 0     0   0   my $obj = shift;
572             # ** DANGER DANGER WILL ROBINSON! **
573             # This is an experiment in caching parents to speed lookups. Probably eats memory voraciously.
574 0 0       0   return $CACHE{$obj} if CACHE && exists $CACHE{$obj};
575 0   0     0   my $p = $obj->get(S_Parent=>2)|| $obj->get(Source=>1);
576 0 0       0   return unless $p;
577 0         0   return CACHE ? $CACHE{$obj} = $p->fetch
578                            : $p->fetch;
579             }
580              
581             sub _get_children {
582 0     0   0   my $obj = shift;
583 0         0   my @pieces = $obj->get(S_Child=>2);
584 0 0       0   return @pieces if @pieces;
585 0         0   return @pieces = $obj->get('Subsequence');
586             }
587              
588             # get sequence, offset and strand of topmost container
589             sub _traverse {
590 11     11   91   my $obj = shift;
591 11         92   my ($offset,$length);
592              
593             # invoke seqget to find the top-level container for this sequence
594 11         106   my ($tl,$tl_start,$tl_end) = _get_toplevel($obj);
595 11   50     157   $tl_start ||= 0;
596 11   50     104   $tl_end ||= 0;
597              
598             # make it an object
599 11         310   $tl = ref($obj)->new(-name=>$tl,-class=>'Sequence',-db=>$obj->db);
600              
601 11         152   $offset += $tl_start - 1; # offset to beginning of toplevel
602 11   50     147   $length ||= abs($tl_end - $tl_start) + 1;
603 11 100       165   my $strand = $tl_start < $tl_end ? +1 : -1;
604              
605 11 100       212   return ($tl,$offset,$strand < 0 ? ($length,'-1') : ($length,'+1') ) if $length;
    50          
606             }
607              
608             sub _get_toplevel {
609 11     11   92   my $obj = shift;
610 11         254   my $class = $obj->class;
611 11         123   my $name = $obj->name;
612              
613 11         126   my $smap = $obj->db->raw_query("gif smap -from $class:$name");
614 11         275   my ($parent,$pstart,$pstop,$tstart,$tstop,$map_type) =
615                 $smap =~ /^SMAP\s+(\S+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)\s+(.+)/;
616              
617 11   50     198   $parent ||= '';
618 11         162   $parent =~ s/^Sequence://; # remove this in next version of Acedb
619 11         425   return ($parent,$pstart,$pstop);
620             }
621              
622             # create subroutine that filters GFF files for certain feature types
623             sub _make_filter {
624 5     5   44   my $self = shift;
625 5         2088   my $automerge = $self->automerge;
626              
627             # parse out the filter
628 5         41   my %filter;
629 5         51   foreach (@_) {
630 7         83     my ($type,$filter) = split(':',$_,2);
631 7 50 33     123     if ($automerge && lc($type) eq 'transcript') {
    50 33        
632 0         0       @filter{'exon','intron','Sequence','cds'} = ([undef],[undef],[undef],[undef]);
633                 } elsif ($automerge && lc($type) eq 'clone') {
634 0         0       @filter{'Clone_left_end','Clone_right_end','Sequence'} = ([undef],[undef],[undef]);
635                 } else {
636 7         60       push @{$filter{$type}},$filter;
  7         94  
637                 }
638               }
639              
640             # create pattern-match sub
641 5         42   my $sub;
642 5         42   my $promiscuous; # indicates that there is a subtype without a type
643              
644 5 50       71   if (%filter) {
645 5         44     my $s = "sub { my \@d = split(\"\\t\",\$_[0]);\n";
646 5         153     for my $type (keys %filter) {
647 7         59       my $expr;
648 7         60       my $subtypes = $filter{$type};
649 7 50       63       if ($type ne '') {
650 7         63 for my $st (@$subtypes) {
651 7 50       113 $expr .= defined $st ? "return 1 if \$d[2]=~/$type/i && \$d[1]=~/$st/i;\n"
652             : "return 1 if \$d[2]=~/$type/i;\n"
653             }
654                   } else { # no type, only subtypes
655 0         0 $promiscuous++;
656 0         0 for my $st (@$subtypes) {
657 0 0       0 next unless defined $st;
658 0         0 $expr .= "return 1 if \$d[1]=~/$st/i;\n";
659             }
660                   }
661 7         72       $s .= $expr;
662                 }
663 5         49     $s .= "return;\n }";
664              
665 5         41     $sub = eval $s;
666 5 50       339     croak $@ if $@;
667               } else {
668 0     0   0     $sub = sub { 1; }
669 0         0   }
670 5 50       96   return ($sub,$promiscuous ? [] : [keys %filter]);
671             }
672              
673             # turn a GFF file and a filter into a list of Ace::Sequence::Feature objects
674             sub _make_features {
675 5     5   44   my $self = shift;
676 5         181   my ($gff,$filter) = @_;
677              
678 5         57   my ($r,$r_offset,$r_strand) = $self->refseq;
679 5         56   my $parent = $self->parent;
680 5         50   my $abs = $self->absolute;
681 5 50       51   if ($abs) {
682 0         0     $r_offset = 0;
683 0         0     $r = $parent;
684 0         0     $r_strand = '+1';
685               }
686 5   66     246   my @features = map {Ace::Sequence::Feature->new($parent,$r,$r_offset,$r_strand,$abs,$_)}
  166         2060  
687                              grep !m@^(?:\#|//)@ && $filter->($_),split("\n",$gff);
688             }
689              
690              
691             # low level GFF call, no changing absolute to relative coordinates
692             sub _gff {
693 6     6   388   my $self = shift;
694 6         56   my ($opt,$db) = @_;
695 6         72   my $data = $self->_query("seqfeatures -version 2 $opt",$db);
696 6         127   $data =~ s/\0+\Z//;
697 6         230   return $data; #blasted nulls!
698             }
699              
700             # shortcut for running a gif query
701             sub _query {
702 18     18   162   my $self = shift;
703 18         164   my $command = shift;
704 18   33     230   my $db = shift || $self->db;
705              
706 18         283   my $parent = $self->parent;
707 18         268   my $start = $self->start(1);
708 18         187   my $end = $self->end(1);
709 18 100       211   ($start,$end) = ($end,$start) if $start > $end; #flippity floppity
710              
711 18         196   my $coord = "-coords $start $end";
712              
713             # BAD BAD HACK ALERT - CHECKS THE QUERY THAT IS PASSED DOWN
714             # ALSO MAKES THINGS INCOMPATIBLE WITH PRIOR 4.9 servers.
715             # my $opt = $command =~ /seqfeatures/ ? '-nodna' : '';
716 18         157   my $opt = '-noclip';
717              
718 18         214   my $query = "gif seqget $parent $opt $coord ; $command";
719 18 50       281   warn $query if $self->debug;
720              
721 18         243   return $db->raw_query("gif seqget $parent $opt $coord ; $command");
722             }
723              
724             # utility function -- reverse complement
725             sub _complement {
726 8     8   73   my $dna = shift;
727 8         82   $$dna =~ tr/GATCgatc/CTAGctag/;
728 8         87   $$dna = scalar reverse $$dna;
729             }
730              
731             sub _feature_filter {
732 6     6   51   my $self = shift;
733 6         53   my $features = shift;
734 6 100       83   return '' unless $features;
735 5         88   my $opt = '';
736 5 50 33     89   $opt = '+feature ' . join('|',@$features) if ref($features) eq 'ARRAY' && @$features;
737 5 50       50   $opt = "+feature $features" unless ref $features;
738 5         52   $opt;
739             }
740              
741             1;
742              
743             =head1 NAME
744            
745             Ace::Sequence - Examine ACeDB Sequence Objects
746            
747             =head1 SYNOPSIS
748            
749             # open database connection and get an Ace::Object sequence
750             use Ace::Sequence;
751            
752             $db = Ace->connect(-host => 'stein.cshl.org',-port => 200005);
753             $obj = $db->fetch(Predicted_gene => 'ZK154.3');
754            
755             # Wrap it in an Ace::Sequence object
756             $seq = Ace::Sequence->new($obj);
757            
758             # Find all the exons
759             @exons = $seq->features('exon');
760            
761             # Find all the exons predicted by various versions of "genefinder"
762             @exons = $seq->features('exon:genefinder.*');
763            
764             # Iterate through the exons, printing their start, end and DNA
765             for my $exon (@exons) {
766             print join "\t",$exon->start,$exon->end,$exon->dna,"\n";
767             }
768            
769             # Find the region 1000 kb upstream of the first exon
770             $sub = Ace::Sequence->new(-seq=>$exons[0],
771             -offset=>-1000,-length=>1000);
772            
773             # Find all features in that area
774             @features = $sub->features;
775            
776             # Print its DNA
777             print $sub->dna;
778            
779             # Create a new Sequence object from the first 500 kb of chromosome 1
780             $seq = Ace::Sequence->new(-name=>'CHROMOSOME_I',-db=>$db,
781             -offset=>0,-length=>500_000);
782            
783             # Get the GFF dump as a text string
784             $gff = $seq->gff;
785            
786             # Limit dump to Predicted_genes
787             $gff_genes = $seq->gff(-features=>'Predicted_gene');
788            
789             # Return a GFF object (using optional GFF.pm module from Sanger)
790             $gff_obj = $seq->GFF;
791            
792             =head1 DESCRIPTION
793            
794             I<Ace::Sequence>, and its allied classes L<Ace::Sequence::Feature> and
795             L<Ace::Sequence::FeatureList>, provide a convenient interface to the
796             ACeDB Sequence classes and the GFF sequence feature file format.
797            
798             Using this class, you can define a region of the genome by using a
799             landmark (sequenced clone, link, superlink, predicted gene), an offset
800             from that landmark, and a distance. Offsets and distances can be
801             positive or negative. This will return an I<Ace::Sequence> object.
802             Once a region is defined, you may retrieve its DNA sequence, or query
803             the database for any features that may be contained within this
804             region. Features can be returned as objects (using the
805             I<Ace::Sequence::Feature> class), as GFF text-only dumps, or in the
806             form of the GFF class defined by the Sanger Centre's GFF.pm module.
807            
808             This class builds on top of L<Ace> and L<Ace::Object>. Please see
809             their manual pages before consulting this one.
810            
811             =head1 Creating New Ace::Sequence Objects, the new() Method
812            
813             $seq = Ace::Sequence->new($object);
814            
815             $seq = Ace::Sequence->new(-source => $object,
816             -offset => $offset,
817             -length => $length,
818             -refseq => $reference_sequence);
819            
820             $seq = Ace::Sequence->new(-name => $name,
821             -db => $db,
822             -offset => $offset,
823             -length => $length,
824             -refseq => $reference_sequence);
825            
826             In order to create an I<Ace::Sequence> you will need an active I<Ace>
827             database accessor. Sequence regions are defined using a "source"
828             sequence, an offset, and a length. Optionally, you may also provide a
829             "reference sequence" to establish the coordinate system for all
830             inquiries. Sequences may be generated from existing I<Ace::Object>
831             sequence objects, from other I<Ace::Sequence> and
832             I<Ace::Sequence::Feature> objects, or from a sequence name and a
833             database handle.
834            
835             The class method named new() is the interface to these facilities. In
836             its simplest, one-argument form, you provide new() with a
837             previously-created I<Ace::Object> that points to Sequence or
838             sequence-like object (the meaning of "sequence-like" is explained in
839             more detail below.) The new() method will return an I<Ace::Sequence>
840             object extending from the beginning of the object through to its
841             natural end.
842            
843             In the named-parameter form of new(), the following arguments are
844             recognized:
845            
846             =over 4
847            
848             =item -source
849            
850             The sequence source. This must be an I<Ace::Object> of the "Sequence"
851             class, or be a sequence-like object containing the SMap tag (see
852             below).
853            
854             =item -offset
855            
856             An offset from the beginning of the source sequence. The retrieved
857             I<Ace::Sequence> will begin at this position. The offset can be any
858             positive or negative integer. Offets are B<0-based>.
859            
860             =item -length
861            
862             The length of the sequence to return. Either a positive or negative
863             integer can be specified. If a negative length is given, the returned
864             sequence will be complemented relative to the source sequence.
865            
866             =item -refseq
867            
868             The sequence to use to establish the coordinate system for the
869             returned sequence. Normally the source sequence is used to establish
870             the coordinate system, but this can be used to override that choice.
871             You can provide either an I<Ace::Object> or just a sequence name for
872             this argument. The source and reference sequences must share a common
873             ancestor, but do not have to be directly related. An attempt to use a
874             disjunct reference sequence, such as one on a different chromosome,
875             will fail.
876            
877             =item -name
878            
879             As an alternative to using an I<Ace::Object> with the B<-source>
880             argument, you may specify a source sequence using B<-name> and B<-db>.
881             The I<Ace::Sequence> module will use the provided database accessor to
882             fetch a Sequence object with the specified name. new() will return
883             undef is no Sequence by this name is known.
884            
885             =item -db
886            
887             This argument is required if the source sequence is specified by name
888             rather than by object reference.
889            
890             =back
891            
892             If new() is successful, it will create an I<Ace::Sequence> object and
893             return it. Otherwise it will return undef and return a descriptive
894             message in Ace->error(). Certain programming errors, such as a
895             failure to provide required arguments, cause a fatal error.
896            
897             =head2 Reference Sequences and the Coordinate System
898            
899             When retrieving information from an I<Ace::Sequence>, the coordinate
900             system is based on the sequence segment selected at object creation
901             time. That is, the "+1" strand is the natural direction of the
902             I<Ace::Sequence> object, and base pair 1 is its first base pair. This
903             behavior can be overridden by providing a reference sequence to the
904             new() method, in which case the orientation and position of the
905             reference sequence establishes the coordinate system for the object.
906            
907             In addition to the reference sequence, there are two other sequences
908             used by I<Ace::Sequence> for internal bookeeping. The "source"
909             sequence corresponds to the smallest ACeDB sequence object that
910             completely encloses the selected sequence segment. The "parent"
911             sequence is the smallest ACeDB sequence object that contains the
912             "source". The parent is used to derive the length and orientation of
913             source sequences that are not directly associated with DNA objects.
914            
915             In many cases, the source sequence will be identical to the sequence
916             initially passed to the new() method. However, there are exceptions
917             to this rule. One common exception occurs when the offset and/or
918             length cross the boundaries of the passed-in sequence. In this case,
919             the ACeDB database is searched for the smallest sequence that contains
920             both endpoints of the I<Ace::Sequence> object.
921            
922             The other common exception occurs in Ace 4.8, where there is support
923             for "sequence-like" objects that contain the C<SMap> ("Sequence Map")
924             tag. The C<SMap> tag provides genomic location information for
925             arbitrary object -- not just those descended from the Sequence class.
926             This allows ACeDB to perform genome map operations on objects that are
927             not directly related to sequences, such as genetic loci that have been
928             interpolated onto the physical map. When an C<SMap>-containing object
929             is passed to the I<Ace::Sequence> new() method, the module will again
930             choose the smallest ACeDB Sequence object that contains both
931             end-points of the desired region.
932            
933             If an I<Ace::Sequence> object is used to create a new I<Ace::Sequence>
934             object, then the original object's source is inherited.
935            
936             =head1 Object Methods
937            
938             Once an I<Ace::Sequence> object is created, you can query it using the
939             following methods:
940            
941             =head2 asString()
942            
943             $name = $seq->asString;
944            
945             Returns a human-readable identifier for the sequence in the form
946             I<Source/start-end>, where "Source" is the name of the source
947             sequence, and "start" and "end" are the endpoints of the sequence
948             relative to the source (using 1-based indexing). This method is
949             called automatically when the I<Ace::Sequence> is used in a string
950             context.
951            
952             =head2 source_seq()
953            
954             $source = $seq->source_seq;
955            
956             Return the source of the I<Ace::Sequence>.
957            
958             =head2 parent_seq()
959            
960             $parent = $seq->parent_seq;
961            
962             Return the immediate ancestor of the sequence. The parent of the
963             top-most sequence (such as the CHROMOSOME link) is itself. This
964             method is used internally to ascertain the length of source sequences
965             which are not associated with a DNA object.
966            
967             NOTE: this procedure is a trifle funky and cannot reliably be used to
968             traverse upwards to the top-most sequence. The reason for this is
969             that it will return an I<Ace::Sequence> in some cases, and an
970             I<Ace::Object> in others. Use get_parent() to traverse upwards
971             through a uniform series of I<Ace::Sequence> objects upwards.
972            
973             =head2 refseq([$seq])
974            
975             $refseq = $seq->refseq;
976            
977             Returns the reference sequence, if one is defined.
978            
979             $seq->refseq($new_ref);
980            
981             Set the reference sequence. The reference sequence must share the same
982             ancestor with $seq.
983            
984             =head2 start()
985            
986             $start = $seq->start;
987            
988             Start of this sequence, relative to the source sequence, using 1-based
989             indexing.
990            
991             =head2 end()
992            
993             $end = $seq->end;
994            
995             End of this sequence, relative to the source sequence, using 1-based
996             indexing.
997            
998             =head2 offset()
999            
1000             $offset = $seq->offset;
1001            
1002             Offset of the beginning of this sequence relative to the source
1003             sequence, using 0-based indexing. The offset may be negative if the
1004             beginning of the sequence is to the left of the beginning of the
1005             source sequence.
1006            
1007             =head2 length()
1008            
1009             $length = $seq->length;
1010            
1011             The length of this sequence, in base pairs. The length may be
1012             negative if the sequence's orientation is reversed relative to the
1013             source sequence. Use abslength() to obtain the absolute value of
1014             the sequence length.
1015            
1016             =head2 abslength()
1017            
1018             $length = $seq->abslength;
1019            
1020             Return the absolute value of the length of the sequence.
1021            
1022             =head2 strand()
1023            
1024             $strand = $seq->strand;
1025            
1026             Returns +1 for a sequence oriented in the natural direction of the
1027             genomic reference sequence, or -1 otherwise.
1028            
1029             =head2 reversed()
1030            
1031             Returns true if the segment is reversed relative to the canonical
1032             genomic direction. This is the same as $seq->strand < 0.
1033            
1034             =head2 dna()
1035            
1036             $dna = $seq->dna;
1037            
1038             Return the DNA corresponding to this sequence. If the sequence length
1039             is negative, the reverse complement of the appropriate segment will be
1040             returned.
1041            
1042             ACeDB allows Sequences to exist without an associated DNA object
1043             (which typically happens during intermediate stages of a sequencing
1044             project. In such a case, the returned sequence will contain the
1045             correct number of "-" characters.
1046            
1047             =head2 name()
1048            
1049             $name = $seq->name;
1050            
1051             Return the name of the source sequence as a string.
1052            
1053             =head2 get_parent()
1054            
1055             $parent = $seq->parent;
1056            
1057             Return the immediate ancestor of this I<Ace::Sequence> (i.e., the
1058             sequence that contains this one). The return value is a new
1059             I<Ace::Sequence> or undef, if no parent sequence exists.
1060            
1061             =head2 get_children()
1062            
1063             @children = $seq->get_children();
1064            
1065             Returns all subsequences that exist as independent objects in the
1066             ACeDB database. What exactly is returned is dependent on the data
1067             model. In older ACeDB databases, the only subsequences are those
1068             under the catchall Subsequence tag. In newer ACeDB databases, the
1069             objects returned correspond to objects to the right of the S_Child
1070             subtag using a tag[2] syntax, and may include Predicted_genes,
1071             Sequences, Links, or other objects. The return value is a list of
1072             I<Ace::Sequence> objects.
1073            
1074             =head2 features()
1075            
1076             @features = $seq->features;
1077             @features = $seq->features('exon','intron','Predicted_gene');
1078             @features = $seq->features('exon:GeneFinder','Predicted_gene:hand.*');
1079            
1080             features() returns an array of I<Sequence::Feature> objects. If
1081             called without arguments, features() returns all features that cross
1082             the sequence region. You may also provide a filter list to select a
1083             set of features by type and subtype. The format of the filter list
1084             is:
1085            
1086             type:subtype
1087            
1088             Where I<type> is the class of the feature (the "feature" field of the
1089             GFF format), and I<subtype> is a description of how the feature was
1090             derived (the "source" field of the GFF format). Either of these
1091             fields can be absent, and either can be a regular expression. More
1092             advanced filtering is not supported, but is provided by the Sanger
1093             Centre's GFF module.
1094            
1095             The order of the features in the returned list is not specified. To
1096             obtain features sorted by position, use this idiom:
1097            
1098             @features = sort { $a->start <=> $b->start } $seq->features;
1099            
1100             =head2 feature_list()
1101            
1102             my $list = $seq->feature_list();
1103            
1104             This method returns a summary list of the features that cross the
1105             sequence in the form of a L<Ace::Feature::List> object. From the
1106             L<Ace::Feature::List> object you can obtain the list of feature names
1107             and the number of each type. The feature list is obtained from the
1108             ACeDB server with a single short transaction, and therefore has much
1109             less overhead than features().
1110            
1111             See L<Ace::Feature::List> for more details.
1112            
1113             =head2 transcripts()
1114            
1115             This returns a list of Ace::Sequence::Transcript objects, which are
1116             specializations of Ace::Sequence::Feature. See L<Ace::Sequence::Transcript>
1117             for details.
1118            
1119             =head2 clones()
1120            
1121             This returns a list of Ace::Sequence::Feature objects containing
1122             reconstructed clones. This is a nasty hack, because ACEDB currently
1123             records clone ends, but not the clones themselves, meaning that we
1124             will not always know both ends of the clone. In this case the missing
1125             end has a synthetic position of -99,999,999 or +99,999,999. Sorry.
1126            
1127             =head2 gff()
1128            
1129             $gff = $seq->gff();
1130             $gff = $seq->gff(-abs => 1,
1131             -features => ['exon','intron:GeneFinder']);
1132            
1133             This method returns a GFF file as a scalar. The following arguments
1134             are optional:
1135            
1136             =over 4
1137            
1138             =item -abs
1139            
1140             Ordinarily the feature entries in the GFF file will be returned in
1141             coordinates relative to the start of the I<Ace::Sequence> object.
1142             Position 1 will be the start of the sequence object, and the "+"
1143             strand will be the sequence object's natural orientation. However if
1144             a true value is provided to B<-abs>, the coordinate system used will
1145             be relative to the start of the source sequence, i.e. the native ACeDB
1146             Sequence object (usually a cosmid sequence or a link).
1147            
1148             If a reference sequence was provided when the I<Ace::Sequence> was
1149             created, it will be used by default to set the coordinate system.
1150             Relative coordinates can be reenabled by providing a false value to
1151             B<-abs>.
1152            
1153             Ordinarily the coordinate system manipulations automatically "do what
1154             you want" and you will not need to adjust them. See also the abs()
1155             method described below.
1156            
1157             =item -features
1158            
1159             The B<-features> argument filters the features according to a list of
1160             types and subtypes. The format is identical to the one described for
1161             the features() method. A single filter may be provided as a scalar
1162             string. Multiple filters may be passed as an array reference.
1163            
1164             =back
1165            
1166             See also the GFF() method described next.
1167            
1168             =head2 GFF()
1169            
1170             $gff_object = $seq->gff;
1171             $gff_object = $seq->gff(-abs => 1,
1172             -features => ['exon','intron:GeneFinder']);
1173            
1174             The GFF() method takes the same arguments as gff() described above,
1175             but it returns a I<GFF::GeneFeatureSet> object from the GFF.pm
1176             module. If the GFF module is not installed, this method will generate
1177             a fatal error.
1178            
1179             =head2 absolute()
1180            
1181             $abs = $seq->absolute;
1182             $abs = $seq->absolute(1);
1183            
1184             This method controls whether the coordinates of features are returned
1185             in absolute or relative coordinates. "Absolute" coordinates are
1186             relative to the underlying source or reference sequence. "Relative"
1187             coordinates are relative to the I<Ace::Sequence> object. By default,
1188             coordinates are relative unless new() was provided with a reference
1189             sequence. This default can be examined and changed using absolute().
1190            
1191             =head2 automerge()
1192            
1193             $merge = $seq->automerge;
1194             $seq->automerge(0);
1195            
1196             This method controls whether groups of features will automatically be
1197             merged together by the features() call. If true (the default), then
1198             the left and right end of clones will be merged into "clone" features,
1199             introns, exons and CDS entries will be merged into
1200             Ace::Sequence::Transcript objects, and similarity entries will be
1201             merged into Ace::Sequence::GappedAlignment objects.
1202            
1203             =head2 db()
1204            
1205             $db = $seq->db;
1206            
1207             Returns the L<Ace> database accessor associated with this sequence.
1208            
1209             =head1 SEE ALSO
1210            
1211             L<Ace>, L<Ace::Object>, L<Ace::Sequence::Feature>,
1212             L<Ace::Sequence::FeatureList>, L<GFF>
1213            
1214             =head1 AUTHOR
1215            
1216             Lincoln Stein <lstein@cshl.org> with extensive help from Jean
1217             Thierry-Mieg <mieg@kaa.crbm.cnrs-mop.fr>
1218            
1219             Many thanks to David Block <dblock@gene.pbi.nrc.ca> for finding and
1220             fixing the nasty off-by-one errors.
1221            
1222             Copyright (c) 1999, Lincoln D. Stein
1223            
1224             This library is free software; you can redistribute it and/or modify
1225             it under the same terms as Perl itself. See DISCLAIMER.txt for
1226             disclaimers of warranty.
1227            
1228             =cut
1229              
1230             __END__
1231